2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/* 8x8 zig-zag scan order: entry k is the raster-order index of the k-th
 * coefficient in zig-zag scan.  Used to reorder quantization matrices
 * before uploading them to the MFX unit.
 * NOTE(review): the closing "};" of this initializer is not visible in
 * this extract — lines appear to have been dropped. */
51 static const uint32_t zigzag_direct[64] = {
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
54 12, 19, 26, 33, 40, 48, 41, 34,
55 27, 20, 13, 6, 7, 14, 21, 28,
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
/* Lazily attach AVC-specific private data (GenAvcSurface) to a decode
 * surface and allocate its direct-MV (DMV) scratch buffer, sized from the
 * frame dimensions in the picture parameters.  Idempotent: existing
 * private data / DMV buffers are reused.
 * NOTE(review): the "static void" line, braces and the tail of this
 * function (dmv_bottom handling, return paths) are missing from this
 * extract — do not edit without the full file. */
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
    /* Install the destructor so the private data is freed with the surface. */
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
        /* Allocation failure: bail out (rest of the early-return path not
         * visible in this extract). */
78         if (!gen7_avc_surface)
81         gen7_avc_surface->base.frame_store_id = -1;
82         assert((obj_surface->size & 0x3f) == 0); /* surface size must be 64-byte aligned */
83         obj_surface->private_data = gen7_avc_surface;
86     /* DMV buffers now relate to the whole frame, irrespective of
88     if (gen7_avc_surface->dmv_top == NULL) {
89         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
90                                                  "direct mv w/r buffer",
91                                                  width_in_mbs * height_in_mbs * 128,
93         assert(gen7_avc_surface->dmv_top);
/* Emit the MFX_PIPE_MODE_SELECT command (5 dwords) configuring the MFX
 * engine for VLD decode of the selected codec, and routing output to the
 * pre- or post-deblocking surface depending on context state.
 * NOTE(review): the "static void" line, braces and some dwords of the
 * packet are missing from this extract. */
98 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
99                           struct decode_state *decode_state,
101                           struct gen7_mfd_context *gen7_mfd_context)
103     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* Only these codecs are supported by this gen8 decode path. */
105     assert(standard_select == MFX_FORMAT_MPEG2 ||
106            standard_select == MFX_FORMAT_AVC ||
107            standard_select == MFX_FORMAT_VC1 ||
108            standard_select == MFX_FORMAT_JPEG ||
109            standard_select == MFX_FORMAT_VP8);
111     BEGIN_BCS_BATCH(batch, 5);
112     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
114                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
115                   (MFD_MODE_VLD << 15) | /* VLD mode */
116                   (0 << 10) | /* disable Stream-Out */
117                   (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
118                   (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
119                   (0 << 5) | /* not in stitch mode */
120                   (MFX_CODEC_DECODE << 4) | /* decoding mode */
121                   (standard_select << 0));
123                   (0 << 4) | /* terminate if AVC motion and POC table error occurs */
124                   (0 << 3) | /* terminate if AVC mbdata error occurs */
125                   (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
128     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
129     OUT_BCS_BATCH(batch, 0); /* reserved */
130     ADVANCE_BCS_BATCH(batch);
/* Emit MFX_SURFACE_STATE (6 dwords) describing the render target:
 * dimensions, pitch, tiling, planar format, and the Y offsets of the
 * Cb/Cr planes.  Monochrome is selected for Y800 surfaces, otherwise
 * planar 4:2:0 8-bit.
 * NOTE(review): function header line and braces missing from this
 * extract. */
134 gen8_mfd_surface_state(VADriverContextP ctx,
135                        struct decode_state *decode_state,
137                        struct gen7_mfd_context *gen7_mfd_context)
139     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
140     struct object_surface *obj_surface = decode_state->render_object;
141     unsigned int y_cb_offset;
142     unsigned int y_cr_offset;
143     unsigned int surface_format;
147     y_cb_offset = obj_surface->y_cb_offset;
148     y_cr_offset = obj_surface->y_cr_offset;
150     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
151                      MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
153     BEGIN_BCS_BATCH(batch, 6);
154     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
155     OUT_BCS_BATCH(batch, 0);
157                   ((obj_surface->orig_height - 1) << 18) |
158                   ((obj_surface->orig_width - 1) << 4));
160                   (surface_format << 28) | /* 420 planar YUV surface */
161                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
162                   (0 << 22) | /* surface object control state, ignored */
163                   ((obj_surface->width - 1) << 3) | /* pitch */
164                   (0 << 2)  | /* must be 0 */
165                   (1 << 1)  | /* must be tiled */
166                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
168                   (0 << 16) | /* X offset for U(Cb), must be 0 */
169                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
171                   (0 << 16) | /* X offset for V(Cr), must be 0 */
172                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
173     ADVANCE_BCS_BATCH(batch);
/* Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): pre/post-deblocking outputs,
 * row-store scratch buffers, and the 16 reference-surface address slots.
 * Invalid/absent buffers are written as zero dwords so the packet length
 * stays fixed.
 * NOTE(review): function header, braces, several relocation offset dwords
 * and the else-branches pairing with the if()s are missing from this
 * extract — the visible OUT_BCS_BATCH(batch, 0) lines are the padding /
 * fallback dwords. */
177 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
178                              struct decode_state *decode_state,
180                              struct gen7_mfd_context *gen7_mfd_context)
182     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
185     BEGIN_BCS_BATCH(batch, 61);
186     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
187     /* Pre-deblock 1-3 */
188     if (gen7_mfd_context->pre_deblocking_output.valid)
189         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
190                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
193         OUT_BCS_BATCH(batch, 0);
195     OUT_BCS_BATCH(batch, 0);
196     OUT_BCS_BATCH(batch, 0);
197     /* Post-debloing 4-6 */
198     if (gen7_mfd_context->post_deblocking_output.valid)
199         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
200                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
203         OUT_BCS_BATCH(batch, 0);
205     OUT_BCS_BATCH(batch, 0);
206     OUT_BCS_BATCH(batch, 0);
208     /* uncompressed-video & stream out 7-12 */
209     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
210     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
211     OUT_BCS_BATCH(batch, 0);
212     OUT_BCS_BATCH(batch, 0);
213     OUT_BCS_BATCH(batch, 0);
214     OUT_BCS_BATCH(batch, 0);
216     /* intra row-store scratch 13-15 */
217     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
218         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
219                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
222         OUT_BCS_BATCH(batch, 0);
224     OUT_BCS_BATCH(batch, 0);
225     OUT_BCS_BATCH(batch, 0);
226     /* deblocking-filter-row-store 16-18 */
227     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
228         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
229                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
232         OUT_BCS_BATCH(batch, 0);
233     OUT_BCS_BATCH(batch, 0);
234     OUT_BCS_BATCH(batch, 0);
    /* Reference picture address slots: one reloc per valid reference
     * surface, zero dword otherwise. */
237     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
238         struct object_surface *obj_surface;
240         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
241             gen7_mfd_context->reference_surface[i].obj_surface &&
242             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
243             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
245             OUT_BCS_RELOC(batch, obj_surface->bo,
246                           I915_GEM_DOMAIN_INSTRUCTION, 0,
249             OUT_BCS_BATCH(batch, 0);
252     OUT_BCS_BATCH(batch, 0);
255     /* reference property 51 */
256     OUT_BCS_BATCH(batch, 0);
258     /* Macroblock status & ILDB 52-57 */
259     OUT_BCS_BATCH(batch, 0);
260     OUT_BCS_BATCH(batch, 0);
261     OUT_BCS_BATCH(batch, 0);
262     OUT_BCS_BATCH(batch, 0);
263     OUT_BCS_BATCH(batch, 0);
264     OUT_BCS_BATCH(batch, 0);
266     /* the second Macroblock status 58-60 */
267     OUT_BCS_BATCH(batch, 0);
268     OUT_BCS_BATCH(batch, 0);
269     OUT_BCS_BATCH(batch, 0);
271     ADVANCE_BCS_BATCH(batch);
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords).  Only the indirect
 * bitstream base address (the slice data bo) is programmed; the MV,
 * IT-COFF, IT-DBLK and PAK-BSE sections are zeroed since they are unused
 * for VLD decode.
 * NOTE(review): function header line and braces missing from this
 * extract. */
275 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
276                                  dri_bo *slice_data_bo,
278                                  struct gen7_mfd_context *gen7_mfd_context)
280     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
282     BEGIN_BCS_BATCH(batch, 26);
283     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
285     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
286     OUT_BCS_BATCH(batch, 0);
287     OUT_BCS_BATCH(batch, 0);
288     /* Upper bound 4-5 */
289     OUT_BCS_BATCH(batch, 0);
290     OUT_BCS_BATCH(batch, 0);
292     /* MFX indirect MV 6-10 */
293     OUT_BCS_BATCH(batch, 0);
294     OUT_BCS_BATCH(batch, 0);
295     OUT_BCS_BATCH(batch, 0);
296     OUT_BCS_BATCH(batch, 0);
297     OUT_BCS_BATCH(batch, 0);
299     /* MFX IT_COFF 11-15 */
300     OUT_BCS_BATCH(batch, 0);
301     OUT_BCS_BATCH(batch, 0);
302     OUT_BCS_BATCH(batch, 0);
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
306     /* MFX IT_DBLK 16-20 */
307     OUT_BCS_BATCH(batch, 0);
308     OUT_BCS_BATCH(batch, 0);
309     OUT_BCS_BATCH(batch, 0);
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
313     /* MFX PAK_BSE object for encoder 21-25 */
314     OUT_BCS_BATCH(batch, 0);
315     OUT_BCS_BATCH(batch, 0);
316     OUT_BCS_BATCH(batch, 0);
317     OUT_BCS_BATCH(batch, 0);
318     OUT_BCS_BATCH(batch, 0);
320     ADVANCE_BCS_BATCH(batch);
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): BSD/MPC row-store, MPR
 * row-store, and bitplane read buffers.  Each section is a relocation
 * when its buffer is valid, otherwise zero dwords.
 * NOTE(review): function header, braces, else-branches and some
 * relocation offset dwords are missing from this extract. */
324 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
325                                  struct decode_state *decode_state,
327                                  struct gen7_mfd_context *gen7_mfd_context)
329     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
331     BEGIN_BCS_BATCH(batch, 10);
332     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
334     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
335         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
336                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
339         OUT_BCS_BATCH(batch, 0);
341     OUT_BCS_BATCH(batch, 0);
342     OUT_BCS_BATCH(batch, 0);
343     /* MPR Row Store Scratch buffer 4-6 */
344     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
345         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
346                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
349         OUT_BCS_BATCH(batch, 0);
351     OUT_BCS_BATCH(batch, 0);
352     OUT_BCS_BATCH(batch, 0);
    /* Bitplane read buffer (used by VC-1): read-only relocation. */
355     if (gen7_mfd_context->bitplane_read_buffer.valid)
356         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
357                       I915_GEM_DOMAIN_INSTRUCTION, 0,
360         OUT_BCS_BATCH(batch, 0);
361     OUT_BCS_BATCH(batch, 0);
362     OUT_BCS_BATCH(batch, 0);
363     ADVANCE_BCS_BATCH(batch);
/* Emit MFX_QM_STATE (18 dwords) uploading one quantization matrix of up
 * to 64 bytes.  The input is copied into a zero-padded(?) 16-dword local
 * buffer — NOTE(review): qm_buffer is not explicitly zeroed on the
 * visible lines; if qm_length < 64 the tail dwords are uninitialized
 * stack unless a memset exists on a line missing from this extract —
 * confirm against the full file.
 * NOTE(review): function header line (with qm_type/qm/qm_length
 * parameters) and braces are missing from this extract. */
367 gen8_mfd_qm_state(VADriverContextP ctx,
371                   struct gen7_mfd_context *gen7_mfd_context)
373     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
374     unsigned int qm_buffer[16];
376     assert(qm_length <= 16 * 4); /* at most 64 bytes of matrix data */
377     memcpy(qm_buffer, qm, qm_length);
379     BEGIN_BCS_BATCH(batch, 18);
380     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
381     OUT_BCS_BATCH(batch, qm_type << 0);
382     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
383     ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_IMG_STATE (17 dwords) from the H.264 picture parameters:
 * frame size in MBs, QP index offsets, prediction/entropy flags and
 * field/MBAFF structure.  Also sanity-checks that the stream stays within
 * the hardware's supported profile (4:2:0 or monochrome only).
 * NOTE(review): function header, braces, the img_struct assignments and
 * several OUT_BCS_BATCH( opening lines are missing from this extract. */
387 gen8_mfd_avc_img_state(VADriverContextP ctx,
388                        struct decode_state *decode_state,
389                        struct gen7_mfd_context *gen7_mfd_context)
391     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
393     int mbaff_frame_flag;
394     unsigned int width_in_mbs, height_in_mbs;
395     VAPictureParameterBufferH264 *pic_param;
397     assert(decode_state->pic_param && decode_state->pic_param->buffer);
398     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
399     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
    /* Derive the picture structure from the CurrPic field flags
     * (assignments to img_struct are on lines missing from this extract). */
401     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
403     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
408     if ((img_struct & 0x1) == 0x1) {
409         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
411         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
414     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
415         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
416         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
418         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    /* MBAFF is only meaningful for frame pictures of an adaptive stream. */
421     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
422                         !pic_param->pic_fields.bits.field_pic_flag);
424     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
425     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
427     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
428     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
429            pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
430     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
432     BEGIN_BCS_BATCH(batch, 17);
433     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
435                   (width_in_mbs * height_in_mbs - 1));
437                   ((height_in_mbs - 1) << 16) |
438                   ((width_in_mbs - 1) << 0));
440                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
441                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
442                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
443                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
444                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
445                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
448                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
449                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
450                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
451                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
452                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
453                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
454                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
455                   (mbaff_frame_flag << 1) |
456                   (pic_param->pic_fields.bits.field_pic_flag << 0));
457     OUT_BCS_BATCH(batch, 0);
458     OUT_BCS_BATCH(batch, 0);
459     OUT_BCS_BATCH(batch, 0);
460     OUT_BCS_BATCH(batch, 0);
461     OUT_BCS_BATCH(batch, 0);
462     OUT_BCS_BATCH(batch, 0);
463     OUT_BCS_BATCH(batch, 0);
464     OUT_BCS_BATCH(batch, 0);
465     OUT_BCS_BATCH(batch, 0);
466     OUT_BCS_BATCH(batch, 0);
467     OUT_BCS_BATCH(batch, 0);
468     OUT_BCS_BATCH(batch, 0);
469     ADVANCE_BCS_BATCH(batch);
/* Upload the H.264 scaling lists via MFX_QM_STATE: the six 4x4 matrices
 * (3 intra + 3 inter) always, and the two 8x8 matrices only when the
 * picture enables 8x8 transforms.  Falls back to the context's default
 * IQ matrix when the app supplied none.
 * NOTE(review): function header line and braces missing from this
 * extract. */
473 gen8_mfd_avc_qm_state(VADriverContextP ctx,
474                       struct decode_state *decode_state,
475                       struct gen7_mfd_context *gen7_mfd_context)
477     VAIQMatrixBufferH264 *iq_matrix;
478     VAPictureParameterBufferH264 *pic_param;
480     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
481         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
    /* else-branch: default flat matrices kept in the decoder context. */
483         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
485     assert(decode_state->pic_param && decode_state->pic_param->buffer);
486     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
488     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
489     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
491     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
492         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
493         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
/* Thin wrapper: emit the AVC PICID state for the current reference
 * surface set via the shared gen75 helper.
 * NOTE(review): function header line and braces missing from this
 * extract. */
498 gen8_mfd_avc_picid_state(VADriverContextP ctx,
499                          struct decode_state *decode_state,
500                          struct gen7_mfd_context *gen7_mfd_context)
502     gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
503                                gen7_mfd_context->reference_surface);
/* Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): DMV buffer addresses for all
 * reference surfaces and for the current picture, followed by the
 * top/bottom POC values for each reference and for CurrPic.  Slots with
 * no valid reference are zero-filled.
 * NOTE(review): function header, braces, several reloc offset dwords and
 * loop else-branches are missing from this extract. */
507 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
508                               struct decode_state *decode_state,
509                               VAPictureParameterBufferH264 *pic_param,
510                               VASliceParameterBufferH264 *slice_param,
511                               struct gen7_mfd_context *gen7_mfd_context)
513     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
514     struct object_surface *obj_surface;
515     GenAvcSurface *gen7_avc_surface;
516     VAPictureH264 *va_pic;
519     BEGIN_BCS_BATCH(batch, 71);
520     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
522     /* reference surfaces 0..15 */
523     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
524         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
525             gen7_mfd_context->reference_surface[i].obj_surface &&
526             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
528             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
529             gen7_avc_surface = obj_surface->private_data;
531             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
532                           I915_GEM_DOMAIN_INSTRUCTION, 0,
534             OUT_BCS_BATCH(batch, 0);
536             OUT_BCS_BATCH(batch, 0);
537             OUT_BCS_BATCH(batch, 0);
541     OUT_BCS_BATCH(batch, 0);
543     /* the current decoding frame/field */
544     va_pic = &pic_param->CurrPic;
545     obj_surface = decode_state->render_object;
546     assert(obj_surface->bo && obj_surface->private_data);
547     gen7_avc_surface = obj_surface->private_data;
    /* Current picture's DMV buffer is written by the hardware (write
     * domain set on the relocation). */
549     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
550                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
553     OUT_BCS_BATCH(batch, 0);
554     OUT_BCS_BATCH(batch, 0);
    /* POC list: look each reference up in ReferenceFrames by surface id. */
557     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
558         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
561             const VAPictureH264 * const va_pic = avc_find_picture(
562                 obj_surface->base.id, pic_param->ReferenceFrames,
563                 ARRAY_ELEMS(pic_param->ReferenceFrames));
565             assert(va_pic != NULL);
566             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
567             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
569             OUT_BCS_BATCH(batch, 0);
570             OUT_BCS_BATCH(batch, 0);
574     va_pic = &pic_param->CurrPic;
575     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
576     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
578     ADVANCE_BCS_BATCH(batch);
/* Thin wrapper: emit a phantom slice covering macroblocks before the
 * first real slice (used when first_mb_in_slice != 0) via the shared
 * gen6 helper.
 * NOTE(review): function header line and braces missing from this
 * extract. */
582 gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
583                                  VAPictureParameterBufferH264 *pic_param,
584                                  VASliceParameterBufferH264 *next_slice_param,
585                                  struct gen7_mfd_context *gen7_mfd_context)
587     gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
/* Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice: normalized slice
 * type, active reference counts, QP/deblocking parameters, and the
 * current/next slice start positions in MB coordinates (doubled
 * vertically for MBAFF pictures).  When this is the last slice,
 * "next" position is the bottom of the picture and the last-slice flag
 * is set.
 * NOTE(review): function header, braces, some else-branches and the
 * OUT_BCS_BATCH( opening lines of several dwords are missing from this
 * extract. */
591 gen8_mfd_avc_slice_state(VADriverContextP ctx,
592                          VAPictureParameterBufferH264 *pic_param,
593                          VASliceParameterBufferH264 *slice_param,
594                          VASliceParameterBufferH264 *next_slice_param,
595                          struct gen7_mfd_context *gen7_mfd_context)
597     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
598     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
599     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
600     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
601     int num_ref_idx_l0, num_ref_idx_l1;
602     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
603                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
604     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    /* Collapse SI->I and SP->P: the hardware only knows I/P/B. */
607     if (slice_param->slice_type == SLICE_TYPE_I ||
608         slice_param->slice_type == SLICE_TYPE_SI) {
609         slice_type = SLICE_TYPE_I;
610     } else if (slice_param->slice_type == SLICE_TYPE_P ||
611                slice_param->slice_type == SLICE_TYPE_SP) {
612         slice_type = SLICE_TYPE_P;
614         assert(slice_param->slice_type == SLICE_TYPE_B);
615         slice_type = SLICE_TYPE_B;
618     if (slice_type == SLICE_TYPE_I) {
619         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
620         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
623     } else if (slice_type == SLICE_TYPE_P) {
624         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
625         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
628         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
629         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
632     first_mb_in_slice = slice_param->first_mb_in_slice;
633     slice_hor_pos = first_mb_in_slice % width_in_mbs;
634     slice_ver_pos = first_mb_in_slice / width_in_mbs;
    /* MBAFF addresses MB pairs, so the vertical position is doubled
     * (the guarding condition line is missing from this extract). */
637         slice_ver_pos = slice_ver_pos << 1;
638     if (next_slice_param) {
639         first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
640         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
641         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
644             next_slice_ver_pos = next_slice_ver_pos << 1;
646         next_slice_hor_pos = 0;
647         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
650     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
651     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
652     OUT_BCS_BATCH(batch, slice_type);
654                   (num_ref_idx_l1 << 24) |
655                   (num_ref_idx_l0 << 16) |
656                   (slice_param->chroma_log2_weight_denom << 8) |
657                   (slice_param->luma_log2_weight_denom << 0));
659                   (slice_param->direct_spatial_mv_pred_flag << 29) |
660                   (slice_param->disable_deblocking_filter_idc << 27) |
661                   (slice_param->cabac_init_idc << 24) |
662                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
663                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
664                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
666                   (slice_ver_pos << 24) |
667                   (slice_hor_pos << 16) |
668                   (first_mb_in_slice << 0));
670                   (next_slice_ver_pos << 16) |
671                   (next_slice_hor_pos << 0));
673                   (next_slice_param == NULL) << 19); /* last slice flag */
674     OUT_BCS_BATCH(batch, 0);
675     OUT_BCS_BATCH(batch, 0);
676     OUT_BCS_BATCH(batch, 0);
677     OUT_BCS_BATCH(batch, 0);
678     ADVANCE_BCS_BATCH(batch);
/* Thin wrapper: emit the AVC reference index list state via the shared
 * gen6 helper, using the context's reference surface table.
 * NOTE(review): function header line, braces and at least one argument
 * line of the call are missing from this extract. */
682 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
683                            VAPictureParameterBufferH264 *pic_param,
684                            VASliceParameterBufferH264 *slice_param,
685                            struct gen7_mfd_context *gen7_mfd_context)
687     gen6_send_avc_ref_idx_state(
688         gen7_mfd_context->base.batch,
690         gen7_mfd_context->reference_surface
/* Emit MFX_AVC_WEIGHTOFFSET_STATE (98 dwords) for explicit weighted
 * prediction: one table (list 0) for weighted P slices, two tables
 * (lists 0 and 1) for B slices with weighted_bipred_idc == 1.  Each
 * table packs luma and chroma weight/offset pairs for up to 32
 * references.
 * NOTE(review): function header, braces and the i==0 / i==1 branch
 * condition lines inside the loop are missing from this extract. */
695 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
696                                 VAPictureParameterBufferH264 *pic_param,
697                                 VASliceParameterBufferH264 *slice_param,
698                                 struct gen7_mfd_context *gen7_mfd_context)
700     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
701     int i, j, num_weight_offset_table = 0;
702     short weightoffsets[32 * 6];
704     if ((slice_param->slice_type == SLICE_TYPE_P ||
705          slice_param->slice_type == SLICE_TYPE_SP) &&
706         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
707         num_weight_offset_table = 1;
710     if ((slice_param->slice_type == SLICE_TYPE_B) &&
711         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
712         num_weight_offset_table = 2;
715     for (i = 0; i < num_weight_offset_table; i++) {
716         BEGIN_BCS_BATCH(batch, 98);
717         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
718         OUT_BCS_BATCH(batch, i); /* 0 = list 0 table, 1 = list 1 table */
        /* List 0 weights/offsets (branch condition missing from extract). */
721             for (j = 0; j < 32; j++) {
722                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
723                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
724                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
725                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
726                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
727                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
        /* List 1 weights/offsets (branch condition missing from extract). */
730             for (j = 0; j < 32; j++) {
731                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
732                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
733                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
734                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
735                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
736                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
740         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
741         ADVANCE_BCS_BATCH(batch);
/* Emit MFD_AVC_BSD_OBJECT (6 dwords) kicking off decode of one slice:
 * slice data size/offset in the indirect bitstream buffer, the first-MB
 * bit offset (split into byte and bit parts), and the last-slice flag.
 * NOTE(review): function header, braces and several dword fragments are
 * missing from this extract. */
746 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
747                         VAPictureParameterBufferH264 *pic_param,
748                         VASliceParameterBufferH264 *slice_param,
749                         dri_bo *slice_data_bo,
750                         VASliceParameterBufferH264 *next_slice_param,
751                         struct gen7_mfd_context *gen7_mfd_context)
753     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* Locate the first macroblock's bit position past the slice header
     * (CABAC vs CAVLC affects the alignment handling). */
754     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
756                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
758     /* the input bitsteam format on GEN7 differs from GEN6 */
759     BEGIN_BCS_BATCH(batch, 6);
760     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
762                   (slice_param->slice_data_size));
763     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
771                   ((slice_data_bit_offset >> 3) << 16) |
775                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
776                   (slice_data_bit_offset & 0x7));
777     OUT_BCS_BATCH(batch, 0);
778     ADVANCE_BCS_BATCH(batch);
/* One-time AVC context setup: seed the context's fallback IQ matrix with
 * flat (default) scaling lists for streams that supply none.
 * NOTE(review): return type line and braces missing from this extract. */
782 gen8_mfd_avc_context_init(
783     VADriverContextP ctx,
784     struct gen7_mfd_context *gen7_mfd_context
787     /* Initialize flat scaling lists */
788     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/* Per-picture AVC decode setup: scan slices to decide whether in-loop
 * deblocking (ILDB) is active, refresh the frame-store index, ensure the
 * render surface and its AVC private data exist, and (re)allocate all
 * row-store scratch buffers sized from the picture width.  Output is
 * routed post-deblocking when ILDB is enabled, pre-deblocking otherwise.
 * NOTE(review): function header, braces, loop-advance statements and the
 * size/name argument lines of several dri_bo_alloc calls are missing
 * from this extract. */
792 gen8_mfd_avc_decode_init(VADriverContextP ctx,
793                          struct decode_state *decode_state,
794                          struct gen7_mfd_context *gen7_mfd_context)
796     VAPictureParameterBufferH264 *pic_param;
797     VASliceParameterBufferH264 *slice_param;
798     struct i965_driver_data *i965 = i965_driver_data(ctx);
799     struct object_surface *obj_surface;
801     int i, j, enable_avc_ildb = 0;
802     unsigned int width_in_mbs, height_in_mbs;
    /* ILDB is needed as soon as any slice does not fully disable the
     * deblocking filter; stop scanning once found. */
804     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
805         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
806         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
808         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
809             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
810             assert((slice_param->slice_type == SLICE_TYPE_I) ||
811                    (slice_param->slice_type == SLICE_TYPE_SI) ||
812                    (slice_param->slice_type == SLICE_TYPE_P) ||
813                    (slice_param->slice_type == SLICE_TYPE_SP) ||
814                    (slice_param->slice_type == SLICE_TYPE_B));
816             if (slice_param->disable_deblocking_filter_idc != 1) {
825     assert(decode_state->pic_param && decode_state->pic_param->buffer);
826     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
827     gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
828                                        gen7_mfd_context->reference_surface);
829     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
830     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
831     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
832     assert(height_in_mbs > 0 && height_in_mbs <= 256);
834     /* Current decoded picture */
835     obj_surface = decode_state->render_object;
836     if (pic_param->pic_fields.bits.reference_pic_flag)
837         obj_surface->flags |= SURFACE_REFERENCED;
839         obj_surface->flags &= ~SURFACE_REFERENCED;
841     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
842     gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
    /* Exactly one of post/pre deblocking output is valid, driven by
     * enable_avc_ildb; both point at the render surface bo. */
844     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
845     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
846     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
847     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
849     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
850     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
851     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
852     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
854     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
855     bo = dri_bo_alloc(i965->intel.bufmgr,
860     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
861     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
863     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
864     bo = dri_bo_alloc(i965->intel.bufmgr,
865                       "deblocking filter row store",
866                       width_in_mbs * 64 * 4,
869     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
870     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
872     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
873     bo = dri_bo_alloc(i965->intel.bufmgr,
875                       width_in_mbs * 64 * 2,
878     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
879     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
881     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
882     bo = dri_bo_alloc(i965->intel.bufmgr,
884                       width_in_mbs * 64 * 2,
887     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
888     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
    /* Bitplane buffer is VC-1 only; never used for AVC. */
890     gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Top-level AVC picture decode: run per-picture init, then emit the
 * common pipeline state once, followed by per-slice state + BSD objects
 * for every slice element in every slice-parameter buffer, and finally
 * flush the batch.  A phantom slice is inserted if the first real slice
 * does not start at MB 0.
 * NOTE(review): function header, braces, and the per-iteration
 * slice_param advance at the loop tail are missing from this extract. */
894 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
895                             struct decode_state *decode_state,
896                             struct gen7_mfd_context *gen7_mfd_context)
898     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
899     VAPictureParameterBufferH264 *pic_param;
900     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
901     dri_bo *slice_data_bo;
904     assert(decode_state->pic_param && decode_state->pic_param->buffer);
905     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
906     gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
908     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
909     intel_batchbuffer_emit_mi_flush(batch);
910     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
911     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
912     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
913     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
914     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
915     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
916     gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
918     for (j = 0; j < decode_state->num_slice_params; j++) {
919         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
920         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
921         slice_data_bo = decode_state->slice_datas[j]->bo;
922         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
        /* Peek the first slice of the next group so the last slice of
         * this group knows its successor (NULL on the final group). */
924         if (j == decode_state->num_slice_params - 1)
925             next_slice_group_param = NULL;
927             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
929         if (j == 0 && slice_param->first_mb_in_slice)
930             gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
932         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
933             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
934             assert((slice_param->slice_type == SLICE_TYPE_I) ||
935                    (slice_param->slice_type == SLICE_TYPE_SI) ||
936                    (slice_param->slice_type == SLICE_TYPE_P) ||
937                    (slice_param->slice_type == SLICE_TYPE_SP) ||
938                    (slice_param->slice_type == SLICE_TYPE_B));
940             if (i < decode_state->slice_params[j]->num_elements - 1)
941                 next_slice_param = slice_param + 1;
943                 next_slice_param = next_slice_group_param;
945             gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
946             gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
947             gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
948             gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
949             gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
954     intel_batchbuffer_end_atomic(batch);
955     intel_batchbuffer_flush(batch);
/* Per-picture MPEG-2 decode setup: bind reference surfaces, ensure the
 * NV12 render surface bo exists, route output pre-deblocking (MPEG-2 has
 * no in-loop deblocking), and allocate the BSD/MPC row-store scratch
 * buffer.  All other scratch/bitplane buffers are marked invalid.
 * NOTE(review): function header, braces, some argument lines of
 * mpeg2_set_reference_surfaces and the size/name lines of dri_bo_alloc
 * are missing from this extract. */
959 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
960                            struct decode_state *decode_state,
961                            struct gen7_mfd_context *gen7_mfd_context)
963     VAPictureParameterBufferMPEG2 *pic_param;
964     struct i965_driver_data *i965 = i965_driver_data(ctx);
965     struct object_surface *obj_surface;
967     unsigned int width_in_mbs;
969     assert(decode_state->pic_param && decode_state->pic_param->buffer);
970     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
971     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
973     mpeg2_set_reference_surfaces(
975         gen7_mfd_context->reference_surface,
980     /* Current decoded picture */
981     obj_surface = decode_state->render_object;
982     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
984     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
985     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
986     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
987     gen7_mfd_context->pre_deblocking_output.valid = 1;
989     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
990     bo = dri_bo_alloc(i965->intel.bufmgr,
995     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
996     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
    /* MPEG-2 uses none of the remaining scratch buffers. */
998     gen7_mfd_context->post_deblocking_output.valid = 0;
999     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1000     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1001     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1002     gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Emits the 13-dword MFX_MPEG2_PIC_STATE command describing the current
 * MPEG-2 picture: f_codes, picture-coding-extension flags, picture type,
 * and frame dimensions in macroblocks.
 * NOTE(review): the condition guarding slice_concealment_disable_bit = 1
 * is elided in this listing -- confirm against the full file.
 */
1006 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1007 struct decode_state *decode_state,
1008 struct gen7_mfd_context *gen7_mfd_context)
1010 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1011 VAPictureParameterBufferMPEG2 *pic_param;
1012 unsigned int slice_concealment_disable_bit = 0;
1014 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1015 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1017 slice_concealment_disable_bit = 1;
1019 BEGIN_BCS_BATCH(batch, 13);
1020 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
/* DW1: four 4-bit f_codes plus picture_coding_extension flags. */
1021 OUT_BCS_BATCH(batch,
1022 (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1023 ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1024 ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1025 ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1026 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1027 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1028 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1029 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1030 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1031 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1032 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1033 pic_param->picture_coding_extension.bits.alternate_scan << 6);
/* DW2: picture coding type (I/P/B). */
1034 OUT_BCS_BATCH(batch,
1035 pic_param->picture_coding_type << 9);
/* DW3: concealment control plus frame size in macroblocks minus one. */
1036 OUT_BCS_BATCH(batch,
1037 (slice_concealment_disable_bit << 31) |
1038 ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1039 ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
/* DW4..DW12: reserved / unused, written as zero. */
1040 OUT_BCS_BATCH(batch, 0);
1041 OUT_BCS_BATCH(batch, 0);
1042 OUT_BCS_BATCH(batch, 0);
1043 OUT_BCS_BATCH(batch, 0);
1044 OUT_BCS_BATCH(batch, 0);
1045 OUT_BCS_BATCH(batch, 0);
1046 OUT_BCS_BATCH(batch, 0);
1047 OUT_BCS_BATCH(batch, 0);
1048 OUT_BCS_BATCH(batch, 0);
1049 ADVANCE_BCS_BATCH(batch);
/*
 * Maintains the cached MPEG-2 inverse-quantization matrices and commits
 * them to hardware. Incoming VA matrices are stored in raster order by
 * de-zigzagging through zigzag_direct[]; a cached load flag of -1 means
 * "never initialized, accept whatever the app sent".
 * NOTE(review): loop-variable declarations and several closing braces are
 * elided in this listing.
 */
1053 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1054 struct decode_state *decode_state,
1055 struct gen7_mfd_context *gen7_mfd_context)
1057 VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1060 /* Update internal QM state */
1061 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1062 VAIQMatrixBufferMPEG2 * const iq_matrix =
1063 (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
/* Refresh the cached intra matrix when first seen or when reloaded. */
1065 if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1066 iq_matrix->load_intra_quantiser_matrix) {
1067 gen_iq_matrix->load_intra_quantiser_matrix =
1068 iq_matrix->load_intra_quantiser_matrix;
1069 if (iq_matrix->load_intra_quantiser_matrix) {
/* De-zigzag: VA delivers matrices in scan order, HW wants raster. */
1070 for (j = 0; j < 64; j++)
1071 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1072 iq_matrix->intra_quantiser_matrix[j];
/* Same refresh logic for the non-intra matrix. */
1076 if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1077 iq_matrix->load_non_intra_quantiser_matrix) {
1078 gen_iq_matrix->load_non_intra_quantiser_matrix =
1079 iq_matrix->load_non_intra_quantiser_matrix;
1080 if (iq_matrix->load_non_intra_quantiser_matrix) {
1081 for (j = 0; j < 64; j++)
1082 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1083 iq_matrix->non_intra_quantiser_matrix[j];
1088 /* Commit QM state to HW */
/* Two iterations: intra matrix, then non-intra matrix. */
1089 for (i = 0; i < 2; i++) {
1090 unsigned char *qm = NULL;
1094 if (gen_iq_matrix->load_intra_quantiser_matrix) {
1095 qm = gen_iq_matrix->intra_quantiser_matrix;
1096 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1099 if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1100 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1101 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
/* Emit the 64-byte matrix via the shared MFX_QM_STATE helper. */
1108 gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
/*
 * Emits one MFD_MPEG2_BSD_OBJECT command for a slice: bitstream size and
 * offset (adjusted past the slice header by macroblock_offset bits), plus
 * the macroblock span covered, computed from this slice's start position
 * and the next slice's start (or the picture end for the last slice).
 * is_field_pic_wa applies the slice-vertical-position workaround for
 * field pictures when enabled in the context.
 */
1113 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1114 VAPictureParameterBufferMPEG2 *pic_param,
1115 VASliceParameterBufferMPEG2 *slice_param,
1116 VASliceParameterBufferMPEG2 *next_slice_param,
1117 struct gen7_mfd_context *gen7_mfd_context)
1119 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1120 unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1121 int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
/* Field pictures cover half the vertical MB rows of the frame. */
1123 if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1124 pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1126 is_field_pic_wa = is_field_pic &&
1127 gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
/* Start position of this slice (halved under the field-picture WA). */
1129 vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1130 hpos0 = slice_param->slice_horizontal_position;
/* End position: next slice's start, or bottom of the picture. */
1132 if (next_slice_param == NULL) {
1133 vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1136 vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1137 hpos1 = next_slice_param->slice_horizontal_position;
1140 mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1142 BEGIN_BCS_BATCH(batch, 5);
1143 OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
/* Data size/offset skip the whole bytes of the slice header. */
1144 OUT_BCS_BATCH(batch,
1145 slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1146 OUT_BCS_BATCH(batch,
1147 slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
/* Last-slice flags plus the residual bit offset within the first byte. */
1148 OUT_BCS_BATCH(batch,
1152 (next_slice_param == NULL) << 5 |
1153 (next_slice_param == NULL) << 3 |
1154 (slice_param->macroblock_offset & 0x7));
1155 OUT_BCS_BATCH(batch,
1156 (slice_param->quantiser_scale_code << 24) |
1157 (vpos1 << 8 | hpos1));
1158 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level MPEG-2 picture decode: performs per-picture init, emits the
 * common MFX pipeline state, picture and QM state, then walks every
 * slice parameter buffer emitting one BSD object per slice. next_slice_param
 * lets each BSD object know where the following slice begins (NULL for
 * the final slice of the picture).
 * NOTE(review): `else` lines between the paired assignments are elided
 * in this listing.
 */
1162 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1163 struct decode_state *decode_state,
1164 struct gen7_mfd_context *gen7_mfd_context)
1166 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1167 VAPictureParameterBufferMPEG2 *pic_param;
1168 VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1169 dri_bo *slice_data_bo;
1172 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1173 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1175 gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
/* All commands for the picture are emitted atomically on the BCS ring. */
1176 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1177 intel_batchbuffer_emit_mi_flush(batch);
1178 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1179 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1180 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1181 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1182 gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1183 gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
/* Lazily detect whether the slice-vertical-position WA is needed. */
1185 if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1186 gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1187 mpeg2_wa_slice_vertical_position(decode_state, pic_param);
/* Outer loop: one iteration per slice parameter buffer (slice group). */
1189 for (j = 0; j < decode_state->num_slice_params; j++) {
1190 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1191 slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1192 slice_data_bo = decode_state->slice_datas[j]->bo;
1193 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
/* First slice of the next group, used as lookahead across groups. */
1195 if (j == decode_state->num_slice_params - 1)
1196 next_slice_group_param = NULL;
1198 next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
/* Inner loop: each element within the slice parameter buffer. */
1200 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1201 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1203 if (i < decode_state->slice_params[j]->num_elements - 1)
1204 next_slice_param = slice_param + 1;
1206 next_slice_param = next_slice_group_param;
1208 gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1213 intel_batchbuffer_end_atomic(batch);
1214 intel_batchbuffer_flush(batch);
/* VA picture-type enum -> Gen7 MFX VC-1 picture-type encoding.
 * NOTE(review): most initializer entries are elided in this listing. */
1217 static const int va_to_gen7_vc1_pic_type[5] = {
1221 GEN7_VC1_BI_PICTURE,
1225 static const int va_to_gen7_vc1_mv[4] = {
1227 2, /* 1-MV half-pel */
1228 3, /* 1-MV half-pef bilinear */
/* Scale factors indexed by VC-1 b_picture_fraction (BFRACTION),
 * used when deriving the B-picture reference distance (brfd). */
1232 static const int b_picture_scale_factor[21] = {
1233 128, 85, 170, 64, 192,
1234 51, 102, 153, 204, 43,
1235 215, 37, 74, 111, 148,
1236 185, 222, 32, 96, 160,
/* VA conditional_overlap_flag -> hardware CONDOVER field encoding. */
1240 static const int va_to_gen7_vc1_condover[3] = {
/* VA profile index -> Gen7 VC-1 profile encoding (reserved slot kept
 * so the advanced profile lands at index 3). */
1246 static const int va_to_gen7_vc1_profile[4] = {
1247 GEN7_VC1_SIMPLE_PROFILE,
1248 GEN7_VC1_MAIN_PROFILE,
1249 GEN7_VC1_RESERVED_PROFILE,
1250 GEN7_VC1_ADVANCED_PROFILE
/*
 * Destructor for the per-surface VC-1 private data (installed as
 * obj_surface->free_private_data): drops the direct-MV bo reference and
 * frees the struct. NOTE(review): the early `return;` and the final
 * `*data = NULL;` appear to be elided in this listing.
 */
1254 gen8_mfd_free_vc1_surface(void **data)
1256 struct gen7_vc1_surface *gen7_vc1_surface = *data;
1258 if (!gen7_vc1_surface)
1261 dri_bo_unreference(gen7_vc1_surface->dmv);
1262 free(gen7_vc1_surface);
/*
 * Lazily attaches VC-1 private data to the decode surface: allocates the
 * gen7_vc1_surface struct on first use, records the picture type, and
 * allocates the direct-MV read/write buffer sized from the coded
 * dimensions (64 bytes per macroblock).
 */
1267 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1268 VAPictureParameterBufferVC1 *pic_param,
1269 struct object_surface *obj_surface)
1271 struct i965_driver_data *i965 = i965_driver_data(ctx);
1272 struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1273 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1274 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
/* Register the matching destructor so the surface can free this data. */
1276 obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1278 if (!gen7_vc1_surface) {
1279 gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
/* Allocation failure: leave the surface without private data. */
1281 if (!gen7_vc1_surface)
1284 assert((obj_surface->size & 0x3f) == 0);
1285 obj_surface->private_data = gen7_vc1_surface;
/* Remembered so a later B picture can test its reference's type. */
1288 gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
/* Direct-MV buffer: 64 bytes per macroblock, allocated once. */
1290 if (gen7_vc1_surface->dmv == NULL) {
1291 gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1292 "direct mv w/r buffer",
1293 width_in_mbs * height_in_mbs * 64,
/*
 * Per-picture init for Gen8 VC-1 decode. Binds the render target as
 * either the post-deblocking output (in-loop filter on) or the
 * pre-deblocking output (filter off), allocates the row-store scratch
 * buffers, and, when the picture carries bitplane data, repacks the VA
 * bitplane buffer (2 macroblocks per byte) into the hardware layout.
 * NOTE(review): several lines (braces, else branches, some dri_bo_alloc
 * arguments, local declarations) are elided in this listing.
 */
1299 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1300 struct decode_state *decode_state,
1301 struct gen7_mfd_context *gen7_mfd_context)
1303 VAPictureParameterBufferVC1 *pic_param;
1304 struct i965_driver_data *i965 = i965_driver_data(ctx);
1305 struct object_surface *obj_surface;
1310 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1311 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1312 width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1313 picture_type = pic_param->picture_fields.bits.picture_type;
1315 intel_update_vc1_frame_store_index(ctx,
1318 gen7_mfd_context->reference_surface);
1320 /* Current decoded picture */
1321 obj_surface = decode_state->render_object;
1322 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1323 gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
/* Exactly one of post-/pre-deblocking output is valid, selected by the
 * entrypoint loopfilter flag; both reference the same render bo. */
1325 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1326 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1327 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1328 gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1330 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1331 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1332 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1333 gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
/* Row-store scratch buffers, re-allocated each picture. */
1335 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1336 bo = dri_bo_alloc(i965->intel.bufmgr,
1341 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1342 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1344 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1345 bo = dri_bo_alloc(i965->intel.bufmgr,
1346 "deblocking filter row store",
1347 width_in_mbs * 7 * 64,
1350 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1351 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1353 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1354 bo = dri_bo_alloc(i965->intel.bufmgr,
1355 "bsd mpc row store",
1359 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1360 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1362 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
/* The bitplane buffer is only needed when the app supplied bitplanes. */
1364 gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1365 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1367 if (gen7_mfd_context->bitplane_read_buffer.valid) {
1368 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1369 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
/* Destination packs two macroblocks (4 bits each) per byte per row. */
1370 int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1372 uint8_t *src = NULL, *dst = NULL;
1374 assert(decode_state->bit_plane->buffer);
1375 src = decode_state->bit_plane->buffer;
1377 bo = dri_bo_alloc(i965->intel.bufmgr,
1379 bitplane_width * height_in_mbs,
1382 gen7_mfd_context->bitplane_read_buffer.bo = bo;
1384 dri_bo_map(bo, True);
1385 assert(bo->virtual);
/* Repack the VA bitplane nibbles into the hardware row layout. */
1388 for (src_h = 0; src_h < height_in_mbs; src_h++) {
1389 for(src_w = 0; src_w < width_in_mbs; src_w++) {
1390 int src_index, dst_index;
/* Source stores two MBs per byte, high nibble first. */
1394 src_index = (src_h * width_in_mbs + src_w) / 2;
1395 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1396 src_value = ((src[src_index] >> src_shift) & 0xf);
/* Skipped pictures: presumably force the skip bitplane -- the
 * elided lines here modify src_value; confirm in the full file. */
1398 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1402 dst_index = src_w / 2;
1403 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
/* Odd-width rows: shift the final half-filled byte into place. */
1407 dst[src_w / 2] >>= 4;
1409 dst += bitplane_width;
/* No bitplane supplied: leave the read buffer unset. */
1414 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
/*
 * Emits the 6-dword MFD_VC1_LONG_PIC_STATE command. Derives every field
 * the hardware needs from the VA VC-1 picture parameters: alternative
 * pquant configuration (VC-1 DQUANT syntax), unified MV mode, B-picture
 * scale factor / reference distance, conditional overlap, AC coding-set
 * selection, transform-type fixups, direct-MV surface validity, and the
 * sub-pel interpolation mode.
 * NOTE(review): this listing elides many structural lines (else branches,
 * case labels, closing braces, several local declarations such as
 * profile/picture_type/fcm/brfd/overlap/trans_ac_y) -- verify against
 * the full file before editing.
 */
1418 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1419 struct decode_state *decode_state,
1420 struct gen7_mfd_context *gen7_mfd_context)
1422 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1423 VAPictureParameterBufferVC1 *pic_param;
1424 struct object_surface *obj_surface;
1425 int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1426 int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1427 int unified_mv_mode;
1428 int ref_field_pic_polarity = 0;
1429 int scale_factor = 0;
1431 int dmv_surface_valid = 0;
1437 int interpolation_mode = 0;
1439 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1440 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1442 profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
/* Unpack the DQUANT-related quantizer syntax elements. */
1443 dquant = pic_param->pic_quantizer_fields.bits.dquant;
1444 dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1445 dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1446 dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1447 dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1448 dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1449 alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
/* Map DQUANT syntax onto the HW alt-pquant config and edge mask. */
1452 alt_pquant_config = 0;
1453 alt_pquant_edge_mask = 0;
1454 } else if (dquant == 2) {
1455 alt_pquant_config = 1;
1456 alt_pquant_edge_mask = 0xf;
1458 assert(dquant == 1);
1459 if (dquantfrm == 0) {
1460 alt_pquant_config = 0;
1461 alt_pquant_edge_mask = 0;
1464 assert(dquantfrm == 1);
1465 alt_pquant_config = 1;
1467 switch (dqprofile) {
1469 if (dqbilevel == 0) {
1470 alt_pquant_config = 2;
1471 alt_pquant_edge_mask = 0;
1473 assert(dqbilevel == 1);
1474 alt_pquant_config = 3;
1475 alt_pquant_edge_mask = 0;
1480 alt_pquant_edge_mask = 0xf;
1485 alt_pquant_edge_mask = 0x9;
1487 alt_pquant_edge_mask = (0x3 << dqdbedge);
1492 alt_pquant_edge_mask = (0x1 << dqsbedge);
/* Intensity compensation carries the real MV mode in mv_mode2. */
1501 if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1502 assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1503 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1505 assert(pic_param->mv_fields.bits.mv_mode < 4);
1506 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1509 if (pic_param->sequence_fields.bits.interlace == 1 &&
1510 pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1511 /* FIXME: calculate reference field picture polarity */
1513 ref_field_pic_polarity = 0;
/* BFRACTION-derived scale factor for B-picture reference distance. */
1516 if (pic_param->b_picture_fraction < 21)
1517 scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1519 picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
/* NOTE(review): remapping advanced-profile I to BI looks intentional
 * but an enclosing condition may be elided here -- confirm. */
1521 if (profile == GEN7_VC1_ADVANCED_PROFILE &&
1522 picture_type == GEN7_VC1_I_PICTURE)
1523 picture_type = GEN7_VC1_BI_PICTURE;
/* AC coding set: index2 for intra pictures, index1 otherwise. */
1525 if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1526 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1528 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1531 * 8.3.6.2.1 Transform Type Selection
1532 * If variable-sized transform coding is not enabled,
1533 * then the 8x8 transform shall be used for all blocks.
1534 * it is also MFX_VC1_PIC_STATE requirement.
1536 if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1537 pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
1538 pic_param->transform_fields.bits.frame_level_transform_type = 0;
/* Direct-MV read surface is only valid for a B picture whose backward
 * reference is not an intra (I/BI) picture. */
1542 if (picture_type == GEN7_VC1_B_PICTURE) {
1543 struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1545 obj_surface = decode_state->reference_objects[1];
1548 gen7_vc1_surface = obj_surface->private_data;
1550 if (!gen7_vc1_surface ||
1551 (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1552 va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1553 dmv_surface_valid = 0;
1555 dmv_surface_valid = 1;
1558 assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1560 if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1561 fcm = pic_param->picture_fields.bits.frame_coding_mode;
1563 if (pic_param->picture_fields.bits.top_field_first)
/* B-picture backward reference distance, scaled by BFRACTION. */
1569 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1570 brfd = pic_param->reference_fields.bits.reference_distance;
1571 brfd = (scale_factor * brfd) >> 8;
1572 brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1578 overlap = pic_param->sequence_fields.bits.overlap;
/* Overlap smoothing rules differ between simple/main and advanced. */
1582 if (profile != GEN7_VC1_ADVANCED_PROFILE){
1583 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1584 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1588 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1589 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1592 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1593 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
1594 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1596 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1597 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1604 assert(pic_param->conditional_overlap_flag < 3);
1605 assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
/* Sub-pel interpolation mode from the (possibly nested) MV mode. */
1607 if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1608 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1609 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1610 interpolation_mode = 9; /* Half-pel bilinear */
1611 else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1612 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1613 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1614 interpolation_mode = 1; /* Half-pel bicubic */
1616 interpolation_mode = 0; /* Quarter-pel bicubic */
/* Emit the 6-dword long-format picture state command. */
1618 BEGIN_BCS_BATCH(batch, 6);
1619 OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
/* DW1: coded size in macroblocks minus one. */
1620 OUT_BCS_BATCH(batch,
1621 (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1622 ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1623 OUT_BCS_BATCH(batch,
1624 ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1625 dmv_surface_valid << 15 |
1626 (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1627 pic_param->rounding_control << 13 |
1628 pic_param->sequence_fields.bits.syncmarker << 12 |
1629 interpolation_mode << 8 |
1630 0 << 7 | /* FIXME: scale up or down ??? */
1631 pic_param->range_reduction_frame << 6 |
1632 pic_param->entrypoint_fields.bits.loopfilter << 5 |
1634 !pic_param->picture_fields.bits.is_first_field << 3 |
1635 (pic_param->sequence_fields.bits.profile == 3) << 0);
1636 OUT_BCS_BATCH(batch,
1637 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1638 picture_type << 26 |
1641 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1643 OUT_BCS_BATCH(batch,
1644 unified_mv_mode << 28 |
1645 pic_param->mv_fields.bits.four_mv_switch << 27 |
1646 pic_param->fast_uvmc_flag << 26 |
1647 ref_field_pic_polarity << 25 |
1648 pic_param->reference_fields.bits.num_reference_pictures << 24 |
1649 pic_param->reference_fields.bits.reference_distance << 20 |
1650 pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1651 pic_param->mv_fields.bits.extended_dmv_range << 10 |
1652 pic_param->mv_fields.bits.extended_mv_range << 8 |
1653 alt_pquant_edge_mask << 4 |
1654 alt_pquant_config << 2 |
1655 pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1656 pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
/* DW5: bitplane presence (inverted per-flag = "raw mode" bits) and
 * the various VLC table selectors. */
1657 OUT_BCS_BATCH(batch,
1658 !!pic_param->bitplane_present.value << 31 |
1659 !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1660 !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1661 !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1662 !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1663 !pic_param->bitplane_present.flags.bp_overflags << 26 |
1664 !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1665 !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1666 pic_param->mv_fields.bits.mv_table << 20 |
1667 pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1668 pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1669 pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1670 pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1671 pic_param->mb_mode_table << 8 |
1673 pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1674 pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1675 pic_param->cbp_table << 0);
1676 ADVANCE_BCS_BATCH(batch);
/*
 * Emits the 6-dword MFX_VC1_PRED_PIPE_STATE command. Enables intensity
 * compensation (single, forward and backward fields set identically)
 * when the picture's MV mode is intensity compensation, and supplies the
 * corresponding luma scale/shift values.
 */
1680 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1681 struct decode_state *decode_state,
1682 struct gen7_mfd_context *gen7_mfd_context)
1684 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1685 VAPictureParameterBufferVC1 *pic_param;
1686 int intensitycomp_single;
1688 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1689 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
/* 1 iff the MV mode requests intensity compensation. */
1690 intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1692 BEGIN_BCS_BATCH(batch, 6);
1693 OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1694 OUT_BCS_BATCH(batch,
1695 0 << 14 | /* FIXME: double ??? */
1697 intensitycomp_single << 10 |
1698 intensitycomp_single << 8 |
1699 0 << 4 | /* FIXME: interlace mode */
1701 OUT_BCS_BATCH(batch,
1702 pic_param->luma_shift << 16 |
1703 pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
/* Remaining dwords unused for this configuration. */
1704 OUT_BCS_BATCH(batch, 0);
1705 OUT_BCS_BATCH(batch, 0);
1706 OUT_BCS_BATCH(batch, 0);
1707 ADVANCE_BCS_BATCH(batch);
/*
 * Emits the 7-dword MFX_VC1_DIRECTMODE_STATE command: the direct-MV
 * write buffer comes from the current render surface's private data, and
 * the direct-MV read buffer from the backward reference
 * (reference_objects[1]). Missing buffers are programmed as zero.
 */
1711 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1712 struct decode_state *decode_state,
1713 struct gen7_mfd_context *gen7_mfd_context)
1715 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1716 struct object_surface *obj_surface;
1717 dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
/* Write buffer: dmv bo attached to the picture being decoded. */
1719 obj_surface = decode_state->render_object;
1721 if (obj_surface && obj_surface->private_data) {
1722 dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
/* Read buffer: dmv bo of the backward reference picture, if any. */
1725 obj_surface = decode_state->reference_objects[1];
1727 if (obj_surface && obj_surface->private_data) {
1728 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1731 BEGIN_BCS_BATCH(batch, 7);
1732 OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1734 if (dmv_write_buffer)
1735 OUT_BCS_RELOC(batch, dmv_write_buffer,
1736 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1739 OUT_BCS_BATCH(batch, 0);
1741 OUT_BCS_BATCH(batch, 0);
1742 OUT_BCS_BATCH(batch, 0);
1744 if (dmv_read_buffer)
1745 OUT_BCS_RELOC(batch, dmv_read_buffer,
1746 I915_GEM_DOMAIN_INSTRUCTION, 0,
1749 OUT_BCS_BATCH(batch, 0);
1751 OUT_BCS_BATCH(batch, 0);
1752 OUT_BCS_BATCH(batch, 0);
1754 ADVANCE_BCS_BATCH(batch);
/*
 * Adjusts a slice's macroblock bit offset for emulation-prevention bytes:
 * scans the slice header for 00 00 03 xx (xx < 4) escape sequences and
 * advances the offset past each one, so the hardware sees the offset into
 * the raw (escaped) bitstream. Returns the corrected bit offset.
 * NOTE(review): the profile test selecting between the plain and scanning
 * paths, and the loop-body skip (i++, j += 2), are elided in this listing.
 */
1758 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1760 int out_slice_data_bit_offset;
/* Whole bytes of header preceding the macroblock data. */
1761 int slice_header_size = in_slice_data_bit_offset / 8;
1765 out_slice_data_bit_offset = in_slice_data_bit_offset;
/* i counts header bytes consumed, j tracks the raw-buffer position. */
1767 for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1768 if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
/* Recombine byte position with the sub-byte bit remainder. */
1773 out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1776 return out_slice_data_bit_offset;
/*
 * Emits one MFD_VC1_BSD_OBJECT command for a slice. Maps the slice data
 * bo to compute the emulation-byte-corrected macroblock bit offset, then
 * programs data size/offset (whole-byte part) and the vertical span up to
 * the next slice (or to the bottom of the picture for the last slice).
 */
1780 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1781 VAPictureParameterBufferVC1 *pic_param,
1782 VASliceParameterBufferVC1 *slice_param,
1783 VASliceParameterBufferVC1 *next_slice_param,
1784 dri_bo *slice_data_bo,
1785 struct gen7_mfd_context *gen7_mfd_context)
1787 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1788 int next_slice_start_vert_pos;
1789 int macroblock_offset;
1790 uint8_t *slice_data = NULL;
/* CPU-map the slice data to scan its header for escape bytes. */
1792 dri_bo_map(slice_data_bo, 0);
1793 slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1794 macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1795 slice_param->macroblock_offset,
1796 pic_param->sequence_fields.bits.profile);
1797 dri_bo_unmap(slice_data_bo);
/* End row: next slice's start, or picture height in MBs. */
1799 if (next_slice_param)
1800 next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1802 next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1804 BEGIN_BCS_BATCH(batch, 5);
1805 OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
/* Size/offset skip the whole bytes of the corrected header offset. */
1806 OUT_BCS_BATCH(batch,
1807 slice_param->slice_data_size - (macroblock_offset >> 3));
1808 OUT_BCS_BATCH(batch,
1809 slice_param->slice_data_offset + (macroblock_offset >> 3));
1810 OUT_BCS_BATCH(batch,
1811 slice_param->slice_vertical_position << 16 |
1812 next_slice_start_vert_pos << 0);
/* Residual bit offset within the first macroblock-data byte. */
1813 OUT_BCS_BATCH(batch,
1814 (macroblock_offset & 0x7));
1815 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level VC-1 picture decode: per-picture init, common MFX pipeline
 * state, VC-1 picture / prediction-pipe / direct-mode state, then one
 * BSD object per slice. Mirrors the MPEG-2 driver loop, including the
 * next-slice lookahead across slice parameter buffers.
 * NOTE(review): `else` lines between the paired assignments are elided
 * in this listing.
 */
1819 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1820 struct decode_state *decode_state,
1821 struct gen7_mfd_context *gen7_mfd_context)
1823 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1824 VAPictureParameterBufferVC1 *pic_param;
1825 VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1826 dri_bo *slice_data_bo;
1829 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1830 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1832 gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
/* All commands for the picture are emitted atomically on the BCS ring. */
1833 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1834 intel_batchbuffer_emit_mi_flush(batch);
1835 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1836 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1837 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1838 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1839 gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1840 gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1841 gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
/* Outer loop: one iteration per slice parameter buffer (slice group). */
1843 for (j = 0; j < decode_state->num_slice_params; j++) {
1844 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1845 slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1846 slice_data_bo = decode_state->slice_datas[j]->bo;
1847 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
/* First slice of the next group, used as lookahead across groups. */
1849 if (j == decode_state->num_slice_params - 1)
1850 next_slice_group_param = NULL;
1852 next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1854 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1855 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1857 if (i < decode_state->slice_params[j]->num_elements - 1)
1858 next_slice_param = slice_param + 1;
1860 next_slice_param = next_slice_group_param;
1862 gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1867 intel_batchbuffer_end_atomic(batch);
1868 intel_batchbuffer_flush(batch);
/*
 * Per-picture init for Gen8 JPEG (baseline) decode: derives the surface
 * fourcc and chroma subsampling from the component count and the
 * per-component sampling factors, allocates the render surface
 * accordingly, and binds it as the pre-deblocking output. JPEG uses no
 * row-store or bitplane buffers, so every other resource is invalidated.
 */
1872 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1873 struct decode_state *decode_state,
1874 struct gen7_mfd_context *gen7_mfd_context)
1876 struct object_surface *obj_surface;
1877 VAPictureParameterBufferJPEGBaseline *pic_param;
1878 int subsampling = SUBSAMPLE_YUV420;
1879 int fourcc = VA_FOURCC_IMC3;
1881 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
/* Single component: grayscale (4:0:0). */
1883 if (pic_param->num_components == 1)
1884 subsampling = SUBSAMPLE_YUV400;
1885 else if (pic_param->num_components == 3) {
/* Match the JFIF h/v sampling factor triplets to a planar layout. */
1886 int h1 = pic_param->components[0].h_sampling_factor;
1887 int h2 = pic_param->components[1].h_sampling_factor;
1888 int h3 = pic_param->components[2].h_sampling_factor;
1889 int v1 = pic_param->components[0].v_sampling_factor;
1890 int v2 = pic_param->components[1].v_sampling_factor;
1891 int v3 = pic_param->components[2].v_sampling_factor;
1893 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1894 v1 == 2 && v2 == 1 && v3 == 1) {
1895 subsampling = SUBSAMPLE_YUV420;
1896 fourcc = VA_FOURCC_IMC3;
1897 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1898 v1 == 1 && v2 == 1 && v3 == 1) {
1899 subsampling = SUBSAMPLE_YUV422H;
1900 fourcc = VA_FOURCC_422H;
1901 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1902 v1 == 1 && v2 == 1 && v3 == 1) {
1903 subsampling = SUBSAMPLE_YUV444;
1904 fourcc = VA_FOURCC_444P;
1905 } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1906 v1 == 1 && v2 == 1 && v3 == 1) {
1907 subsampling = SUBSAMPLE_YUV411;
1908 fourcc = VA_FOURCC_411P;
1909 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1910 v1 == 2 && v2 == 1 && v3 == 1) {
1911 subsampling = SUBSAMPLE_YUV422V;
1912 fourcc = VA_FOURCC_422V;
/* The 2x2/1x2 mixed cases collapse onto 422H/422V layouts. */
1913 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1914 v1 == 2 && v2 == 2 && v3 == 2) {
1915 subsampling = SUBSAMPLE_YUV422H;
1916 fourcc = VA_FOURCC_422H;
1917 } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1918 v1 == 2 && v2 == 1 && v3 == 1) {
1919 subsampling = SUBSAMPLE_YUV422V;
1920 fourcc = VA_FOURCC_422V;
1928 /* Current decoded picture */
1929 obj_surface = decode_state->render_object;
1930 i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
/* JPEG output goes to the pre-deblocking buffer (no deblocking). */
1932 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1933 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1934 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1935 gen7_mfd_context->pre_deblocking_output.valid = 1;
/* None of the scratch/auxiliary buffers are used for JPEG. */
1937 gen7_mfd_context->post_deblocking_output.bo = NULL;
1938 gen7_mfd_context->post_deblocking_output.valid = 0;
1940 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1941 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1943 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1944 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1946 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1947 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1949 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1950 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1952 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1953 gen7_mfd_context->bitplane_read_buffer.valid = 0;
1956 static const int va_to_gen7_jpeg_rotation[4] = {
1957 GEN7_JPEG_ROTATION_0,
1958 GEN7_JPEG_ROTATION_90,
1959 GEN7_JPEG_ROTATION_180,
1960 GEN7_JPEG_ROTATION_270
1964 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1965 struct decode_state *decode_state,
1966 struct gen7_mfd_context *gen7_mfd_context)
1968 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1969 VAPictureParameterBufferJPEGBaseline *pic_param;
1970 int chroma_type = GEN7_YUV420;
1971 int frame_width_in_blks;
1972 int frame_height_in_blks;
1974 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1975 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1977 if (pic_param->num_components == 1)
1978 chroma_type = GEN7_YUV400;
1979 else if (pic_param->num_components == 3) {
1980 int h1 = pic_param->components[0].h_sampling_factor;
1981 int h2 = pic_param->components[1].h_sampling_factor;
1982 int h3 = pic_param->components[2].h_sampling_factor;
1983 int v1 = pic_param->components[0].v_sampling_factor;
1984 int v2 = pic_param->components[1].v_sampling_factor;
1985 int v3 = pic_param->components[2].v_sampling_factor;
1987 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1988 v1 == 2 && v2 == 1 && v3 == 1)
1989 chroma_type = GEN7_YUV420;
1990 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1991 v1 == 1 && v2 == 1 && v3 == 1)
1992 chroma_type = GEN7_YUV422H_2Y;
1993 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1994 v1 == 1 && v2 == 1 && v3 == 1)
1995 chroma_type = GEN7_YUV444;
1996 else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1997 v1 == 1 && v2 == 1 && v3 == 1)
1998 chroma_type = GEN7_YUV411;
1999 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2000 v1 == 2 && v2 == 1 && v3 == 1)
2001 chroma_type = GEN7_YUV422V_2Y;
2002 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2003 v1 == 2 && v2 == 2 && v3 == 2)
2004 chroma_type = GEN7_YUV422H_4Y;
2005 else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2006 v1 == 2 && v2 == 1 && v3 == 1)
2007 chroma_type = GEN7_YUV422V_4Y;
2012 if (chroma_type == GEN7_YUV400 ||
2013 chroma_type == GEN7_YUV444 ||
2014 chroma_type == GEN7_YUV422V_2Y) {
2015 frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2016 frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2017 } else if (chroma_type == GEN7_YUV411) {
2018 frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2019 frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2021 frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2022 frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2025 BEGIN_BCS_BATCH(batch, 3);
2026 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2027 OUT_BCS_BATCH(batch,
2028 (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
2029 (chroma_type << 0));
2030 OUT_BCS_BATCH(batch,
2031 ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
2032 ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
2033 ADVANCE_BCS_BATCH(batch);
2036 static const int va_to_gen7_jpeg_hufftable[2] = {
2042 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2043 struct decode_state *decode_state,
2044 struct gen7_mfd_context *gen7_mfd_context,
2047 VAHuffmanTableBufferJPEGBaseline *huffman_table;
2048 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2051 if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2054 huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2056 for (index = 0; index < num_tables; index++) {
2057 int id = va_to_gen7_jpeg_hufftable[index];
2058 if (!huffman_table->load_huffman_table[index])
2060 BEGIN_BCS_BATCH(batch, 53);
2061 OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2062 OUT_BCS_BATCH(batch, id);
2063 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2064 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2065 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2066 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2067 ADVANCE_BCS_BATCH(batch);
2071 static const int va_to_gen7_jpeg_qm[5] = {
2073 MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2074 MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2075 MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2076 MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2080 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2081 struct decode_state *decode_state,
2082 struct gen7_mfd_context *gen7_mfd_context)
2084 VAPictureParameterBufferJPEGBaseline *pic_param;
2085 VAIQMatrixBufferJPEGBaseline *iq_matrix;
2088 if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2091 iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2092 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2094 assert(pic_param->num_components <= 3);
2096 for (index = 0; index < pic_param->num_components; index++) {
2097 int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2099 unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2100 unsigned char raster_qm[64];
2103 if (id > 4 || id < 1)
2106 if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2109 qm_type = va_to_gen7_jpeg_qm[id];
2111 for (j = 0; j < 64; j++)
2112 raster_qm[zigzag_direct[j]] = qm[j];
2114 gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2119 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2120 VAPictureParameterBufferJPEGBaseline *pic_param,
2121 VASliceParameterBufferJPEGBaseline *slice_param,
2122 VASliceParameterBufferJPEGBaseline *next_slice_param,
2123 dri_bo *slice_data_bo,
2124 struct gen7_mfd_context *gen7_mfd_context)
2126 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2127 int scan_component_mask = 0;
2130 assert(slice_param->num_components > 0);
2131 assert(slice_param->num_components < 4);
2132 assert(slice_param->num_components <= pic_param->num_components);
2134 for (i = 0; i < slice_param->num_components; i++) {
2135 switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2137 scan_component_mask |= (1 << 0);
2140 scan_component_mask |= (1 << 1);
2143 scan_component_mask |= (1 << 2);
2151 BEGIN_BCS_BATCH(batch, 6);
2152 OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2153 OUT_BCS_BATCH(batch,
2154 slice_param->slice_data_size);
2155 OUT_BCS_BATCH(batch,
2156 slice_param->slice_data_offset);
2157 OUT_BCS_BATCH(batch,
2158 slice_param->slice_horizontal_position << 16 |
2159 slice_param->slice_vertical_position << 0);
2160 OUT_BCS_BATCH(batch,
2161 ((slice_param->num_components != 1) << 30) | /* interleaved */
2162 (scan_component_mask << 27) | /* scan components */
2163 (0 << 26) | /* disable interrupt allowed */
2164 (slice_param->num_mcus << 0)); /* MCU count */
2165 OUT_BCS_BATCH(batch,
2166 (slice_param->restart_interval << 0)); /* RestartInterval */
2167 ADVANCE_BCS_BATCH(batch);
2170 /* Workaround for JPEG decoding on Ivybridge */
2176 unsigned char data[32];
2178 int data_bit_offset;
2180 } gen7_jpeg_wa_clip = {
2184 0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2185 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2193 gen8_jpeg_wa_init(VADriverContextP ctx,
2194 struct gen7_mfd_context *gen7_mfd_context)
2196 struct i965_driver_data *i965 = i965_driver_data(ctx);
2198 struct object_surface *obj_surface;
2200 if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2201 i965_DestroySurfaces(ctx,
2202 &gen7_mfd_context->jpeg_wa_surface_id,
2205 status = i965_CreateSurfaces(ctx,
2206 gen7_jpeg_wa_clip.width,
2207 gen7_jpeg_wa_clip.height,
2208 VA_RT_FORMAT_YUV420,
2210 &gen7_mfd_context->jpeg_wa_surface_id);
2211 assert(status == VA_STATUS_SUCCESS);
2213 obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2214 assert(obj_surface);
2215 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2216 gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2218 if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2219 gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2223 dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2225 gen7_jpeg_wa_clip.data_size,
2226 gen7_jpeg_wa_clip.data);
2231 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2232 struct gen7_mfd_context *gen7_mfd_context)
2234 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2236 BEGIN_BCS_BATCH(batch, 5);
2237 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2238 OUT_BCS_BATCH(batch,
2239 (MFX_LONG_MODE << 17) | /* Currently only support long format */
2240 (MFD_MODE_VLD << 15) | /* VLD mode */
2241 (0 << 10) | /* disable Stream-Out */
2242 (0 << 9) | /* Post Deblocking Output */
2243 (1 << 8) | /* Pre Deblocking Output */
2244 (0 << 5) | /* not in stitch mode */
2245 (MFX_CODEC_DECODE << 4) | /* decoding mode */
2246 (MFX_FORMAT_AVC << 0));
2247 OUT_BCS_BATCH(batch,
2248 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
2249 (0 << 3) | /* terminate if AVC mbdata error occurs */
2250 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
2253 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2254 OUT_BCS_BATCH(batch, 0); /* reserved */
2255 ADVANCE_BCS_BATCH(batch);
2259 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2260 struct gen7_mfd_context *gen7_mfd_context)
2262 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2263 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2265 BEGIN_BCS_BATCH(batch, 6);
2266 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2267 OUT_BCS_BATCH(batch, 0);
2268 OUT_BCS_BATCH(batch,
2269 ((obj_surface->orig_width - 1) << 18) |
2270 ((obj_surface->orig_height - 1) << 4));
2271 OUT_BCS_BATCH(batch,
2272 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2273 (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2274 (0 << 22) | /* surface object control state, ignored */
2275 ((obj_surface->width - 1) << 3) | /* pitch */
2276 (0 << 2) | /* must be 0 */
2277 (1 << 1) | /* must be tiled */
2278 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
2279 OUT_BCS_BATCH(batch,
2280 (0 << 16) | /* X offset for U(Cb), must be 0 */
2281 (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2282 OUT_BCS_BATCH(batch,
2283 (0 << 16) | /* X offset for V(Cr), must be 0 */
2284 (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
2285 ADVANCE_BCS_BATCH(batch);
2289 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2290 struct gen7_mfd_context *gen7_mfd_context)
2292 struct i965_driver_data *i965 = i965_driver_data(ctx);
2293 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2294 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2298 intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2303 BEGIN_BCS_BATCH(batch, 61);
2304 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2305 OUT_BCS_RELOC(batch,
2307 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2309 OUT_BCS_BATCH(batch, 0);
2310 OUT_BCS_BATCH(batch, 0);
2313 OUT_BCS_BATCH(batch, 0); /* post deblocking */
2314 OUT_BCS_BATCH(batch, 0);
2315 OUT_BCS_BATCH(batch, 0);
2317 /* uncompressed-video & stream out 7-12 */
2318 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2319 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2320 OUT_BCS_BATCH(batch, 0);
2321 OUT_BCS_BATCH(batch, 0);
2322 OUT_BCS_BATCH(batch, 0);
2323 OUT_BCS_BATCH(batch, 0);
2325 /* the DW 13-15 is for intra row store scratch */
2326 OUT_BCS_RELOC(batch,
2328 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2330 OUT_BCS_BATCH(batch, 0);
2331 OUT_BCS_BATCH(batch, 0);
2333 /* the DW 16-18 is for deblocking filter */
2334 OUT_BCS_BATCH(batch, 0);
2335 OUT_BCS_BATCH(batch, 0);
2336 OUT_BCS_BATCH(batch, 0);
2339 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2340 OUT_BCS_BATCH(batch, 0);
2341 OUT_BCS_BATCH(batch, 0);
2343 OUT_BCS_BATCH(batch, 0);
2345 /* the DW52-54 is for mb status address */
2346 OUT_BCS_BATCH(batch, 0);
2347 OUT_BCS_BATCH(batch, 0);
2348 OUT_BCS_BATCH(batch, 0);
2349 /* the DW56-60 is for ILDB & second ILDB address */
2350 OUT_BCS_BATCH(batch, 0);
2351 OUT_BCS_BATCH(batch, 0);
2352 OUT_BCS_BATCH(batch, 0);
2353 OUT_BCS_BATCH(batch, 0);
2354 OUT_BCS_BATCH(batch, 0);
2355 OUT_BCS_BATCH(batch, 0);
2357 ADVANCE_BCS_BATCH(batch);
2359 dri_bo_unreference(intra_bo);
2363 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2364 struct gen7_mfd_context *gen7_mfd_context)
2366 struct i965_driver_data *i965 = i965_driver_data(ctx);
2367 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2368 dri_bo *bsd_mpc_bo, *mpr_bo;
2370 bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2371 "bsd mpc row store",
2372 11520, /* 1.5 * 120 * 64 */
2375 mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2377 7680, /* 1. 0 * 120 * 64 */
2380 BEGIN_BCS_BATCH(batch, 10);
2381 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2383 OUT_BCS_RELOC(batch,
2385 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2388 OUT_BCS_BATCH(batch, 0);
2389 OUT_BCS_BATCH(batch, 0);
2391 OUT_BCS_RELOC(batch,
2393 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2395 OUT_BCS_BATCH(batch, 0);
2396 OUT_BCS_BATCH(batch, 0);
2398 OUT_BCS_BATCH(batch, 0);
2399 OUT_BCS_BATCH(batch, 0);
2400 OUT_BCS_BATCH(batch, 0);
2402 ADVANCE_BCS_BATCH(batch);
2404 dri_bo_unreference(bsd_mpc_bo);
2405 dri_bo_unreference(mpr_bo);
2409 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2410 struct gen7_mfd_context *gen7_mfd_context)
2416 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2417 struct gen7_mfd_context *gen7_mfd_context)
2419 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2421 int mbaff_frame_flag = 0;
2422 unsigned int width_in_mbs = 1, height_in_mbs = 1;
2424 BEGIN_BCS_BATCH(batch, 16);
2425 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2426 OUT_BCS_BATCH(batch,
2427 width_in_mbs * height_in_mbs);
2428 OUT_BCS_BATCH(batch,
2429 ((height_in_mbs - 1) << 16) |
2430 ((width_in_mbs - 1) << 0));
2431 OUT_BCS_BATCH(batch,
2436 (0 << 12) | /* differ from GEN6 */
2439 OUT_BCS_BATCH(batch,
2440 (1 << 10) | /* 4:2:0 */
2441 (1 << 7) | /* CABAC */
2447 (mbaff_frame_flag << 1) |
2449 OUT_BCS_BATCH(batch, 0);
2450 OUT_BCS_BATCH(batch, 0);
2451 OUT_BCS_BATCH(batch, 0);
2452 OUT_BCS_BATCH(batch, 0);
2453 OUT_BCS_BATCH(batch, 0);
2454 OUT_BCS_BATCH(batch, 0);
2455 OUT_BCS_BATCH(batch, 0);
2456 OUT_BCS_BATCH(batch, 0);
2457 OUT_BCS_BATCH(batch, 0);
2458 OUT_BCS_BATCH(batch, 0);
2459 OUT_BCS_BATCH(batch, 0);
2460 ADVANCE_BCS_BATCH(batch);
2464 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2465 struct gen7_mfd_context *gen7_mfd_context)
2467 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2470 BEGIN_BCS_BATCH(batch, 71);
2471 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2473 /* reference surfaces 0..15 */
2474 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2475 OUT_BCS_BATCH(batch, 0); /* top */
2476 OUT_BCS_BATCH(batch, 0); /* bottom */
2479 OUT_BCS_BATCH(batch, 0);
2481 /* the current decoding frame/field */
2482 OUT_BCS_BATCH(batch, 0); /* top */
2483 OUT_BCS_BATCH(batch, 0);
2484 OUT_BCS_BATCH(batch, 0);
2487 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2488 OUT_BCS_BATCH(batch, 0);
2489 OUT_BCS_BATCH(batch, 0);
2492 OUT_BCS_BATCH(batch, 0);
2493 OUT_BCS_BATCH(batch, 0);
2495 ADVANCE_BCS_BATCH(batch);
2499 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2500 struct gen7_mfd_context *gen7_mfd_context)
2502 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2504 BEGIN_BCS_BATCH(batch, 11);
2505 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2506 OUT_BCS_RELOC(batch,
2507 gen7_mfd_context->jpeg_wa_slice_data_bo,
2508 I915_GEM_DOMAIN_INSTRUCTION, 0,
2510 OUT_BCS_BATCH(batch, 0);
2511 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2512 OUT_BCS_BATCH(batch, 0);
2513 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2514 OUT_BCS_BATCH(batch, 0);
2515 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2516 OUT_BCS_BATCH(batch, 0);
2517 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2518 OUT_BCS_BATCH(batch, 0);
2519 ADVANCE_BCS_BATCH(batch);
2523 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2524 struct gen7_mfd_context *gen7_mfd_context)
2526 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2528 /* the input bitsteam format on GEN7 differs from GEN6 */
2529 BEGIN_BCS_BATCH(batch, 6);
2530 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2531 OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2532 OUT_BCS_BATCH(batch, 0);
2533 OUT_BCS_BATCH(batch,
2539 OUT_BCS_BATCH(batch,
2540 ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2543 (1 << 3) | /* LastSlice Flag */
2544 (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2545 OUT_BCS_BATCH(batch, 0);
2546 ADVANCE_BCS_BATCH(batch);
2550 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2551 struct gen7_mfd_context *gen7_mfd_context)
2553 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2554 int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2555 int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2556 int first_mb_in_slice = 0;
2557 int slice_type = SLICE_TYPE_I;
2559 BEGIN_BCS_BATCH(batch, 11);
2560 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2561 OUT_BCS_BATCH(batch, slice_type);
2562 OUT_BCS_BATCH(batch,
2563 (num_ref_idx_l1 << 24) |
2564 (num_ref_idx_l0 << 16) |
2567 OUT_BCS_BATCH(batch,
2569 (1 << 27) | /* disable Deblocking */
2571 (gen7_jpeg_wa_clip.qp << 16) |
2574 OUT_BCS_BATCH(batch,
2575 (slice_ver_pos << 24) |
2576 (slice_hor_pos << 16) |
2577 (first_mb_in_slice << 0));
2578 OUT_BCS_BATCH(batch,
2579 (next_slice_ver_pos << 16) |
2580 (next_slice_hor_pos << 0));
2581 OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2582 OUT_BCS_BATCH(batch, 0);
2583 OUT_BCS_BATCH(batch, 0);
2584 OUT_BCS_BATCH(batch, 0);
2585 OUT_BCS_BATCH(batch, 0);
2586 ADVANCE_BCS_BATCH(batch);
2590 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2591 struct gen7_mfd_context *gen7_mfd_context)
2593 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2594 gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2595 intel_batchbuffer_emit_mi_flush(batch);
2596 gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2597 gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2598 gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2599 gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2600 gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2601 gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2602 gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2604 gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2605 gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2606 gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2612 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2613 struct decode_state *decode_state,
2614 struct gen7_mfd_context *gen7_mfd_context)
2616 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2617 VAPictureParameterBufferJPEGBaseline *pic_param;
2618 VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2619 dri_bo *slice_data_bo;
2620 int i, j, max_selector = 0;
2622 assert(decode_state->pic_param && decode_state->pic_param->buffer);
2623 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2625 /* Currently only support Baseline DCT */
2626 gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2627 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2629 gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2631 intel_batchbuffer_emit_mi_flush(batch);
2632 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2633 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2634 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2635 gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2636 gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2638 for (j = 0; j < decode_state->num_slice_params; j++) {
2639 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2640 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2641 slice_data_bo = decode_state->slice_datas[j]->bo;
2642 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2644 if (j == decode_state->num_slice_params - 1)
2645 next_slice_group_param = NULL;
2647 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2649 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2652 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2654 if (i < decode_state->slice_params[j]->num_elements - 1)
2655 next_slice_param = slice_param + 1;
2657 next_slice_param = next_slice_group_param;
2659 for (component = 0; component < slice_param->num_components; component++) {
2660 if (max_selector < slice_param->components[component].dc_table_selector)
2661 max_selector = slice_param->components[component].dc_table_selector;
2663 if (max_selector < slice_param->components[component].ac_table_selector)
2664 max_selector = slice_param->components[component].ac_table_selector;
2671 assert(max_selector < 2);
2672 gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2674 for (j = 0; j < decode_state->num_slice_params; j++) {
2675 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2676 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2677 slice_data_bo = decode_state->slice_datas[j]->bo;
2678 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2680 if (j == decode_state->num_slice_params - 1)
2681 next_slice_group_param = NULL;
2683 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2685 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2686 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2688 if (i < decode_state->slice_params[j]->num_elements - 1)
2689 next_slice_param = slice_param + 1;
2691 next_slice_param = next_slice_group_param;
2693 gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2698 intel_batchbuffer_end_atomic(batch);
2699 intel_batchbuffer_flush(batch);
2702 static const int vp8_dc_qlookup[128] =
2704 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
2705 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
2706 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
2707 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
2708 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
2709 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
2710 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2711 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
2714 static const int vp8_ac_qlookup[128] =
2716 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
2717 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
2718 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
2719 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
2720 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
2721 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2722 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2723 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2726 static inline unsigned int vp8_clip_quantization_index(int index)
2737 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2738 struct decode_state *decode_state,
2739 struct gen7_mfd_context *gen7_mfd_context)
2741 struct object_surface *obj_surface;
2742 struct i965_driver_data *i965 = i965_driver_data(ctx);
2744 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2745 int width_in_mbs = (pic_param->frame_width + 15) / 16;
2746 int height_in_mbs = (pic_param->frame_height + 15) / 16;
2748 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2749 assert(height_in_mbs > 0 && height_in_mbs <= 256);
2751 intel_update_vp8_frame_store_index(ctx,
2754 gen7_mfd_context->reference_surface);
2756 /* Current decoded picture */
2757 obj_surface = decode_state->render_object;
2758 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2760 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2761 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2762 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2763 gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2765 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2766 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2767 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2768 gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2770 intel_ensure_vp8_segmentation_buffer(ctx,
2771 &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2773 /* The same as AVC */
2774 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2775 bo = dri_bo_alloc(i965->intel.bufmgr,
2780 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2781 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2783 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2784 bo = dri_bo_alloc(i965->intel.bufmgr,
2785 "deblocking filter row store",
2786 width_in_mbs * 64 * 4,
2789 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2790 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2792 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2793 bo = dri_bo_alloc(i965->intel.bufmgr,
2794 "bsd mpc row store",
2795 width_in_mbs * 64 * 2,
2798 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2799 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2801 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2802 bo = dri_bo_alloc(i965->intel.bufmgr,
2804 width_in_mbs * 64 * 2,
2807 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2808 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2810 gen7_mfd_context->bitplane_read_buffer.valid = 0;
2814 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2815 struct decode_state *decode_state,
2816 struct gen7_mfd_context *gen7_mfd_context)
2818 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2819 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2820 VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2821 VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2822 dri_bo *probs_bo = decode_state->probability_data->bo;
2824 unsigned int quantization_value[4][6];
2826 /* There is no safe way to error out if the segmentation buffer
2827 could not be allocated. So, instead of aborting, simply decode
2828 something even if the result may look totally inacurate */
2829 const unsigned int enable_segmentation =
2830 pic_param->pic_fields.bits.segmentation_enabled &&
2831 gen7_mfd_context->segmentation_buffer.valid;
2833 log2num = (int)log2(slice_param->num_of_partitions - 1);
2835 BEGIN_BCS_BATCH(batch, 38);
2836 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2837 OUT_BCS_BATCH(batch,
2838 (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2839 (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2840 OUT_BCS_BATCH(batch,
2842 pic_param->pic_fields.bits.sharpness_level << 16 |
2843 pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2844 pic_param->pic_fields.bits.sign_bias_golden << 12 |
2845 pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2846 pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2847 (enable_segmentation &&
2848 pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
2849 pic_param->pic_fields.bits.segmentation_enabled << 8 |
2850 (enable_segmentation &&
2851 !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2852 (enable_segmentation &&
2853 pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2854 (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2855 pic_param->pic_fields.bits.filter_type << 4 |
2856 (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2857 !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2859 OUT_BCS_BATCH(batch,
2860 pic_param->loop_filter_level[3] << 24 |
2861 pic_param->loop_filter_level[2] << 16 |
2862 pic_param->loop_filter_level[1] << 8 |
2863 pic_param->loop_filter_level[0] << 0);
2865 /* Quantizer Value for 4 segmetns, DW4-DW15 */
2866 for (i = 0; i < 4; i++) {
2867 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2868 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2869 quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
2870 /* 101581>>16 is equivalent to 155/100 */
2871 quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
2872 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2873 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2875 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2876 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2878 OUT_BCS_BATCH(batch,
2879 quantization_value[i][0] << 16 | /* Y1AC */
2880 quantization_value[i][1] << 0); /* Y1DC */
2881 OUT_BCS_BATCH(batch,
2882 quantization_value[i][5] << 16 | /* UVAC */
2883 quantization_value[i][4] << 0); /* UVDC */
2884 OUT_BCS_BATCH(batch,
2885 quantization_value[i][3] << 16 | /* Y2AC */
2886 quantization_value[i][2] << 0); /* Y2DC */
2889 /* CoeffProbability table for non-key frame, DW16-DW18 */
2891 OUT_BCS_RELOC(batch, probs_bo,
2892 0, I915_GEM_DOMAIN_INSTRUCTION,
2894 OUT_BCS_BATCH(batch, 0);
2895 OUT_BCS_BATCH(batch, 0);
2897 OUT_BCS_BATCH(batch, 0);
2898 OUT_BCS_BATCH(batch, 0);
2899 OUT_BCS_BATCH(batch, 0);
2902 OUT_BCS_BATCH(batch,
2903 pic_param->mb_segment_tree_probs[2] << 16 |
2904 pic_param->mb_segment_tree_probs[1] << 8 |
2905 pic_param->mb_segment_tree_probs[0] << 0);
2907 OUT_BCS_BATCH(batch,
2908 pic_param->prob_skip_false << 24 |
2909 pic_param->prob_intra << 16 |
2910 pic_param->prob_last << 8 |
2911 pic_param->prob_gf << 0);
2913 OUT_BCS_BATCH(batch,
2914 pic_param->y_mode_probs[3] << 24 |
2915 pic_param->y_mode_probs[2] << 16 |
2916 pic_param->y_mode_probs[1] << 8 |
2917 pic_param->y_mode_probs[0] << 0);
2919 OUT_BCS_BATCH(batch,
2920 pic_param->uv_mode_probs[2] << 16 |
2921 pic_param->uv_mode_probs[1] << 8 |
2922 pic_param->uv_mode_probs[0] << 0);
2924 /* MV update value, DW23-DW32 */
2925 for (i = 0; i < 2; i++) {
2926 for (j = 0; j < 20; j += 4) {
2927 OUT_BCS_BATCH(batch,
2928 (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2929 pic_param->mv_probs[i][j + 2] << 16 |
2930 pic_param->mv_probs[i][j + 1] << 8 |
2931 pic_param->mv_probs[i][j + 0] << 0);
2935 OUT_BCS_BATCH(batch,
2936 (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2937 (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2938 (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) << 8 |
2939 (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) << 0);
2941 OUT_BCS_BATCH(batch,
2942 (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2943 (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2944 (pic_param->loop_filter_deltas_mode[1] & 0x7f) << 8 |
2945 (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0);
2947 /* segmentation id stream base address, DW35-DW37 */
2948 if (enable_segmentation) {
2949 OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2950 0, I915_GEM_DOMAIN_INSTRUCTION,
2952 OUT_BCS_BATCH(batch, 0);
2953 OUT_BCS_BATCH(batch, 0);
2956 OUT_BCS_BATCH(batch, 0);
2957 OUT_BCS_BATCH(batch, 0);
2958 OUT_BCS_BATCH(batch, 0);
2960 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the MFD_VP8_BSD_OBJECT command describing the VP8 bitstream layout
 * to the bitstream-decoder unit: the boolean-coder state for partition 0
 * and the size/offset of each of the up-to-8 token partitions.
 *
 * NOTE(review): several interior lines of this function (local
 * declarations, else-branches and closing braces) are not visible in this
 * chunk; the comments below describe only the statements that are.
 */
gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVP8 *pic_param,
                        VASliceParameterBufferVP8 *slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* First byte past the partition-0 header: macroblock_offset is in
     * bits, so round it up to whole bytes. */
    unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
    /* Bits already consumed from the current bool-coder byte
     * (bool_coder_ctx.count holds the bits still available, 0..7). */
    unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
    unsigned int partition_size_0 = slice_param->partition_size[0];

    assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
    if (used_bits == 8) {
        partition_size_0 -= 1;

    /* num_of_partitions counts partition 0 plus 1/2/4/8 token partitions. */
    assert(slice_param->num_of_partitions >= 2);
    assert(slice_param->num_of_partitions <= 9);
    log2num = (int)log2(slice_param->num_of_partitions - 1);

    BEGIN_BCS_BATCH(batch, 22);
    OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
    OUT_BCS_BATCH(batch,
                  used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
                  pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */

    OUT_BCS_BATCH(batch, partition_size_0 + 1);
    OUT_BCS_BATCH(batch, offset);
    /* When there is more than one token partition, the 3-byte partition
     * sizes are stored right after the first partition's data; skip over
     * both to reach the first token partition. */
    offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
    /* Emit size/offset pairs for all 8 possible token partitions; unused
     * slots are zero-filled. */
    for (i = 1; i < 9; i++) {
        if (i < slice_param->num_of_partitions) {
            OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
            OUT_BCS_BATCH(batch, offset);
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);

        offset += slice_param->partition_size[i];

    OUT_BCS_BATCH(batch, 0); /* concealment method */

    ADVANCE_BCS_BATCH(batch);
/*
 * Top-level VP8 frame decode: validates the decode_state buffers (exactly
 * one slice-parameter buffer with one element, one slice-data BO and a
 * probability-data buffer are required), then emits the complete MFX
 * command sequence for the frame into the BCS batch buffer.
 *
 * NOTE(review): the error-return after WARN_ONCE and the function's
 * closing lines are not visible in this chunk.
 */
gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVP8 *pic_param;
    VASliceParameterBufferVP8 *slice_param;
    dri_bo *slice_data_bo;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;

    /* one slice per frame */
    if (decode_state->num_slice_params != 1 ||
        (!decode_state->slice_params ||
         !decode_state->slice_params[0] ||
         (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
        (!decode_state->slice_datas ||
         !decode_state->slice_datas[0] ||
         !decode_state->slice_datas[0]->bo) ||
        !decode_state->probability_data) {
        WARN_ONCE("Wrong parameters for VP8 decoding\n");

    slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
    slice_data_bo = decode_state->slice_datas[0]->bo;

    gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
    /* Emit the whole frame's commands atomically into the BCS ring:
     * pipeline setup, per-frame state, then the BSD object. */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
/*
 * hw_context::run entry point for Gen8 decode: sanity-checks the input
 * decode state, then dispatches to the codec-specific decode routine
 * based on the VA profile.
 *
 * NOTE(review): the `VAProfile profile` parameter line, the `switch`
 * header, the per-case `break`s and the final return are not visible in
 * this chunk.
 */
gen8_mfd_decode_picture(VADriverContextP ctx,
                        union codec_state *codec_state,
                        struct hw_context *hw_context)
    struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
    struct decode_state *decode_state = &codec_state->decode;

    assert(gen7_mfd_context);

    vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);

    /* Propagate the sanity-check failure without touching the hardware. */
    if (vaStatus != VA_STATUS_SUCCESS)

    /* Reset the MPEG-2 workaround tracking before each picture. */
    gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;

    case VAProfileMPEG2Simple:
    case VAProfileMPEG2Main:
        gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);

    case VAProfileH264ConstrainedBaseline:
    case VAProfileH264Main:
    case VAProfileH264High:
    case VAProfileH264StereoHigh:
    case VAProfileH264MultiviewHigh:
        gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);

    case VAProfileVC1Simple:
    case VAProfileVC1Main:
    case VAProfileVC1Advanced:
        gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);

    case VAProfileJPEGBaseline:
        gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);

    case VAProfileVP8Version0_3:
        gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);

    vaStatus = VA_STATUS_SUCCESS;
/*
 * hw_context::destroy callback: drops every buffer object owned by the
 * decoder context (pre/post-deblocking outputs and all row-store scratch
 * buffers), tears down the JPEG-workaround surface if one was created,
 * then frees the batch buffer and the context itself.
 *
 * NOTE(review): the per-BO bo=NULL reset pattern suggests
 * dri_bo_unreference tolerates NULL, per libdrm convention — confirm.
 */
gen8_mfd_context_destroy(void *hw_context)
    VADriverContextP ctx;
    struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;

    ctx = (VADriverContextP)(gen7_mfd_context->driver_context);

    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
    gen7_mfd_context->bitplane_read_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
    gen7_mfd_context->segmentation_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);

    /* The JPEG workaround surface is created lazily; only destroy it when
     * it was actually allocated. */
    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
        gen7_mfd_context->jpeg_wa_surface_object = NULL;

    intel_batchbuffer_free(gen7_mfd_context->base.batch);
    free(gen7_mfd_context);
3168 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3169 struct gen7_mfd_context *gen7_mfd_context)
3171 gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3172 gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3173 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3174 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3178 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3180 struct intel_driver_data *intel = intel_driver_data(ctx);
3181 struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3184 if (!gen7_mfd_context)
3187 gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3188 gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3189 gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3191 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3192 gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3193 gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3196 gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3197 gen7_mfd_context->segmentation_buffer.valid = 0;
3199 switch (obj_config->profile) {
3200 case VAProfileMPEG2Simple:
3201 case VAProfileMPEG2Main:
3202 gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3205 case VAProfileH264ConstrainedBaseline:
3206 case VAProfileH264Main:
3207 case VAProfileH264High:
3208 case VAProfileH264StereoHigh:
3209 case VAProfileH264MultiviewHigh:
3210 gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3216 gen7_mfd_context->driver_context = ctx;
3217 return (struct hw_context *)gen7_mfd_context;