2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/* Zig-zag scan order for 8x8 blocks: maps scan position -> raster index.
 * Used to reorder inverse-quantization matrices into the layout the MFX
 * hardware expects.
 * NOTE(review): the closing "};" of this initializer is not visible in this
 * view of the file — source appears truncated here. */
51 static const uint32_t zigzag_direct[64] = {
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
54 12, 19, 26, 33, 40, 48, 41, 34,
55 27, 20, 13, 6, 7, 14, 21, 28,
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
/* Lazily attach/refresh the per-surface AVC private data (GenAvcSurface) on
 * obj_surface and allocate the direct-MV (DMV) write/read buffer sized for
 * the whole frame.
 * NOTE(review): several lines (return type, braces, the dmv allocation's
 * trailing arguments and any dmv_bottom handling) are missing from this
 * view — source appears truncated; confirm against the full file. */
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64 VAPictureParameterBufferH264 *pic_param,
65 struct object_surface *obj_surface)
67 struct i965_driver_data *i965 = i965_driver_data(ctx);
68 GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69 int width_in_mbs, height_in_mbs;
/* Destructor hook so the surface frees its private data on release. */
71 obj_surface->free_private_data = gen_free_avc_surface;
72 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
/* First use of this surface for AVC: allocate zeroed private data. */
75 if (!gen7_avc_surface) {
76 gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
/* Allocation-failure path — remainder not visible in this view. */
78 if (!gen7_avc_surface)
81 gen7_avc_surface->base.frame_store_id = -1;
82 assert((obj_surface->size & 0x3f) == 0);
83 obj_surface->private_data = gen7_avc_surface;
86 /* DMV buffers now relate to the whole frame, irrespective of
88 if (gen7_avc_surface->dmv_top == NULL) {
/* 128 bytes of direct-MV data per macroblock of the frame. */
89 gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
90 "direct mv w/r buffer",
91 width_in_mbs * height_in_mbs * 128,
93 assert(gen7_avc_surface->dmv_top);
/* Emit MFX_PIPE_MODE_SELECT: configure the MFX engine for VLD decode of the
 * selected codec (MPEG-2/AVC/VC-1/JPEG/VP8) with long-format slice data and
 * pre/post-deblocking output selection taken from the context.
 * NOTE(review): the function's return-type line, braces, and the leading
 * OUT_BCS_BATCH(...) of two dwords are missing from this view — source
 * appears truncated. */
98 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
99 struct decode_state *decode_state,
101 struct gen7_mfd_context *gen7_mfd_context)
103 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Only these codec formats are supported by this decode path. */
105 assert(standard_select == MFX_FORMAT_MPEG2 ||
106 standard_select == MFX_FORMAT_AVC ||
107 standard_select == MFX_FORMAT_VC1 ||
108 standard_select == MFX_FORMAT_JPEG ||
109 standard_select == MFX_FORMAT_VP8);
111 BEGIN_BCS_BATCH(batch, 5);
112 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
114 (MFX_LONG_MODE << 17) | /* Currently only support long format */
115 (MFD_MODE_VLD << 15) | /* VLD mode */
116 (0 << 10) | /* disable Stream-Out */
117 (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
118 (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
119 (0 << 5) | /* not in stitch mode */
120 (MFX_CODEC_DECODE << 4) | /* decoding mode */
121 (standard_select << 0));
123 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
124 (0 << 3) | /* terminate if AVC mbdata error occurs */
125 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
128 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
129 OUT_BCS_BATCH(batch, 0); /* reserved */
130 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_SURFACE_STATE for the current render target: dimensions, pitch,
 * tiling (Y-major), surface format (monochrome for Y800, otherwise planar
 * 4:2:0 8-bit), and the Cb/Cr plane Y-offsets.
 * NOTE(review): return-type line, braces, and the OUT_BCS_BATCH(...) opener
 * for several multi-line dwords are missing from this view — truncated. */
134 gen8_mfd_surface_state(VADriverContextP ctx,
135 struct decode_state *decode_state,
137 struct gen7_mfd_context *gen7_mfd_context)
139 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
140 struct object_surface *obj_surface = decode_state->render_object;
141 unsigned int y_cb_offset;
142 unsigned int y_cr_offset;
143 unsigned int surface_format;
147 y_cb_offset = obj_surface->y_cb_offset;
148 y_cr_offset = obj_surface->y_cr_offset;
/* Y800 (grayscale) surfaces have no chroma planes. */
150 surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
151 MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
153 BEGIN_BCS_BATCH(batch, 6);
154 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
155 OUT_BCS_BATCH(batch, 0);
157 ((obj_surface->orig_height - 1) << 18) |
158 ((obj_surface->orig_width - 1) << 4));
160 (surface_format << 28) | /* 420 planar YUV surface */
161 ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
162 (0 << 22) | /* surface object control state, ignored */
163 ((obj_surface->width - 1) << 3) | /* pitch */
164 (0 << 2) | /* must be 0 */
165 (1 << 1) | /* must be tiled */
166 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
168 (0 << 16) | /* X offset for U(Cb), must be 0 */
169 (y_cb_offset << 0)); /* Y offset for U(Cb) */
171 (0 << 16) | /* X offset for V(Cr), must be 0 */
172 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
173 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): pre/post-deblocking outputs,
 * intra and deblocking-filter row-store scratch buffers, the 16 reference
 * picture base addresses, and MOCS attribute dwords.  Invalid buffers are
 * padded with zero dwords to keep the fixed command length.
 * NOTE(review): return-type line, braces, relocation-offset arguments of the
 * OUT_BCS_RELOC64 calls, the `else` keywords pairing the zero-dword padding
 * with each `if`, and the loop's own else-branch are missing from this
 * view — source appears truncated. */
177 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
178 struct decode_state *decode_state,
180 struct gen7_mfd_context *gen7_mfd_context)
182 struct i965_driver_data *i965 = i965_driver_data(ctx);
183 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
186 BEGIN_BCS_BATCH(batch, 61);
187 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
188 /* Pre-deblock 1-3 */
189 if (gen7_mfd_context->pre_deblocking_output.valid)
190 OUT_BCS_RELOC64(batch, gen7_mfd_context->pre_deblocking_output.bo,
191 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
194 OUT_BCS_BATCH(batch, 0);
196 OUT_BCS_BATCH(batch, 0);
/* MOCS (memory attributes) dword for the pre-deblocking surface. */
198 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
200 /* Post-debloing 4-6 */
201 if (gen7_mfd_context->post_deblocking_output.valid)
202 OUT_BCS_RELOC64(batch, gen7_mfd_context->post_deblocking_output.bo,
203 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
206 OUT_BCS_BATCH(batch, 0);
208 OUT_BCS_BATCH(batch, 0);
210 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
212 /* uncompressed-video & stream out 7-12 */
213 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
214 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
215 OUT_BCS_BATCH(batch, 0);
216 OUT_BCS_BATCH(batch, 0);
217 OUT_BCS_BATCH(batch, 0);
218 OUT_BCS_BATCH(batch, 0);
220 /* intra row-store scratch 13-15 */
221 if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
222 OUT_BCS_RELOC64(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
223 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
226 OUT_BCS_BATCH(batch, 0);
228 OUT_BCS_BATCH(batch, 0);
230 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
232 /* deblocking-filter-row-store 16-18 */
233 if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
234 OUT_BCS_RELOC64(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
235 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
238 OUT_BCS_BATCH(batch, 0);
239 OUT_BCS_BATCH(batch, 0);
242 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/* Reference pictures: one 64-bit address slot per frame store entry. */
245 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
246 struct object_surface *obj_surface;
248 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
249 gen7_mfd_context->reference_surface[i].obj_surface &&
250 gen7_mfd_context->reference_surface[i].obj_surface->bo) {
251 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
253 OUT_BCS_RELOC64(batch, obj_surface->bo,
254 I915_GEM_DOMAIN_INSTRUCTION, 0,
257 OUT_BCS_BATCH(batch, 0);
258 OUT_BCS_BATCH(batch, 0);
263 /* reference property 51 */
264 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
266 /* Macroblock status & ILDB 52-57 */
267 OUT_BCS_BATCH(batch, 0);
268 OUT_BCS_BATCH(batch, 0);
269 OUT_BCS_BATCH(batch, 0);
270 OUT_BCS_BATCH(batch, 0);
271 OUT_BCS_BATCH(batch, 0);
272 OUT_BCS_BATCH(batch, 0);
274 /* the second Macroblock status 58-60 */
275 OUT_BCS_BATCH(batch, 0);
276 OUT_BCS_BATCH(batch, 0);
277 OUT_BCS_BATCH(batch, 0);
279 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): point the bitstream fetch
 * unit at the slice data buffer; the MV / IT-COFF / IT-DBLK / PAK-BSE
 * indirect object slots are unused for decode and set to zero.
 * NOTE(review): return-type line and braces are missing from this view. */
283 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
284 dri_bo *slice_data_bo,
286 struct gen7_mfd_context *gen7_mfd_context)
288 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
289 struct i965_driver_data *i965 = i965_driver_data(ctx);
291 BEGIN_BCS_BATCH(batch, 26);
292 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
294 OUT_BCS_RELOC64(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
295 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
296 /* Upper bound 4-5 */
297 OUT_BCS_BATCH(batch, 0);
298 OUT_BCS_BATCH(batch, 0);
300 /* MFX indirect MV 6-10 */
301 OUT_BCS_BATCH(batch, 0);
302 OUT_BCS_BATCH(batch, 0);
303 OUT_BCS_BATCH(batch, 0);
304 OUT_BCS_BATCH(batch, 0);
305 OUT_BCS_BATCH(batch, 0);
307 /* MFX IT_COFF 11-15 */
308 OUT_BCS_BATCH(batch, 0);
309 OUT_BCS_BATCH(batch, 0);
310 OUT_BCS_BATCH(batch, 0);
311 OUT_BCS_BATCH(batch, 0);
312 OUT_BCS_BATCH(batch, 0);
314 /* MFX IT_DBLK 16-20 */
315 OUT_BCS_BATCH(batch, 0);
316 OUT_BCS_BATCH(batch, 0);
317 OUT_BCS_BATCH(batch, 0);
318 OUT_BCS_BATCH(batch, 0);
319 OUT_BCS_BATCH(batch, 0);
321 /* MFX PAK_BSE object for encoder 21-25 */
322 OUT_BCS_BATCH(batch, 0);
323 OUT_BCS_BATCH(batch, 0);
324 OUT_BCS_BATCH(batch, 0);
325 OUT_BCS_BATCH(batch, 0);
326 OUT_BCS_BATCH(batch, 0);
328 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): BSD/MPC row store, MPR row
 * store, and (VC-1) bitplane read buffer addresses, each followed by a MOCS
 * dword; invalid buffers are padded with zero dwords.
 * NOTE(review): return-type line, braces, relocation offsets, and the
 * `else` keywords pairing the zero padding with each `if` are missing from
 * this view — source appears truncated. */
332 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
333 struct decode_state *decode_state,
335 struct gen7_mfd_context *gen7_mfd_context)
337 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
338 struct i965_driver_data *i965 = i965_driver_data(ctx);
340 BEGIN_BCS_BATCH(batch, 10);
341 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
343 if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
344 OUT_BCS_RELOC64(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
345 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
348 OUT_BCS_BATCH(batch, 0);
349 OUT_BCS_BATCH(batch, 0);
352 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
353 /* MPR Row Store Scratch buffer 4-6 */
354 if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
355 OUT_BCS_RELOC64(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
356 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
359 OUT_BCS_BATCH(batch, 0);
360 OUT_BCS_BATCH(batch, 0);
363 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
366 if (gen7_mfd_context->bitplane_read_buffer.valid)
367 OUT_BCS_RELOC64(batch, gen7_mfd_context->bitplane_read_buffer.bo,
368 I915_GEM_DOMAIN_INSTRUCTION, 0,
371 OUT_BCS_BATCH(batch, 0);
372 OUT_BCS_BATCH(batch, 0);
374 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
375 ADVANCE_BCS_BATCH(batch);
/* Emit one MFX_QM_STATE command carrying a quantization matrix of the given
 * type.  The input is copied into a fixed 64-byte (16-dword) buffer so the
 * command always has the full fixed length, regardless of qm_length.
 * NOTE(review): the parameter lines declaring qm_type/qm/qm_length, the
 * return type, braces, and any zero-fill of qm_buffer's tail are missing
 * from this view — source appears truncated; with a short qm_length the
 * visible code would emit uninitialized tail bytes unless the missing lines
 * clear them. */
379 gen8_mfd_qm_state(VADriverContextP ctx,
383 struct gen7_mfd_context *gen7_mfd_context)
385 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
386 unsigned int qm_buffer[16];
388 assert(qm_length <= 16 * 4);
389 memcpy(qm_buffer, qm, qm_length);
391 BEGIN_BCS_BATCH(batch, 18);
392 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
393 OUT_BCS_BATCH(batch, qm_type << 0);
394 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
395 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_IMG_STATE (17 dwords) from the VA H.264 picture parameters:
 * frame dimensions in MBs, QP offsets, prediction flags, entropy mode, and
 * field/MBAFF structure.  Validates that only frame/field combinations and
 * chroma formats the MFX unit supports are requested (4:2:0 or mono only).
 * NOTE(review): return-type line, braces, the img_struct declaration and
 * its assignments in the TOP/BOTTOM_FIELD branches, and the OUT_BCS_BATCH
 * openers of several multi-line dwords are missing from this view. */
399 gen8_mfd_avc_img_state(VADriverContextP ctx,
400 struct decode_state *decode_state,
401 struct gen7_mfd_context *gen7_mfd_context)
403 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
405 int mbaff_frame_flag;
406 unsigned int width_in_mbs, height_in_mbs;
407 VAPictureParameterBufferH264 *pic_param;
409 assert(decode_state->pic_param && decode_state->pic_param->buffer);
410 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
411 assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
/* Derive the picture structure from the CurrPic field flags. */
413 if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
415 else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
/* Field pictures must be flagged as such; frames must not be. */
420 if ((img_struct & 0x1) == 0x1) {
421 assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
423 assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
426 if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
427 assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
428 assert(pic_param->pic_fields.bits.field_pic_flag == 0);
430 assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
/* MBAFF: adaptive frame/field coding within a frame picture. */
433 mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
434 !pic_param->pic_fields.bits.field_pic_flag);
436 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
437 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
439 /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
440 assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
441 pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
442 assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
444 BEGIN_BCS_BATCH(batch, 17);
445 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
447 (width_in_mbs * height_in_mbs - 1));
449 ((height_in_mbs - 1) << 16) |
450 ((width_in_mbs - 1) << 0));
452 ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
453 ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
454 (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
455 (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
456 (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
457 (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
460 (pic_param->seq_fields.bits.chroma_format_idc << 10) |
461 (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
462 ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
463 (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
464 (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
465 (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
466 (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
467 (mbaff_frame_flag << 1) |
468 (pic_param->pic_fields.bits.field_pic_flag << 0));
469 OUT_BCS_BATCH(batch, 0);
470 OUT_BCS_BATCH(batch, 0);
471 OUT_BCS_BATCH(batch, 0);
472 OUT_BCS_BATCH(batch, 0);
473 OUT_BCS_BATCH(batch, 0);
474 OUT_BCS_BATCH(batch, 0);
475 OUT_BCS_BATCH(batch, 0);
476 OUT_BCS_BATCH(batch, 0);
477 OUT_BCS_BATCH(batch, 0);
478 OUT_BCS_BATCH(batch, 0);
479 OUT_BCS_BATCH(batch, 0);
480 OUT_BCS_BATCH(batch, 0);
481 ADVANCE_BCS_BATCH(batch);
/* Load the H.264 scaling lists into the MFX unit: 4x4 intra and inter
 * matrices always, plus the 8x8 intra/inter matrices when the picture uses
 * transform_8x8_mode.  Falls back to the context's default IQ matrix when
 * the app supplied none.
 * NOTE(review): return-type line, braces, and the `else` pairing the
 * fallback assignment with the `if` are missing from this view. */
485 gen8_mfd_avc_qm_state(VADriverContextP ctx,
486 struct decode_state *decode_state,
487 struct gen7_mfd_context *gen7_mfd_context)
489 VAIQMatrixBufferH264 *iq_matrix;
490 VAPictureParameterBufferH264 *pic_param;
492 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
493 iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
495 iq_matrix = &gen7_mfd_context->iq_matrix.h264;
497 assert(decode_state->pic_param && decode_state->pic_param->buffer);
498 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
/* Lists 0-2 are intra (Y/Cb/Cr), lists 3-5 are inter. */
500 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
501 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
503 if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
504 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
505 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
/* Thin wrapper: emit the AVC PICID state for the current reference surfaces
 * via the shared Gen7.5 helper.
 * NOTE(review): return-type line and braces are missing from this view. */
510 gen8_mfd_avc_picid_state(VADriverContextP ctx,
511 struct decode_state *decode_state,
512 struct gen7_mfd_context *gen7_mfd_context)
514 gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
515 gen7_mfd_context->reference_surface);
/* Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): the direct-MV buffer addresses
 * of all reference surfaces and of the current picture, followed by the
 * top/bottom POC values for each reference and for the current picture.
 * Needed for B-slice direct/spatial motion prediction.
 * NOTE(review): return-type line, braces, relocation offsets, the `else`
 * zero-padding branches, and parts of the POC loop (its `if` guard on a
 * valid obj_surface and the else branch) are missing from this view. */
519 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
520 struct decode_state *decode_state,
521 VAPictureParameterBufferH264 *pic_param,
522 VASliceParameterBufferH264 *slice_param,
523 struct gen7_mfd_context *gen7_mfd_context)
525 struct i965_driver_data *i965 = i965_driver_data(ctx);
526 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
527 struct object_surface *obj_surface;
528 GenAvcSurface *gen7_avc_surface;
529 VAPictureH264 *va_pic;
532 BEGIN_BCS_BATCH(batch, 71);
533 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
535 /* reference surfaces 0..15 */
536 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
537 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
538 gen7_mfd_context->reference_surface[i].obj_surface &&
539 gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
541 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
542 gen7_avc_surface = obj_surface->private_data;
544 OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
545 I915_GEM_DOMAIN_INSTRUCTION, 0,
548 OUT_BCS_BATCH(batch, 0);
549 OUT_BCS_BATCH(batch, 0);
553 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
555 /* the current decoding frame/field */
556 va_pic = &pic_param->CurrPic;
557 obj_surface = decode_state->render_object;
558 assert(obj_surface->bo && obj_surface->private_data);
559 gen7_avc_surface = obj_surface->private_data;
/* Current picture's DMV buffer is both read and written by the GPU. */
561 OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
562 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
565 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/* POC list: top/bottom field order counts per reference slot. */
568 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
569 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
572 const VAPictureH264 * const va_pic = avc_find_picture(
573 obj_surface->base.id, pic_param->ReferenceFrames,
574 ARRAY_ELEMS(pic_param->ReferenceFrames));
576 assert(va_pic != NULL);
577 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
578 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
580 OUT_BCS_BATCH(batch, 0);
581 OUT_BCS_BATCH(batch, 0);
585 va_pic = &pic_param->CurrPic;
586 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
587 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
589 ADVANCE_BCS_BATCH(batch);
/* Emit a phantom (dummy) slice covering macroblocks before the first real
 * slice, delegating to the shared Gen6 helper.  Used when the first slice
 * of a picture does not start at MB 0.
 * NOTE(review): return-type line and braces are missing from this view. */
593 gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
594 VAPictureParameterBufferH264 *pic_param,
595 VASliceParameterBufferH264 *next_slice_param,
596 struct gen7_mfd_context *gen7_mfd_context)
598 gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
/* Emit MFX_AVC_SLICE_STATE for one slice: normalized slice type, active
 * reference counts, weight denominators, QP/deblocking parameters, and the
 * MB start position of this slice and of the next (or the picture end for
 * the last slice).  SI/SP types are normalized to I/P for the hardware.
 * NOTE(review): return-type line, braces, the slice_type declaration, the
 * `else` branches (B-type default, I-type zero ref counts, MBAFF doubling
 * guards), and the OUT_BCS_BATCH openers of multi-line dwords are missing
 * from this view — source appears truncated. */
602 gen8_mfd_avc_slice_state(VADriverContextP ctx,
603 VAPictureParameterBufferH264 *pic_param,
604 VASliceParameterBufferH264 *slice_param,
605 VASliceParameterBufferH264 *next_slice_param,
606 struct gen7_mfd_context *gen7_mfd_context)
608 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
609 int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
610 int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
611 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
612 int num_ref_idx_l0, num_ref_idx_l1;
613 int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
614 pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
615 int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
/* Normalize SI->I and SP->P; hardware has no separate SI/SP types. */
618 if (slice_param->slice_type == SLICE_TYPE_I ||
619 slice_param->slice_type == SLICE_TYPE_SI) {
620 slice_type = SLICE_TYPE_I;
621 } else if (slice_param->slice_type == SLICE_TYPE_P ||
622 slice_param->slice_type == SLICE_TYPE_SP) {
623 slice_type = SLICE_TYPE_P;
625 assert(slice_param->slice_type == SLICE_TYPE_B);
626 slice_type = SLICE_TYPE_B;
629 if (slice_type == SLICE_TYPE_I) {
630 assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
631 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
634 } else if (slice_type == SLICE_TYPE_P) {
635 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
636 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
639 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
640 num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
643 first_mb_in_slice = slice_param->first_mb_in_slice;
644 slice_hor_pos = first_mb_in_slice % width_in_mbs;
645 slice_ver_pos = first_mb_in_slice / width_in_mbs;
/* MBAFF pictures address MB pairs, so the vertical position doubles. */
648 slice_ver_pos = slice_ver_pos << 1;
649 if (next_slice_param) {
650 first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
651 next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
652 next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
655 next_slice_ver_pos = next_slice_ver_pos << 1;
/* Last slice: "next" position is the bottom of the picture/field. */
657 next_slice_hor_pos = 0;
658 next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
661 BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
662 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
663 OUT_BCS_BATCH(batch, slice_type);
665 (num_ref_idx_l1 << 24) |
666 (num_ref_idx_l0 << 16) |
667 (slice_param->chroma_log2_weight_denom << 8) |
668 (slice_param->luma_log2_weight_denom << 0));
670 (slice_param->direct_spatial_mv_pred_flag << 29) |
671 (slice_param->disable_deblocking_filter_idc << 27) |
672 (slice_param->cabac_init_idc << 24) |
673 ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
674 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
675 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
677 (slice_ver_pos << 24) |
678 (slice_hor_pos << 16) |
679 (first_mb_in_slice << 0));
681 (next_slice_ver_pos << 16) |
682 (next_slice_hor_pos << 0));
684 (next_slice_param == NULL) << 19); /* last slice flag */
685 OUT_BCS_BATCH(batch, 0);
686 OUT_BCS_BATCH(batch, 0);
687 OUT_BCS_BATCH(batch, 0);
688 OUT_BCS_BATCH(batch, 0);
689 ADVANCE_BCS_BATCH(batch);
/* Thin wrapper: emit the AVC reference index (RefPicList) state via the
 * shared Gen6 helper, binding slice ref lists to the frame store entries.
 * NOTE(review): return-type line, braces, and some argument lines of the
 * call are missing from this view — source appears truncated. */
693 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
694 VAPictureParameterBufferH264 *pic_param,
695 VASliceParameterBufferH264 *slice_param,
696 struct gen7_mfd_context *gen7_mfd_context)
698 gen6_send_avc_ref_idx_state(
699 gen7_mfd_context->base.batch,
701 gen7_mfd_context->reference_surface
/* Emit MFX_AVC_WEIGHTOFFSET_STATE when explicit weighted prediction is in
 * use: one table (L0) for weighted P slices, two tables (L0 and L1) for
 * explicit-bipred B slices.  Each table packs per-ref luma/chroma weight
 * and offset pairs as 32 x 6 shorts.
 * NOTE(review): braces, the return type, and the `if (i == 0)/else` guards
 * selecting the L0 vs L1 source arrays around the two copy loops are
 * missing from this view — source appears truncated. */
706 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
707 VAPictureParameterBufferH264 *pic_param,
708 VASliceParameterBufferH264 *slice_param,
709 struct gen7_mfd_context *gen7_mfd_context)
711 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
712 int i, j, num_weight_offset_table = 0;
713 short weightoffsets[32 * 6];
715 if ((slice_param->slice_type == SLICE_TYPE_P ||
716 slice_param->slice_type == SLICE_TYPE_SP) &&
717 (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
718 num_weight_offset_table = 1;
721 if ((slice_param->slice_type == SLICE_TYPE_B) &&
722 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
723 num_weight_offset_table = 2;
726 for (i = 0; i < num_weight_offset_table; i++) {
727 BEGIN_BCS_BATCH(batch, 98);
728 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
729 OUT_BCS_BATCH(batch, i);
/* Table 0: list-0 weights/offsets (luma, then Cb/Cr pairs). */
732 for (j = 0; j < 32; j++) {
733 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
734 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
735 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
736 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
737 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
738 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
/* Table 1: list-1 weights/offsets for explicit bipred B slices. */
741 for (j = 0; j < 32; j++) {
742 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
743 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
744 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
745 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
746 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
747 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
751 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
752 ADVANCE_BCS_BATCH(batch);
/* Emit MFD_AVC_BSD_OBJECT: kick decoding of one slice's bitstream data.
 * Computes the bit offset of the first macroblock (past the slice header)
 * so the hardware starts parsing at the right position, and flags the last
 * slice of the picture.
 * NOTE(review): return-type line, braces, some avc_get_first_mb_bit_offset
 * arguments, and the OUT_BCS_BATCH openers of two multi-line dwords are
 * missing from this view — source appears truncated. */
757 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
758 VAPictureParameterBufferH264 *pic_param,
759 VASliceParameterBufferH264 *slice_param,
760 dri_bo *slice_data_bo,
761 VASliceParameterBufferH264 *next_slice_param,
762 struct gen7_mfd_context *gen7_mfd_context)
764 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* CABAC vs CAVLC affects where the first MB's data begins. */
765 int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
767 pic_param->pic_fields.bits.entropy_coding_mode_flag);
769 /* the input bitsteam format on GEN7 differs from GEN6 */
770 BEGIN_BCS_BATCH(batch, 6);
771 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
773 (slice_param->slice_data_size));
774 OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
782 ((slice_data_bit_offset >> 3) << 16) |
786 ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
787 (slice_data_bit_offset & 0x7));
788 OUT_BCS_BATCH(batch, 0);
789 ADVANCE_BCS_BATCH(batch);
/* One-time AVC context setup: seed the context's fallback IQ matrix with
 * the H.264 default (flat) scaling lists, used when the app supplies none.
 * NOTE(review): return-type line and braces are missing from this view. */
793 gen8_mfd_avc_context_init(
794 VADriverContextP ctx,
795 struct gen7_mfd_context *gen7_mfd_context
798 /* Initialize flat scaling lists */
799 avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/* Per-picture AVC decode setup: detect whether in-loop deblocking (ILDB) is
 * enabled on any slice, refresh the frame store index, mark the render
 * target referenced/unreferenced, ensure its BO and AVC private data exist,
 * route output through the post- or pre-deblocking path accordingly, and
 * (re)allocate the row-store scratch buffers sized by picture width.
 * NOTE(review): return-type line, braces, the `bo` declaration, the
 * enable_avc_ildb assignment inside the deblocking check, several
 * dri_bo_alloc name/size/alignment argument lines, and the `else` pairing
 * the unreferenced branch are missing from this view — truncated. */
803 gen8_mfd_avc_decode_init(VADriverContextP ctx,
804 struct decode_state *decode_state,
805 struct gen7_mfd_context *gen7_mfd_context)
807 VAPictureParameterBufferH264 *pic_param;
808 VASliceParameterBufferH264 *slice_param;
809 struct i965_driver_data *i965 = i965_driver_data(ctx);
810 struct object_surface *obj_surface;
812 int i, j, enable_avc_ildb = 0;
813 unsigned int width_in_mbs, height_in_mbs;
/* Scan slices until one enables deblocking (idc != 1 means ILDB on). */
815 for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
816 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
817 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
819 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
820 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
821 assert((slice_param->slice_type == SLICE_TYPE_I) ||
822 (slice_param->slice_type == SLICE_TYPE_SI) ||
823 (slice_param->slice_type == SLICE_TYPE_P) ||
824 (slice_param->slice_type == SLICE_TYPE_SP) ||
825 (slice_param->slice_type == SLICE_TYPE_B));
827 if (slice_param->disable_deblocking_filter_idc != 1) {
836 assert(decode_state->pic_param && decode_state->pic_param->buffer);
837 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
838 gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
839 gen7_mfd_context->reference_surface);
840 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
841 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
842 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
843 assert(height_in_mbs > 0 && height_in_mbs <= 256);
845 /* Current decoded picture */
846 obj_surface = decode_state->render_object;
847 if (pic_param->pic_fields.bits.reference_pic_flag)
848 obj_surface->flags |= SURFACE_REFERENCED;
850 obj_surface->flags &= ~SURFACE_REFERENCED;
852 avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
853 gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
/* With ILDB enabled, output goes through the post-deblocking path;
 * otherwise the pre-deblocking path writes the surface directly. */
855 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
856 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
857 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
858 gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
860 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
861 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
862 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
863 gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
865 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
866 bo = dri_bo_alloc(i965->intel.bufmgr,
871 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
872 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
874 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
875 bo = dri_bo_alloc(i965->intel.bufmgr,
876 "deblocking filter row store",
877 width_in_mbs * 64 * 4,
880 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
881 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
883 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
884 bo = dri_bo_alloc(i965->intel.bufmgr,
886 width_in_mbs * 64 * 2,
889 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
890 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
892 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
893 bo = dri_bo_alloc(i965->intel.bufmgr,
895 width_in_mbs * 64 * 2,
898 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
899 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* AVC has no VC-1 style bitplane buffer. */
901 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Top-level AVC picture decode: run per-picture init, then emit the full
 * MFX command sequence (pipe mode, surface, buffer addresses, QM, PICID,
 * IMG state) followed by per-slice commands (IND_OBJ base, directmode,
 * ref idx, weight/offset, slice state, BSD object) for every slice, and
 * flush the batch.
 * NOTE(review): return-type line, braces, the declarations of i/j, the
 * `else` branches of the next-slice-group and next-slice selection, the
 * slice_param++ advance, and the phantom tail-slice handling are missing
 * from this view — source appears truncated. */
905 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
906 struct decode_state *decode_state,
907 struct gen7_mfd_context *gen7_mfd_context)
909 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
910 VAPictureParameterBufferH264 *pic_param;
911 VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
912 dri_bo *slice_data_bo;
915 assert(decode_state->pic_param && decode_state->pic_param->buffer);
916 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
917 gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
919 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
920 intel_batchbuffer_emit_mi_flush(batch);
921 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
922 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
923 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
924 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
925 gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
926 gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
927 gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
/* Outer loop: slice parameter buffers; inner loop: slices per buffer. */
929 for (j = 0; j < decode_state->num_slice_params; j++) {
930 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
931 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
932 slice_data_bo = decode_state->slice_datas[j]->bo;
933 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
935 if (j == decode_state->num_slice_params - 1)
936 next_slice_group_param = NULL;
938 next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
/* Picture does not start at MB 0: emit a leading phantom slice. */
940 if (j == 0 && slice_param->first_mb_in_slice)
941 gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
943 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
944 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
945 assert((slice_param->slice_type == SLICE_TYPE_I) ||
946 (slice_param->slice_type == SLICE_TYPE_SI) ||
947 (slice_param->slice_type == SLICE_TYPE_P) ||
948 (slice_param->slice_type == SLICE_TYPE_SP) ||
949 (slice_param->slice_type == SLICE_TYPE_B));
951 if (i < decode_state->slice_params[j]->num_elements - 1)
952 next_slice_param = slice_param + 1;
954 next_slice_param = next_slice_group_param;
956 gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
957 gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
958 gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
959 gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
960 gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
965 intel_batchbuffer_end_atomic(batch);
966 intel_batchbuffer_flush(batch);
/*
 * Per-picture init for MPEG-2 decode: binds the render target as the
 * pre-deblocking output and (re)allocates the BSD/MPC row-store scratch
 * buffer sized from the picture width.
 */
gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen7_mfd_context *gen7_mfd_context)
    VAPictureParameterBufferMPEG2 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    unsigned int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
    /* Picture width in 16x16 macroblocks, rounded up. */
    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;

    mpeg2_set_reference_surfaces(
        gen7_mfd_context->reference_surface,
    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* MPEG-2 writes the reconstructed frame to the pre-deblocking output. */
    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = 1;

    /* Reallocate the BSD/MPC row-store scratch for the new picture width. */
    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* Remaining scratch buffers are not used on the MPEG-2 path. */
    gen7_mfd_context->post_deblocking_output.valid = 0;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Emits the MFX_MPEG2_PIC_STATE command (13 dwords): packs the f_codes,
 * picture-coding-extension flags, picture type and frame dimensions
 * (in macroblocks) into the BCS batch.
 */
gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int slice_concealment_disable_bit = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
    slice_concealment_disable_bit = 1;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    /* f_code holds four 4-bit sub-fields packed [0][0]..[1][1]; unpack
     * them into the hardware layout together with the coding-extension bits. */
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    /* Frame size in macroblocks, minus one, per hardware convention. */
    OUT_BCS_BATCH(batch,
                  (slice_concealment_disable_bit << 31) |
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
    /* Remaining dwords of the command are unused and zeroed. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * Updates the cached MPEG-2 quantiser matrices from the VA IQ-matrix buffer
 * (de-zigzagging them into raster order) and commits both the intra and
 * non-intra matrices to hardware via gen8_mfd_qm_state().
 */
gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        struct gen7_mfd_context *gen7_mfd_context)
    VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;

    /* Update internal QM state */
    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
        VAIQMatrixBufferMPEG2 * const iq_matrix =
            (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;

        /* -1 marks "never loaded"; otherwise only refresh when the app
         * flags a new matrix. */
        if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
            iq_matrix->load_intra_quantiser_matrix) {
            gen_iq_matrix->load_intra_quantiser_matrix =
                iq_matrix->load_intra_quantiser_matrix;
            if (iq_matrix->load_intra_quantiser_matrix) {
                /* VA supplies zigzag order; store in raster order for HW. */
                for (j = 0; j < 64; j++)
                    gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
                        iq_matrix->intra_quantiser_matrix[j];
        if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
            iq_matrix->load_non_intra_quantiser_matrix) {
            gen_iq_matrix->load_non_intra_quantiser_matrix =
                iq_matrix->load_non_intra_quantiser_matrix;
            if (iq_matrix->load_non_intra_quantiser_matrix) {
                for (j = 0; j < 64; j++)
                    gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
                        iq_matrix->non_intra_quantiser_matrix[j];

    /* Commit QM state to HW */
    for (i = 0; i < 2; i++) {
        unsigned char *qm = NULL;

        if (gen_iq_matrix->load_intra_quantiser_matrix) {
            qm = gen_iq_matrix->intra_quantiser_matrix;
            qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
        if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
            qm = gen_iq_matrix->non_intra_quantiser_matrix;
            qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
        gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
/*
 * Emits one MFD_MPEG2_BSD_OBJECT (5 dwords) for a single slice: computes
 * the slice's macroblock span from its own position and the next slice's
 * (or end of picture), then programs the bitstream offset/size and the
 * sub-byte macroblock bit offset.
 */
gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
    /* Workaround: some streams report slice_vertical_position in frame
     * units for field pictures; halve it only when the WA is active. */
    is_field_pic_wa = is_field_pic &&
                      gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    if (next_slice_param == NULL) {
        /* Last slice: extend to the bottom of the (field) picture. */
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;

    /* Macroblocks covered by this slice, in raster order. */
    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    /* macroblock_offset is in bits; >> 3 converts the whole-byte part. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (vpos1 << 8 | hpos1));
    ADVANCE_BCS_BATCH(batch);
/*
 * Top-level MPEG-2 picture decode: initialises per-picture buffers, emits
 * the common MFX pipeline state, then one BSD object per slice, and
 * finally flushes the batch.
 */
gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);

    /* < 0 means "not yet probed": detect the slice-vertical-position WA once. */
    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    /* Walk slice-parameter buffers; each buffer may hold several slices. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            /* Peek at the next slice (within this group or the next) so the
             * BSD object can compute this slice's extent. */
            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
                next_slice_param = next_slice_group_param;

            gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
/* Maps VA VC-1 picture_type indices to GEN7 hardware picture types. */
static const int va_to_gen7_vc1_pic_type[5] = {
    GEN7_VC1_BI_PICTURE,

/* Maps VA VC-1 MV modes to the hardware's unified MV-mode encoding. */
static const int va_to_gen7_vc1_mv[4] = {
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */

/* Scale factors indexed by b_picture_fraction (VC-1 BFRACTION),
 * used to derive the backward reference distance for B pictures. */
static const int b_picture_scale_factor[21] = {
    128, 85, 170, 64, 192,
    51, 102, 153, 204, 43,
    215, 37, 74, 111, 148,
    185, 222, 32, 96, 160,

/* Maps VA conditional_overlap_flag to the hardware CONDOVER encoding. */
static const int va_to_gen7_vc1_condover[3] = {

/* Maps VA sequence profile (0..3) to GEN7 VC-1 profile constants. */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
/*
 * Destructor for the per-surface VC-1 private data: releases the direct-MV
 * buffer and frees the structure. Installed as obj_surface->free_private_data.
 */
gen8_mfd_free_vc1_surface(void **data)
    struct gen7_vc1_surface *gen7_vc1_surface = *data;

    if (!gen7_vc1_surface)

    dri_bo_unreference(gen7_vc1_surface->dmv);
    free(gen7_vc1_surface);
/*
 * Lazily attaches VC-1 private data to a surface: records the picture type
 * and allocates the direct-MV read/write buffer (64 bytes per macroblock)
 * on first use.
 */
gen8_mfd_init_vc1_surface(VADriverContextP ctx,
                          VAPictureParameterBufferVC1 *pic_param,
                          struct object_surface *obj_surface)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
    int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;

    obj_surface->free_private_data = gen8_mfd_free_vc1_surface;

    if (!gen7_vc1_surface) {
        gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);

        if (!gen7_vc1_surface)

        assert((obj_surface->size & 0x3f) == 0);
        obj_surface->private_data = gen7_vc1_surface;

    /* Remember the picture type so later B-pictures can validate the
     * reference's DMV buffer. */
    gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;

    if (gen7_vc1_surface->dmv == NULL) {
        gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
                                             "direct mv w/r buffer",
                                             width_in_mbs * height_in_mbs * 64,
/*
 * Per-picture init for VC-1 decode: binds the render target to either the
 * post- or pre-deblocking output (depending on the loop-filter flag),
 * allocates row-store scratch buffers, and unpacks the VA bitplane buffer
 * (two macroblocks per byte) into the hardware bitplane-read layout.
 */
gen8_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    picture_type = pic_param->picture_fields.bits.picture_type;

    intel_update_vc1_frame_store_index(ctx,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* Exactly one of the two outputs is valid, selected by the in-loop
     * deblocking (loopfilter) flag. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 7 * 64,
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);

    if (gen7_mfd_context->bitplane_read_buffer.valid) {
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        /* Two macroblocks are packed per byte (4 bits each). */
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          bitplane_width * height_in_mbs,
        gen7_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);

        /* Repack the VA bitplane (row-major, one nibble per MB) into the
         * hardware layout, one row of macroblocks at a time. */
        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for(src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;

                src_index = (src_h * width_in_mbs + src_w) / 2;
                /* Even MBs live in the high nibble of the source byte. */
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
                /* Odd trailing macroblock: shift its nibble into place. */
                dst[src_w / 2] >>= 4;
            dst += bitplane_width;
        gen7_mfd_context->bitplane_read_buffer.bo = NULL;
/*
 * Emits MFD_VC1_LONG_PIC_STATE (6 dwords). Derives all hardware picture
 * parameters from the VA picture parameter buffer: alternate-PQUANT
 * configuration (DQUANT/DQPROFILE per VC-1 spec 7.1.1.8), unified MV mode,
 * B-picture scale factor/backward reference distance, condover/overlap
 * smoothing, transform type selection and interpolation mode, then packs
 * them into the command's bitfields.
 */
gen8_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int dmv_surface_valid = 0;
    int interpolation_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /* Decode the DQUANT syntax into the hardware's alternate-PQUANT
     * config + edge mask (which picture edges use ALTPQUANT). */
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                alt_pquant_edge_mask = 0xf;
                    alt_pquant_edge_mask = 0x9;
                    alt_pquant_edge_mask = (0x3 << dqdbedge);
                    alt_pquant_edge_mask = (0x1 << dqsbedge);

    /* Under intensity compensation the effective MV mode is mv_mode2. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        ref_field_pic_polarity = 0;

    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];

    /* Advanced-profile I pictures are treated as BI by the hardware. */
    if (profile == GEN7_VC1_ADVANCED_PROFILE &&
        picture_type == GEN7_VC1_I_PICTURE)
        picture_type = GEN7_VC1_BI_PICTURE;

    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;

     * 8.3.6.2.1 Transform Type Selection
     * If variable-sized transform coding is not enabled,
     * then the 8x8 transform shall be used for all blocks.
     * it is also MFX_VC1_PIC_STATE requirement.
    if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
        pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
        pic_param->transform_fields.bits.frame_level_transform_type = 0;

    /* The DMV buffer is only valid for B pictures whose backward
     * reference is a P picture (I/BI references carry no motion vectors). */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        struct gen7_vc1_surface *gen7_vc1_surface = NULL;

        obj_surface = decode_state->reference_objects[1];

            gen7_vc1_surface = obj_surface->private_data;

        if (!gen7_vc1_surface ||
            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
            dmv_surface_valid = 1;

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
        if (pic_param->picture_fields.bits.top_field_first)

    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
        /* Backward reference distance scaled by BFRACTION (see 8.4.5.4). */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

    overlap = pic_param->sequence_fields.bits.overlap;

    /* Overlap smoothing rules differ by profile/picture type/PQUANT
     * (VC-1 spec 8.4: overlap only when PQUANT >= 9 or CONDOVER says so). */
    if (profile != GEN7_VC1_ADVANCED_PROFILE){
        if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
            pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
            pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
            pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
            if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
        } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 9; /* Half-pel bilinear */
    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
        interpolation_mode = 1; /* Half-pel bicubic */
        interpolation_mode = 0; /* Quarter-pel bicubic */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
                  !pic_param->picture_fields.bits.is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  picture_type << 26 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  pic_param->reference_fields.bits.reference_distance << 20 |
                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    /* Bitplane flags are inverted: the hardware bit means "raw mode"
     * (bitplane NOT present for that syntax element). */
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 31 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * Emits MFX_VC1_PRED_PIPE_STATE (6 dwords): programs intensity-compensation
 * enables and the luma scale/shift values for motion prediction.
 */
gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    int intensitycomp_single;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    /* Single-field intensity compensation is on iff mv_mode selects it. */
    intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  0 << 14 | /* FIXME: double ??? */
                  intensitycomp_single << 10 |
                  intensitycomp_single << 8 |
                  0 << 4 | /* FIXME: interlace mode */
    OUT_BCS_BATCH(batch,
                  pic_param->luma_shift << 16 |
                  pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * Emits MFX_VC1_DIRECTMODE_STATE (7 dwords): programs the direct-MV write
 * buffer (current picture) and read buffer (backward reference), each as a
 * 64-bit relocation plus a MOCS dword; zeroes the address when the surface
 * has no DMV buffer.
 */
gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;

    /* Current picture's DMV buffer is written by the hardware. */
    obj_surface = decode_state->render_object;

    if (obj_surface && obj_surface->private_data) {
        dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;

    /* Backward reference's DMV buffer is read for B-picture direct mode. */
    obj_surface = decode_state->reference_objects[1];

    if (obj_surface && obj_surface->private_data) {
        dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;

    BEGIN_BCS_BATCH(batch, 7);
    OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));

    if (dmv_write_buffer)
        OUT_BCS_RELOC64(batch, dmv_write_buffer,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    if (dmv_read_buffer)
        OUT_BCS_RELOC64(batch, dmv_read_buffer,
                        I915_GEM_DOMAIN_INSTRUCTION, 0,
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    ADVANCE_BCS_BATCH(batch);
/*
 * Adjusts the slice's macroblock bit offset for emulation-prevention bytes.
 * Scans the slice header bytes for the VC-1 start-code-emulation pattern
 * (0x00 0x00 0x03 followed by a byte < 4) and rebases the bit offset onto
 * the de-escaped byte position; otherwise returns the offset unchanged.
 * NOTE(review): the escape handling applies to advanced profile only —
 * confirm the (elided) profile check against the full source.
 */
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
    int out_slice_data_bit_offset;
    int slice_header_size = in_slice_data_bit_offset / 8;

    out_slice_data_bit_offset = in_slice_data_bit_offset;

    /* i walks logical (unescaped) bytes, j walks raw bytes; j gains one
     * extra step per emulation-prevention byte encountered. */
    for (i = 0, j = 0; i < slice_header_size; i++, j++) {
        if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
    /* Rebase: whole bytes from the raw index, sub-byte bits unchanged. */
    out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;

    return out_slice_data_bit_offset;
/*
 * Emits one MFD_VC1_BSD_OBJECT (5 dwords) for a slice: maps the slice data
 * to fix up the macroblock bit offset for emulation-prevention bytes, then
 * programs the bitstream size/offset and the slice's vertical extent
 * (down to the next slice, or the bottom of the picture).
 */
gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVC1 *pic_param,
                        VASliceParameterBufferVC1 *slice_param,
                        VASliceParameterBufferVC1 *next_slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int next_slice_start_vert_pos;
    int macroblock_offset;
    uint8_t *slice_data = NULL;

    /* CPU-map the slice data read-only to inspect the header bytes. */
    dri_bo_map(slice_data_bo, 0);
    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
    macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
                                                               slice_param->macroblock_offset,
                                                               pic_param->sequence_fields.bits.profile);
    dri_bo_unmap(slice_data_bo);

    if (next_slice_param)
        next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
        next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
    /* macroblock_offset is in bits; >> 3 yields the whole-byte part. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_vertical_position << 16 |
                  next_slice_start_vert_pos << 0);
    OUT_BCS_BATCH(batch,
                  (macroblock_offset & 0x7));
    ADVANCE_BCS_BATCH(batch);
/*
 * Top-level VC-1 picture decode: per-picture init, common MFX pipeline
 * state, VC-1 pic/pred-pipe/direct-mode state, then one BSD object per
 * slice, and a final batch flush.
 */
gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);

    /* Walk slice-parameter buffers; each buffer may hold several slices. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            /* Peek at the next slice so the BSD object knows this
             * slice's vertical extent. */
            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
                next_slice_param = next_slice_group_param;

            gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
/*
 * Per-picture init for JPEG decode: derives the surface fourcc/subsampling
 * from the component sampling factors, allocates the render target, binds
 * it as the pre-deblocking output and invalidates all scratch buffers
 * (JPEG uses none of them).
 */
gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          struct gen7_mfd_context *gen7_mfd_context)
    struct object_surface *obj_surface;
    VAPictureParameterBufferJPEGBaseline *pic_param;
    int subsampling = SUBSAMPLE_YUV420;
    int fourcc = VA_FOURCC_IMC3;

    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    if (pic_param->num_components == 1) {
        /* Grayscale JPEG. */
        subsampling = SUBSAMPLE_YUV400;
        fourcc = VA_FOURCC_Y800;
    } else if (pic_param->num_components == 3) {
        int h1 = pic_param->components[0].h_sampling_factor;
        int h2 = pic_param->components[1].h_sampling_factor;
        int h3 = pic_param->components[2].h_sampling_factor;
        int v1 = pic_param->components[0].v_sampling_factor;
        int v2 = pic_param->components[1].v_sampling_factor;
        int v3 = pic_param->components[2].v_sampling_factor;

        /* Match the luma/chroma sampling-factor ratios to a planar
         * layout the hardware supports. */
        if (h1 == 2 && h2 == 1 && h3 == 1 &&
            v1 == 2 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV420;
            fourcc = VA_FOURCC_IMC3;
        } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
                   v1 == 1 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV422H;
            fourcc = VA_FOURCC_422H;
        } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
                   v1 == 1 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV444;
            fourcc = VA_FOURCC_444P;
        } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
                   v1 == 1 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV411;
            fourcc = VA_FOURCC_411P;
        } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
                   v1 == 2 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV422V;
            fourcc = VA_FOURCC_422V;
        } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
                   v1 == 2 && v2 == 2 && v3 == 2) {
            subsampling = SUBSAMPLE_YUV422H;
            fourcc = VA_FOURCC_422H;
        } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
                   v1 == 2 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV422V;
            fourcc = VA_FOURCC_422V;

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = 1;

    /* JPEG uses no deblocking or row-store scratch buffers. */
    gen7_mfd_context->post_deblocking_output.bo = NULL;
    gen7_mfd_context->post_deblocking_output.valid = 0;

    gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.bo = NULL;
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Maps a VA rotation index (0..3) to the GEN7 JPEG rotation encoding. */
1971 static const int va_to_gen7_jpeg_rotation[4] = {
1972     GEN7_JPEG_ROTATION_0,
1973     GEN7_JPEG_ROTATION_90,
1974     GEN7_JPEG_ROTATION_180,
1975     GEN7_JPEG_ROTATION_270
/*
 * Emits MFX_JPEG_PIC_STATE for the current picture.
 *
 * Selects the hardware chroma type from the per-component sampling
 * factors (mirroring the table in gen8_mfd_jpeg_decode_init) and
 * programs the frame dimensions in 8x8 block units, scaled per chroma
 * type so the counts cover the MCU grid.
 *
 * Fix: the GEN7_YUV422V_4Y match previously tested
 * "h2 == 2 && h2 == 2 && h3 == 2", duplicating h2 and never checking
 * the luma horizontal sampling factor h1; it must be
 * "h1 == 2 && h2 == 2 && h3 == 2" to match the 2x2:2x1:2x1 pattern
 * (consistent with the SUBSAMPLE_YUV422V mapping in
 * gen8_mfd_jpeg_decode_init).
 */
1979 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1980                         struct decode_state *decode_state,
1981                         struct gen7_mfd_context *gen7_mfd_context)
1983     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1984     VAPictureParameterBufferJPEGBaseline *pic_param;
1985     int chroma_type = GEN7_YUV420;
1986     int frame_width_in_blks;
1987     int frame_height_in_blks;
1989     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1990     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1992     if (pic_param->num_components == 1)
1993         chroma_type = GEN7_YUV400;
1994     else if (pic_param->num_components == 3) {
1995         int h1 = pic_param->components[0].h_sampling_factor;
1996         int h2 = pic_param->components[1].h_sampling_factor;
1997         int h3 = pic_param->components[2].h_sampling_factor;
1998         int v1 = pic_param->components[0].v_sampling_factor;
1999         int v2 = pic_param->components[1].v_sampling_factor;
2000         int v3 = pic_param->components[2].v_sampling_factor;
2002         if (h1 == 2 && h2 == 1 && h3 == 1 &&
2003             v1 == 2 && v2 == 1 && v3 == 1)
2004             chroma_type = GEN7_YUV420;
2005         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2006                  v1 == 1 && v2 == 1 && v3 == 1)
2007             chroma_type = GEN7_YUV422H_2Y;
2008         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2009                  v1 == 1 && v2 == 1 && v3 == 1)
2010             chroma_type = GEN7_YUV444;
2011         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2012                  v1 == 1 && v2 == 1 && v3 == 1)
2013             chroma_type = GEN7_YUV411;
2014         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2015                  v1 == 2 && v2 == 1 && v3 == 1)
2016             chroma_type = GEN7_YUV422V_2Y;
2017         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2018                  v1 == 2 && v2 == 2 && v3 == 2)
2019             chroma_type = GEN7_YUV422H_4Y;
         /* Fixed: was "h2 == 2 && h2 == 2" — h1 was never checked. */
2020         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2021                  v1 == 2 && v2 == 1 && v3 == 1)
2022             chroma_type = GEN7_YUV422V_4Y;
     /* Frame size in 8x8 blocks, rounded up to full MCUs per chroma type. */
2027     if (chroma_type == GEN7_YUV400 ||
2028         chroma_type == GEN7_YUV444 ||
2029         chroma_type == GEN7_YUV422V_2Y) {
2030         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2031         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2032     } else if (chroma_type == GEN7_YUV411) {
2033         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2034         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2036         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2037         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2040     BEGIN_BCS_BATCH(batch, 3);
2041     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2042     OUT_BCS_BATCH(batch,
2043                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2044                   (chroma_type << 0));
2045     OUT_BCS_BATCH(batch,
2046                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2047                   ((frame_width_in_blks - 1) << 0));     /* FrameWidthInBlks */
2048     ADVANCE_BCS_BATCH(batch);
/* Maps a VA Huffman table index (0..1) to the GEN7 hardware table id. */
2051 static const int va_to_gen7_jpeg_hufftable[2] = {
/*
 * Emits MFX_JPEG_HUFF_TABLE_STATE for each Huffman table the app loaded.
 *
 * Bails out silently when no huffman table buffer was supplied.  For each
 * of the first num_tables entries whose load flag is set, streams the DC
 * code counts/values and AC code counts/values into a 53-dword command.
 * Note the AC values are written as 164 bytes (162 spec bytes padded to a
 * dword boundary).
 */
2057 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2058                                struct decode_state *decode_state,
2059                                struct gen7_mfd_context *gen7_mfd_context,
2062     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2063     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2066     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2069     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2071     for (index = 0; index < num_tables; index++) {
2072         int id = va_to_gen7_jpeg_hufftable[index];
         /* Skip tables the application did not (re)load. */
2073         if (!huffman_table->load_huffman_table[index])
2075         BEGIN_BCS_BATCH(batch, 53);
2076         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2077         OUT_BCS_BATCH(batch, id);
2078         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2079         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2080         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2081         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2082         ADVANCE_BCS_BATCH(batch);
/* Maps a 1-based JPEG component id to the MFX quantizer-matrix type. */
2086 static const int va_to_gen7_jpeg_qm[5] = {
2088     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2089     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2090     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2091     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
/*
 * Programs the JPEG quantization matrices for every picture component.
 *
 * The VA IQ matrix stores coefficients in zig-zag scan order; the
 * hardware expects raster order, so each table is de-zigzagged before
 * being emitted via gen8_mfd_qm_state().  Components map to hardware QM
 * slots through their 1-based component id (bounds-checked to 1..4).
 */
2095 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2096                        struct decode_state *decode_state,
2097                        struct gen7_mfd_context *gen7_mfd_context)
2099     VAPictureParameterBufferJPEGBaseline *pic_param;
2100     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2103     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2106     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2107     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2109     assert(pic_param->num_components <= 3);
2111     for (index = 0; index < pic_param->num_components; index++) {
         /* Component ids are normalized relative to the first component. */
2112         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2114         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2115         unsigned char raster_qm[64];
         /* Only ids 1..4 have a hardware QM slot. */
2118         if (id > 4 || id < 1)
2121         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2124         qm_type = va_to_gen7_jpeg_qm[id];
         /* Convert from zig-zag scan order to raster order. */
2126         for (j = 0; j < 64; j++)
2127             raster_qm[zigzag_direct[j]] = qm[j];
2129         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
/*
 * Emits one MFD_JPEG_BSD_OBJECT command for a JPEG scan.
 *
 * Builds the scan-component mask from each component's selector
 * (normalized against the first picture component) and programs the
 * slice data size/offset, the scan start position, MCU count, and the
 * restart interval.
 */
2134 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2135                          VAPictureParameterBufferJPEGBaseline *pic_param,
2136                          VASliceParameterBufferJPEGBaseline *slice_param,
2137                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2138                          dri_bo *slice_data_bo,
2139                          struct gen7_mfd_context *gen7_mfd_context)
2141     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2142     int scan_component_mask = 0;
2145     assert(slice_param->num_components > 0);
2146     assert(slice_param->num_components < 4);
2147     assert(slice_param->num_components <= pic_param->num_components);
     /* One mask bit per component present in this scan (Y/Cb/Cr). */
2149     for (i = 0; i < slice_param->num_components; i++) {
2150         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2152             scan_component_mask |= (1 << 0);
2155             scan_component_mask |= (1 << 1);
2158             scan_component_mask |= (1 << 2);
2166     BEGIN_BCS_BATCH(batch, 6);
2167     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2168     OUT_BCS_BATCH(batch,
2169                   slice_param->slice_data_size);
2170     OUT_BCS_BATCH(batch,
2171                   slice_param->slice_data_offset);
2172     OUT_BCS_BATCH(batch,
2173                   slice_param->slice_horizontal_position << 16 |
2174                   slice_param->slice_vertical_position << 0);
2175     OUT_BCS_BATCH(batch,
2176                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2177                   (scan_component_mask << 27) |                 /* scan components */
2178                   (0 << 26) |   /* disable interrupt allowed */
2179                   (slice_param->num_mcus << 0));                /* MCU count */
2180     OUT_BCS_BATCH(batch,
2181                   (slice_param->restart_interval << 0));    /* RestartInterval */
2182     ADVANCE_BCS_BATCH(batch);
2185 /* Workaround for JPEG decoding on Ivybridge */
/*
 * Tiny canned AVC clip decoded before each real JPEG picture to put the
 * MFX pipeline into a known state.  Holds the bitstream bytes plus its
 * geometry/offsets (fields partially elided in this view).
 */
2191     unsigned char data[32];
2193     int data_bit_offset;
2195 } gen7_jpeg_wa_clip = {
2199         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2200         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
/*
 * Allocates the resources needed by the JPEG decode workaround:
 * a small NV12 scratch surface (recreated on every call) and a BO
 * holding the canned AVC clip bitstream (created once, then reused).
 */
2208 gen8_jpeg_wa_init(VADriverContextP ctx,
2209                   struct gen7_mfd_context *gen7_mfd_context)
2211     struct i965_driver_data *i965 = i965_driver_data(ctx);
2213     struct object_surface *obj_surface;
     /* Drop any scratch surface left over from a previous picture. */
2215     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2216         i965_DestroySurfaces(ctx,
2217                              &gen7_mfd_context->jpeg_wa_surface_id,
2220     status = i965_CreateSurfaces(ctx,
2221                                  gen7_jpeg_wa_clip.width,
2222                                  gen7_jpeg_wa_clip.height,
2223                                  VA_RT_FORMAT_YUV420,
2225                                  &gen7_mfd_context->jpeg_wa_surface_id);
2226     assert(status == VA_STATUS_SUCCESS);
2228     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2229     assert(obj_surface);
2230     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2231     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
     /* The clip bitstream BO is uploaded once and kept for the context. */
2233     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2234         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2238         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2240                        gen7_jpeg_wa_clip.data_size,
2241                        gen7_jpeg_wa_clip.data);
/*
 * MFX_PIPE_MODE_SELECT for the workaround pass: AVC VLD decode with the
 * pre-deblocking output enabled (this runs the canned AVC clip, not the
 * real JPEG picture).
 */
2246 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2247                               struct gen7_mfd_context *gen7_mfd_context)
2249     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2251     BEGIN_BCS_BATCH(batch, 5);
2252     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2253     OUT_BCS_BATCH(batch,
2254                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2255                   (MFD_MODE_VLD << 15) | /* VLD mode */
2256                   (0 << 10) | /* disable Stream-Out */
2257                   (0 << 9)  | /* Post Deblocking Output */
2258                   (1 << 8)  | /* Pre Deblocking Output */
2259                   (0 << 5)  | /* not in stitch mode */
2260                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2261                   (MFX_FORMAT_AVC << 0));
2262     OUT_BCS_BATCH(batch,
2263                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2264                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2265                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2268     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2269     OUT_BCS_BATCH(batch, 0); /* reserved */
2270     ADVANCE_BCS_BATCH(batch);
/*
 * MFX_SURFACE_STATE for the workaround's NV12 scratch surface:
 * 4:2:0 planar, interleaved chroma, Y-major tiled, with the Cb plane
 * offset taken from the surface object.
 */
2274 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2275                            struct gen7_mfd_context *gen7_mfd_context)
2277     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2278     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2280     BEGIN_BCS_BATCH(batch, 6);
2281     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2282     OUT_BCS_BATCH(batch, 0);
2283     OUT_BCS_BATCH(batch,
2284                   ((obj_surface->orig_width - 1) << 18) |
2285                   ((obj_surface->orig_height - 1) << 4));
2286     OUT_BCS_BATCH(batch,
2287                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2288                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2289                   (0 << 22) | /* surface object control state, ignored */
2290                   ((obj_surface->width - 1) << 3) | /* pitch */
2291                   (0 << 2)  | /* must be 0 */
2292                   (1 << 1)  | /* must be tiled */
2293                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2294     OUT_BCS_BATCH(batch,
2295                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2296                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2297     OUT_BCS_BATCH(batch,
2298                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2299                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
2300     ADVANCE_BCS_BATCH(batch);
/*
 * MFX_PIPE_BUF_ADDR_STATE for the workaround pass.
 *
 * Points the pre-deblocking output at the scratch surface and supplies a
 * temporary intra row-store BO (allocated here and unreferenced at the
 * end — the kernel keeps it alive while the batch executes).  All other
 * buffer slots, including the 16 reference-frame entries, are zeroed.
 */
2304 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2305                                  struct gen7_mfd_context *gen7_mfd_context)
2307     struct i965_driver_data *i965 = i965_driver_data(ctx);
2308     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2309     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2313     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2318     BEGIN_BCS_BATCH(batch, 61);
2319     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2320     OUT_BCS_RELOC64(batch,
2322                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2324     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2327     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2328     OUT_BCS_BATCH(batch, 0);
2329     OUT_BCS_BATCH(batch, 0);
2331     /* uncompressed-video & stream out 7-12 */
2332     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2333     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2334     OUT_BCS_BATCH(batch, 0);
2335     OUT_BCS_BATCH(batch, 0);
2336     OUT_BCS_BATCH(batch, 0);
2337     OUT_BCS_BATCH(batch, 0);
2339     /* the DW 13-15 is for intra row store scratch */
2340     OUT_BCS_RELOC64(batch,
2342                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2345     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2347     /* the DW 16-18 is for deblocking filter */
2348     OUT_BCS_BATCH(batch, 0);
2349     OUT_BCS_BATCH(batch, 0);
2350     OUT_BCS_BATCH(batch, 0);
     /* No reference frames are needed for the canned intra-only clip. */
2353     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2354         OUT_BCS_BATCH(batch, 0);
2355         OUT_BCS_BATCH(batch, 0);
2357     OUT_BCS_BATCH(batch, 0);
2359     /* the DW52-54 is for mb status address */
2360     OUT_BCS_BATCH(batch, 0);
2361     OUT_BCS_BATCH(batch, 0);
2362     OUT_BCS_BATCH(batch, 0);
2363     /* the DW56-60 is for ILDB & second ILDB address */
2364     OUT_BCS_BATCH(batch, 0);
2365     OUT_BCS_BATCH(batch, 0);
2366     OUT_BCS_BATCH(batch, 0);
2367     OUT_BCS_BATCH(batch, 0);
2368     OUT_BCS_BATCH(batch, 0);
2369     OUT_BCS_BATCH(batch, 0);
2371     ADVANCE_BCS_BATCH(batch);
     /* Safe to drop our reference; the relocation keeps the BO alive. */
2373     dri_bo_unreference(intra_bo);
/*
 * MFX_BSP_BUF_BASE_ADDR_STATE for the workaround pass.
 *
 * Allocates temporary BSD/MPC and MPR row-store BOs sized for the
 * maximum 120-MB-wide frame, emits their relocations, then drops the
 * local references (relocations keep them alive until the batch ends).
 */
2377 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2378                                      struct gen7_mfd_context *gen7_mfd_context)
2380     struct i965_driver_data *i965 = i965_driver_data(ctx);
2381     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2382     dri_bo *bsd_mpc_bo, *mpr_bo;
2384     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2385                               "bsd mpc row store",
2386                               11520, /* 1.5 * 120 * 64 */
2389     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2391                           7680, /* 1. 0 * 120 * 64 */
2394     BEGIN_BCS_BATCH(batch, 10);
2395     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2397     OUT_BCS_RELOC64(batch,
2399                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2402     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2404     OUT_BCS_RELOC64(batch,
2406                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2408     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2410     OUT_BCS_BATCH(batch, 0);
2411     OUT_BCS_BATCH(batch, 0);
2412     OUT_BCS_BATCH(batch, 0);
2414     ADVANCE_BCS_BATCH(batch);
2416     dri_bo_unreference(bsd_mpc_bo);
2417     dri_bo_unreference(mpr_bo);
/* AVC QM state for the workaround clip — intentionally a no-op here
 * (body elided in this view; the clip uses the default flat matrices). */
2421 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2422                           struct gen7_mfd_context *gen7_mfd_context)
/*
 * MFX_AVC_IMG_STATE for the workaround clip: a fixed 1x1-macroblock,
 * 4:2:0 CABAC frame with MBAFF disabled.
 */
2428 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2429                            struct gen7_mfd_context *gen7_mfd_context)
2431     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2433     int mbaff_frame_flag = 0;
2434     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2436     BEGIN_BCS_BATCH(batch, 16);
2437     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2438     OUT_BCS_BATCH(batch,
2439                   width_in_mbs * height_in_mbs);
2440     OUT_BCS_BATCH(batch,
2441                   ((height_in_mbs - 1) << 16) |
2442                   ((width_in_mbs - 1) << 0));
2443     OUT_BCS_BATCH(batch,
2448                   (0 << 12) | /* differ from GEN6 */
2451     OUT_BCS_BATCH(batch,
2452                   (1 << 10) | /* 4:2:0 */
2453                   (1 << 7) |  /* CABAC */
2459                   (mbaff_frame_flag << 1) |
2461     OUT_BCS_BATCH(batch, 0);
2462     OUT_BCS_BATCH(batch, 0);
2463     OUT_BCS_BATCH(batch, 0);
2464     OUT_BCS_BATCH(batch, 0);
2465     OUT_BCS_BATCH(batch, 0);
2466     OUT_BCS_BATCH(batch, 0);
2467     OUT_BCS_BATCH(batch, 0);
2468     OUT_BCS_BATCH(batch, 0);
2469     OUT_BCS_BATCH(batch, 0);
2470     OUT_BCS_BATCH(batch, 0);
2471     OUT_BCS_BATCH(batch, 0);
2472     ADVANCE_BCS_BATCH(batch);
/*
 * MFX_AVC_DIRECTMODE_STATE for the workaround clip: all reference
 * surface addresses and POC entries are zero since the canned clip is
 * intra-only and uses no direct-mode prediction.
 */
2476 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2477                                   struct gen7_mfd_context *gen7_mfd_context)
2479     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2482     BEGIN_BCS_BATCH(batch, 71);
2483     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2485     /* reference surfaces 0..15 */
2486     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2487         OUT_BCS_BATCH(batch, 0); /* top */
2488         OUT_BCS_BATCH(batch, 0); /* bottom */
2491     OUT_BCS_BATCH(batch, 0);
2493     /* the current decoding frame/field */
2494     OUT_BCS_BATCH(batch, 0); /* top */
2495     OUT_BCS_BATCH(batch, 0);
2496     OUT_BCS_BATCH(batch, 0);
     /* POC list entries, also all zero for the intra-only clip. */
2499     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2500         OUT_BCS_BATCH(batch, 0);
2501         OUT_BCS_BATCH(batch, 0);
2504     OUT_BCS_BATCH(batch, 0);
2505     OUT_BCS_BATCH(batch, 0);
2507     ADVANCE_BCS_BATCH(batch);
/*
 * MFX_IND_OBJ_BASE_ADDR_STATE for the workaround pass: points the
 * bitstream base at the canned clip's slice-data BO; the remaining
 * indirect-object slots are unused in VLD mode.
 */
2511 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2512                                      struct gen7_mfd_context *gen7_mfd_context)
2514     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2516     BEGIN_BCS_BATCH(batch, 11);
2517     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2518     OUT_BCS_RELOC64(batch,
2519                     gen7_mfd_context->jpeg_wa_slice_data_bo,
2520                     I915_GEM_DOMAIN_INSTRUCTION, 0,
2522     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2523     OUT_BCS_BATCH(batch, 0);
2524     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2525     OUT_BCS_BATCH(batch, 0);
2526     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2527     OUT_BCS_BATCH(batch, 0);
2528     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2529     OUT_BCS_BATCH(batch, 0);
2530     ADVANCE_BCS_BATCH(batch);
/*
 * MFD_AVC_BSD_OBJECT for the workaround clip: a single, last slice
 * starting at the clip's byte/bit offset within the slice-data BO.
 */
2534 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2535                             struct gen7_mfd_context *gen7_mfd_context)
2537     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2539     /* the input bitsteam format on GEN7 differs from GEN6 */
2540     BEGIN_BCS_BATCH(batch, 6);
2541     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2542     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2543     OUT_BCS_BATCH(batch, 0);
2544     OUT_BCS_BATCH(batch,
     /* Split the bit offset into a byte part and a residual 0..7 bits. */
2550     OUT_BCS_BATCH(batch,
2551                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2554                   (1 << 3) | /* LastSlice Flag */
2555                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2556     OUT_BCS_BATCH(batch, 0);
2557     ADVANCE_BCS_BATCH(batch);
/*
 * MFX_AVC_SLICE_STATE for the workaround clip: one I-slice covering the
 * single macroblock, deblocking disabled, marked as the last slice.
 */
2561 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2562                              struct gen7_mfd_context *gen7_mfd_context)
2564     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2565     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2566     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2567     int first_mb_in_slice = 0;
2568     int slice_type = SLICE_TYPE_I;
2570     BEGIN_BCS_BATCH(batch, 11);
2571     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2572     OUT_BCS_BATCH(batch, slice_type);
2573     OUT_BCS_BATCH(batch,
2574                   (num_ref_idx_l1 << 24) |
2575                   (num_ref_idx_l0 << 16) |
2578     OUT_BCS_BATCH(batch,
2580                   (1 << 27) |   /* disable Deblocking */
2582                   (gen7_jpeg_wa_clip.qp << 16) |
2585     OUT_BCS_BATCH(batch,
2586                   (slice_ver_pos << 24) |
2587                   (slice_hor_pos << 16) |
2588                   (first_mb_in_slice << 0));
2589     OUT_BCS_BATCH(batch,
2590                   (next_slice_ver_pos << 16) |
2591                   (next_slice_hor_pos << 0));
2592     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2593     OUT_BCS_BATCH(batch, 0);
2594     OUT_BCS_BATCH(batch, 0);
2595     OUT_BCS_BATCH(batch, 0);
2596     OUT_BCS_BATCH(batch, 0);
2597     ADVANCE_BCS_BATCH(batch);
/*
 * Runs the complete JPEG workaround sequence: decodes the canned AVC
 * clip through the full MFX state pipeline to reset the hardware before
 * the real JPEG picture is programmed.
 */
2601 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2602                  struct gen7_mfd_context *gen7_mfd_context)
2604     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2605     gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2606     intel_batchbuffer_emit_mi_flush(batch);
2607     gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2608     gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2609     gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2610     gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2611     gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2612     gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2613     gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2615     gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2616     gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2617     gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
/*
 * Top-level baseline JPEG picture decode.
 *
 * After init and the hardware workaround, programs the common MFX state,
 * then walks the slice parameters twice: a first pass to find the
 * highest DC/AC Huffman table selector actually referenced (so only the
 * needed tables are uploaded), and a second pass to emit one BSD object
 * per scan.
 */
2623 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2624                              struct decode_state *decode_state,
2625                              struct gen7_mfd_context *gen7_mfd_context)
2627     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2628     VAPictureParameterBufferJPEGBaseline *pic_param;
2629     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2630     dri_bo *slice_data_bo;
2631     int i, j, max_selector = 0;
2633     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2634     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2636     /* Currently only support Baseline DCT */
2637     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2638     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2640     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2642     intel_batchbuffer_emit_mi_flush(batch);
2643     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2644     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2645     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2646     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2647     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
     /* First pass: find the highest Huffman table selector in use. */
2649     for (j = 0; j < decode_state->num_slice_params; j++) {
2650         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2651         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2652         slice_data_bo = decode_state->slice_datas[j]->bo;
2653         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2655         if (j == decode_state->num_slice_params - 1)
2656             next_slice_group_param = NULL;
2658             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2660         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2663             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2665             if (i < decode_state->slice_params[j]->num_elements - 1)
2666                 next_slice_param = slice_param + 1;
2668                 next_slice_param = next_slice_group_param;
2670             for (component = 0; component < slice_param->num_components; component++) {
2671                 if (max_selector < slice_param->components[component].dc_table_selector)
2672                     max_selector = slice_param->components[component].dc_table_selector;
2674                 if (max_selector < slice_param->components[component].ac_table_selector)
2675                     max_selector = slice_param->components[component].ac_table_selector;
     /* Baseline JPEG allows at most two Huffman tables per class. */
2682     assert(max_selector < 2);
2683     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
     /* Second pass: emit one BSD object per scan. */
2685     for (j = 0; j < decode_state->num_slice_params; j++) {
2686         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2687         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2688         slice_data_bo = decode_state->slice_datas[j]->bo;
2689         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2691         if (j == decode_state->num_slice_params - 1)
2692             next_slice_group_param = NULL;
2694             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2696         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2697             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2699             if (i < decode_state->slice_params[j]->num_elements - 1)
2700                 next_slice_param = slice_param + 1;
2702                 next_slice_param = next_slice_group_param;
2704             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2709     intel_batchbuffer_end_atomic(batch);
2710     intel_batchbuffer_flush(batch);
/* VP8 DC dequantization lookup table, indexed by the clipped
 * quantization index (0..127); values per the VP8 specification. */
2713 static const int vp8_dc_qlookup[128] =
2715     4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
2716     18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
2717     29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
2718     44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
2719     59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
2720     75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
2721     91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2722     122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
/* VP8 AC dequantization lookup table, indexed by the clipped
 * quantization index (0..127); values per the VP8 specification. */
2725 static const int vp8_ac_qlookup[128] =
2727     4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
2728     20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
2729     36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
2730     52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
2731     78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
2732     110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2733     155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2734     213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
/* Clamps a VP8 quantization index into the valid 0..127 table range
 * (body elided in this view). */
2737 static inline unsigned int vp8_clip_quantization_index(int index)
/*
 * Per-picture initialization for VP8 decoding.
 *
 * Updates the reference frame-store table, allocates the NV12 render
 * target, and routes the decoded picture through the post- or
 * pre-deblocking output depending on whether the in-loop filter is
 * enabled.  Also (re)allocates the AVC-style row-store scratch buffers
 * sized by the frame width in macroblocks and the VP8 segmentation map
 * buffer.
 */
2748 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2749                          struct decode_state *decode_state,
2750                          struct gen7_mfd_context *gen7_mfd_context)
2752     struct object_surface *obj_surface;
2753     struct i965_driver_data *i965 = i965_driver_data(ctx);
2755     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2756     int width_in_mbs = (pic_param->frame_width + 15) / 16;
2757     int height_in_mbs = (pic_param->frame_height + 15) / 16;
2759     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2760     assert(height_in_mbs > 0 && height_in_mbs <= 256);
2762     intel_update_vp8_frame_store_index(ctx,
2765                                        gen7_mfd_context->reference_surface);
2767     /* Current decoded picture */
2768     obj_surface = decode_state->render_object;
2769     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
     /* Exactly one of post/pre deblocking output is valid, selected by
      * the loop-filter-disable flag. */
2771     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2772     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2773     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2774     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2776     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2777     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2778     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2779     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2781     intel_ensure_vp8_segmentation_buffer(ctx,
2782                                          &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2784     /* The same as AVC */
2785     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2786     bo = dri_bo_alloc(i965->intel.bufmgr,
2791     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2792     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2794     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2795     bo = dri_bo_alloc(i965->intel.bufmgr,
2796                       "deblocking filter row store",
2797                       width_in_mbs * 64 * 4,
2800     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2801     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2803     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2804     bo = dri_bo_alloc(i965->intel.bufmgr,
2805                       "bsd mpc row store",
2806                       width_in_mbs * 64 * 2,
2809     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2810     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2812     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2813     bo = dri_bo_alloc(i965->intel.bufmgr,
2815                       width_in_mbs * 64 * 2,
2818     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2819     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
     /* VP8 carries no bitplane data. */
2821     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2825 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2826 struct decode_state *decode_state,
2827 struct gen7_mfd_context *gen7_mfd_context)
2829 struct i965_driver_data *i965 = i965_driver_data(ctx);
2830 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2831 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2832 VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2833 VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2834 dri_bo *probs_bo = decode_state->probability_data->bo;
2836 unsigned int quantization_value[4][6];
2838 /* There is no safe way to error out if the segmentation buffer
2839 could not be allocated. So, instead of aborting, simply decode
2840 something even if the result may look totally inacurate */
2841 const unsigned int enable_segmentation =
2842 pic_param->pic_fields.bits.segmentation_enabled &&
2843 gen7_mfd_context->segmentation_buffer.valid;
2845 log2num = (int)log2(slice_param->num_of_partitions - 1);
2847 BEGIN_BCS_BATCH(batch, 38);
2848 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2849 OUT_BCS_BATCH(batch,
2850 (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2851 (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2852 OUT_BCS_BATCH(batch,
2854 pic_param->pic_fields.bits.sharpness_level << 16 |
2855 pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2856 pic_param->pic_fields.bits.sign_bias_golden << 12 |
2857 pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2858 pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2859 (enable_segmentation &&
2860 pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
2861 pic_param->pic_fields.bits.segmentation_enabled << 8 |
2862 (enable_segmentation &&
2863 !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2864 (enable_segmentation &&
2865 pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2866 (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2867 pic_param->pic_fields.bits.filter_type << 4 |
2868 (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2869 !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2871 OUT_BCS_BATCH(batch,
2872 pic_param->loop_filter_level[3] << 24 |
2873 pic_param->loop_filter_level[2] << 16 |
2874 pic_param->loop_filter_level[1] << 8 |
2875 pic_param->loop_filter_level[0] << 0);
2877 /* Quantizer Value for 4 segmetns, DW4-DW15 */
2878 for (i = 0; i < 4; i++) {
2879 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2880 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2881 quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
2882 /* 101581>>16 is equivalent to 155/100 */
2883 quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
2884 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2885 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2887 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2888 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2890 OUT_BCS_BATCH(batch,
2891 quantization_value[i][0] << 16 | /* Y1AC */
2892 quantization_value[i][1] << 0); /* Y1DC */
2893 OUT_BCS_BATCH(batch,
2894 quantization_value[i][5] << 16 | /* UVAC */
2895 quantization_value[i][4] << 0); /* UVDC */
2896 OUT_BCS_BATCH(batch,
2897 quantization_value[i][3] << 16 | /* Y2AC */
2898 quantization_value[i][2] << 0); /* Y2DC */
2901 /* CoeffProbability table for non-key frame, DW16-DW18 */
2903 OUT_BCS_RELOC64(batch, probs_bo,
2904 0, I915_GEM_DOMAIN_INSTRUCTION,
2906 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2908 OUT_BCS_BATCH(batch, 0);
2909 OUT_BCS_BATCH(batch, 0);
2910 OUT_BCS_BATCH(batch, 0);
2913 OUT_BCS_BATCH(batch,
2914 pic_param->mb_segment_tree_probs[2] << 16 |
2915 pic_param->mb_segment_tree_probs[1] << 8 |
2916 pic_param->mb_segment_tree_probs[0] << 0);
2918 OUT_BCS_BATCH(batch,
2919 pic_param->prob_skip_false << 24 |
2920 pic_param->prob_intra << 16 |
2921 pic_param->prob_last << 8 |
2922 pic_param->prob_gf << 0);
2924 OUT_BCS_BATCH(batch,
2925 pic_param->y_mode_probs[3] << 24 |
2926 pic_param->y_mode_probs[2] << 16 |
2927 pic_param->y_mode_probs[1] << 8 |
2928 pic_param->y_mode_probs[0] << 0);
2930 OUT_BCS_BATCH(batch,
2931 pic_param->uv_mode_probs[2] << 16 |
2932 pic_param->uv_mode_probs[1] << 8 |
2933 pic_param->uv_mode_probs[0] << 0);
2935 /* MV update value, DW23-DW32 */
2936 for (i = 0; i < 2; i++) {
2937 for (j = 0; j < 20; j += 4) {
2938 OUT_BCS_BATCH(batch,
2939 (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2940 pic_param->mv_probs[i][j + 2] << 16 |
2941 pic_param->mv_probs[i][j + 1] << 8 |
2942 pic_param->mv_probs[i][j + 0] << 0);
2946 OUT_BCS_BATCH(batch,
2947 (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2948 (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2949 (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) << 8 |
2950 (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) << 0);
2952 OUT_BCS_BATCH(batch,
2953 (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2954 (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2955 (pic_param->loop_filter_deltas_mode[1] & 0x7f) << 8 |
2956 (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0);
2958 /* segmentation id stream base address, DW35-DW37 */
2959 if (enable_segmentation) {
2960 OUT_BCS_RELOC64(batch, gen7_mfd_context->segmentation_buffer.bo,
2961 0, I915_GEM_DOMAIN_INSTRUCTION,
2963 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2966 OUT_BCS_BATCH(batch, 0);
2967 OUT_BCS_BATCH(batch, 0);
2968 OUT_BCS_BATCH(batch, 0);
2970 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the MFD_VP8_BSD_OBJECT command (22 DWs) that describes the VP8
 * bitstream layout to the HW bitstream decoder: the boolean-coder state
 * and byte count for partition 0, followed by size/offset pairs for up
 * to eight further token partitions.
 *
 * NOTE(review): this extraction is missing physical lines (the return-type
 * line, braces, and the declarations of `i` and `log2num`); the code below
 * is left byte-identical.
 */
2974 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2975 VAPictureParameterBufferVP8 *pic_param,
2976 VASliceParameterBufferVP8 *slice_param,
2977 dri_bo *slice_data_bo,
2978 struct gen7_mfd_context *gen7_mfd_context)
2980 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Byte offset of partition 0's compressed data: macroblock_offset is in
 * bits, so round it up to the containing byte. */
2982 unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
/* bool_coder_ctx.count holds the unconsumed bits of the current byte, so
 * 8 - count is how many bits the entropy decoder has already used. */
2983 unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
2984 unsigned int partition_size_0 = slice_param->partition_size[0];
2986 assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
/* If the whole byte was consumed, fold it out of partition 0's size
 * (used_bits is presumably reset to 0 on a dropped line — confirm). */
2987 if (used_bits == 8) {
2990 partition_size_0 -= 1;
/* VP8 carries 1 first partition plus 1..8 token partitions. */
2993 assert(slice_param->num_of_partitions >= 2);
2994 assert(slice_param->num_of_partitions <= 9);
2996 log2num = (int)log2(slice_param->num_of_partitions - 1);
2998 BEGIN_BCS_BATCH(batch, 22);
2999 OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
/* Partition 0 entropy-coder state + sub-byte macroblock bit offset. */
3000 OUT_BCS_BATCH(batch,
3001 used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
3002 pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
3004 (slice_param->macroblock_offset & 0x7));
3005 OUT_BCS_BATCH(batch,
3006 pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
/* Partition 0 byte count and its offset into the indirect object buffer. */
3009 OUT_BCS_BATCH(batch, partition_size_0 + 1);
3010 OUT_BCS_BATCH(batch, offset);
3011 //partition sizes in bytes are present after the above first partition when there are more than one token partition
/* Skip past partition 0 and the 3-byte-per-entry partition-size table to
 * reach the first token partition's data. */
3012 offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
/* Size/offset pairs for the up-to-8 token partitions; unused slots are
 * zero-filled so the command is always 22 DWs. */
3013 for (i = 1; i < 9; i++) {
3014 if (i < slice_param->num_of_partitions) {
3015 OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
3016 OUT_BCS_BATCH(batch, offset);
3018 OUT_BCS_BATCH(batch, 0);
3019 OUT_BCS_BATCH(batch, 0);
3022 offset += slice_param->partition_size[i];
3025 OUT_BCS_BATCH(batch, 0); /* concealment method */
3027 ADVANCE_BCS_BATCH(batch);
/*
 * Decode one VP8 frame: validate the decode_state inputs, then emit the
 * full gen8 MFX command sequence (pipe mode, surface/buffer state, picture
 * state, BSD object) for the frame on the BCS ring and flush the batch.
 *
 * NOTE(review): the extraction has dropped some lines here (the return-type
 * line, braces, and presumably an early `return` after WARN_ONCE); the
 * code is left byte-identical.
 */
3031 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3032 struct decode_state *decode_state,
3033 struct gen7_mfd_context *gen7_mfd_context)
3035 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3036 VAPictureParameterBufferVP8 *pic_param;
3037 VASliceParameterBufferVP8 *slice_param;
3038 dri_bo *slice_data_bo;
3040 assert(decode_state->pic_param && decode_state->pic_param->buffer);
3041 pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
/* VP8 requires exactly one slice parameter with one element, one slice
 * data bo, and a coefficient-probability buffer; reject anything else. */
3043 /* one slice per frame */
3044 if (decode_state->num_slice_params != 1 ||
3045 (!decode_state->slice_params ||
3046 !decode_state->slice_params[0] ||
3047 (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3048 (!decode_state->slice_datas ||
3049 !decode_state->slice_datas[0] ||
3050 !decode_state->slice_datas[0]->bo) ||
3051 !decode_state->probability_data) {
3052 WARN_ONCE("Wrong parameters for VP8 decoding\n");
3057 slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3058 slice_data_bo = decode_state->slice_datas[0]->bo;
/* Allocate/refresh per-frame resources before building the batch. */
3060 gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3061 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3062 intel_batchbuffer_emit_mi_flush(batch);
/* Standard MFX state sequence, all in VP8 format mode. */
3063 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3064 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3065 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3066 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3067 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3068 gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3069 gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3070 intel_batchbuffer_end_atomic(batch);
3071 intel_batchbuffer_flush(batch);
/*
 * Top-level `run` entry point of the gen8 decoder hw_context: sanity-check
 * the input, then dispatch to the per-codec decode routine by profile.
 *
 * NOTE(review): the extraction has dropped lines here — the return type,
 * the `VAProfile profile` parameter line, the `switch (profile)` header,
 * `break`s and the default case, and the final `return vaStatus;`. Code is
 * left byte-identical.
 */
3075 gen8_mfd_decode_picture(VADriverContextP ctx,
3077 union codec_state *codec_state,
3078 struct hw_context *hw_context)
3081 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3082 struct decode_state *decode_state = &codec_state->decode;
3085 assert(gen7_mfd_context);
/* Common validation shared by all codecs; bail out before touching HW. */
3087 vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3089 if (vaStatus != VA_STATUS_SUCCESS)
/* Reset the MPEG-2 slice-vertical-position workaround state per picture. */
3092 gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3095 case VAProfileMPEG2Simple:
3096 case VAProfileMPEG2Main:
3097 gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3100 case VAProfileH264ConstrainedBaseline:
3101 case VAProfileH264Main:
3102 case VAProfileH264High:
3103 case VAProfileH264StereoHigh:
3104 case VAProfileH264MultiviewHigh:
3105 gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3108 case VAProfileVC1Simple:
3109 case VAProfileVC1Main:
3110 case VAProfileVC1Advanced:
3111 gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3114 case VAProfileJPEGBaseline:
3115 gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3118 case VAProfileVP8Version0_3:
3119 gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
/* All per-codec paths report success unconditionally here. */
3127 vaStatus = VA_STATUS_SUCCESS;
/*
 * Destroy hook for the gen8 MFD context: drop every scratch/output bo,
 * tear down the JPEG-workaround surface if one was created, free the batch
 * buffer, and finally free the context itself.
 */
3134 gen8_mfd_context_destroy(void *hw_context)
3136 VADriverContextP ctx;
3137 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3139 ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
/* Each bo is unreferenced unconditionally (presumably NULL-safe — confirm
 * against libdrm) and the pointer cleared defensively. */
3141 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3142 gen7_mfd_context->post_deblocking_output.bo = NULL;
3144 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3145 gen7_mfd_context->pre_deblocking_output.bo = NULL;
3147 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3148 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3150 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3151 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3153 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3154 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3156 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3157 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3159 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3160 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3162 dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3163 gen7_mfd_context->segmentation_buffer.bo = NULL;
/* Not NULLed after release — harmless since the context is freed below. */
3165 dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
/* The JPEG workaround surface is created lazily; only destroy if present. */
3167 if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3168 i965_DestroySurfaces(ctx,
3169 &gen7_mfd_context->jpeg_wa_surface_id,
3171 gen7_mfd_context->jpeg_wa_surface_object = NULL;
3174 intel_batchbuffer_free(gen7_mfd_context->base.batch);
3175 free(gen7_mfd_context);
/*
 * Per-codec init for MPEG-2 contexts: reset the four cached quantiser-
 * matrix "load" flags to -1 (sentinel, presumably meaning "no matrix
 * loaded yet" — confirm against the MPEG-2 pic/qm state emitters) so the
 * first picture of a new context always (re)loads its matrices.
 */
3178 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3179 struct gen7_mfd_context *gen7_mfd_context)
3181 gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3182 gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3183 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3184 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
/*
 * Allocate and initialize a gen8 decoder hw_context for the given config:
 * wire up the destroy/run vtable entries, create the BCS batch buffer,
 * reset the reference-surface table, then run per-codec init for the
 * MPEG-2 and H.264 profile families. Returns the new context (ownership
 * transfers to the caller; released via gen8_mfd_context_destroy).
 */
3188 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3190 struct intel_driver_data *intel = intel_driver_data(ctx);
/* calloc zero-fills, so every field not set below starts at 0/NULL. */
3191 struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3194 if (!gen7_mfd_context)
3197 gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3198 gen7_mfd_context->base.run = gen8_mfd_decode_picture;
/* NOTE(review): intel_batchbuffer_new() result is not NULL-checked here. */
3199 gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
/* Mark every frame-store slot as empty/invalid until surfaces are bound. */
3201 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3202 gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3203 gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3206 gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3207 gen7_mfd_context->segmentation_buffer.valid = 0;
/* Per-codec context init; other profiles need none (default case dropped
 * by the extraction). */
3209 switch (obj_config->profile) {
3210 case VAProfileMPEG2Simple:
3211 case VAProfileMPEG2Main:
3212 gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3215 case VAProfileH264ConstrainedBaseline:
3216 case VAProfileH264Main:
3217 case VAProfileH264High:
3218 case VAProfileH264StereoHigh:
3219 case VAProfileH264MultiviewHigh:
3220 gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
/* Back-pointer used by gen8_mfd_context_destroy to reach the driver ctx. */
3226 gen7_mfd_context->driver_context = ctx;
3227 return (struct hw_context *)gen7_mfd_context;