2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
/* True when the GPU stepping is B0 or newer (compares the PCI revision id). */
#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/*
 * Zig-zag scan order for an 8x8 coefficient block: entry i is the
 * raster-order index of the i-th coefficient in scan order.  Used to
 * reorder quantization matrices before they are uploaded to the MFX unit.
 */
static const uint32_t zigzag_direct[64] = {
    0, 1, 8, 16, 9, 2, 3, 10,
    17, 24, 32, 25, 18, 11, 4, 5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13, 6, 7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
/*
 * Attach (or refresh) the per-surface AVC decode state (GenAvcSurface)
 * on obj_surface and ensure its direct-mode motion vector (DMV) scratch
 * buffer exists.  The DMV buffer is sized for the whole frame at
 * 128 bytes per macroblock (width_in_mbs * height_in_mbs * 128).
 */
gen8_mfd_init_avc_surface(VADriverContextP ctx,
                          VAPictureParameterBufferH264 *pic_param,
                          struct object_surface *obj_surface)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
    int width_in_mbs, height_in_mbs;
    /* Destructor invoked when the surface (and its private data) is freed. */
    obj_surface->free_private_data = gen_free_avc_surface;
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
    if (!gen7_avc_surface) {
        /* First AVC use of this surface: allocate zero-initialized private data. */
        gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
        if (!gen7_avc_surface)
        gen7_avc_surface->base.frame_store_id = -1;
        assert((obj_surface->size & 0x3f) == 0); /* surface size must be 64-byte aligned */
        obj_surface->private_data = gen7_avc_surface;
    /* DMV buffers now relate to the whole frame, irrespective of
       field coding, so a single allocation covers both fields. */
    if (gen7_avc_surface->dmv_top == NULL) {
        gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
                                                 "direct mv w/r buffer",
                                                 width_in_mbs * height_in_mbs * 128,
        assert(gen7_avc_surface->dmv_top);
/*
 * Emit MFX_PIPE_MODE_SELECT (5 dwords): selects the codec standard,
 * VLD long-format decode mode, and whether the pre- or post-deblocking
 * output surface will be written for this frame.
 */
gen8_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* Only these codecs are handled by this decoder backend. */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1 ||
           standard_select == MFX_FORMAT_JPEG ||
           standard_select == MFX_FORMAT_VP8);
    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
                  (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
                  (0 << 5) | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4) | /* decoding mode */
                  (standard_select << 0));
                  (0 << 4) | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3) | /* terminate if AVC mbdata error occurs */
                  (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE (6 dwords) for the destination render surface:
 * dimensions, pitch, Y-major tiling, and the Y offsets of the Cb/Cr
 * planes.  Y800 surfaces are programmed as monochrome; everything else
 * as planar 4:2:0 8-bit.
 */
gen8_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface = decode_state->render_object;
    unsigned int y_cb_offset;
    unsigned int y_cr_offset;
    unsigned int surface_format;
    y_cb_offset = obj_surface->y_cb_offset;
    y_cr_offset = obj_surface->y_cr_offset;
    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
                     MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
                  ((obj_surface->orig_height - 1) << 18) |
                  ((obj_surface->orig_width - 1) << 4));
                  (surface_format << 28) | /* 420 planar YUV surface */
                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2) | /* must be 0 */
                  (1 << 1) | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): the pre/post-deblocking
 * output surfaces, the intra / deblocking-filter row-store scratch
 * buffers, and the 16 reference picture base addresses.  Invalid slots
 * are padded with zero dwords so the command length stays fixed.
 */
gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen7_mfd_context *gen7_mfd_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* Pre-deblock 1-3 */
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->pre_deblocking_output.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* Post-deblocking 4-6 */
    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->post_deblocking_output.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* intra row-store scratch 13-15 */
    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* deblocking-filter-row-store 16-18 */
    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* Emit one 64-bit address (or two zero dwords) per reference slot. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->bo) {
            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
            OUT_BCS_RELOC64(batch, obj_surface->bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
    /* reference property 51 */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* Macroblock status & ILDB 52-57 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* the second Macroblock status 58-60 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): points the bitstream
 * decoder at slice_data_bo.  The MV, IT_COFF, IT_DBLK and PAK_BSE
 * sections are zeroed — they are unused for VLD decoding.
 */
gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    BEGIN_BCS_BATCH(batch, 26);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    OUT_BCS_RELOC64(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* Upper bound 4-5 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* MFX indirect MV 6-10 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* MFX IT_COFF 11-15 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* MFX IT_DBLK 16-20 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* MFX PAK_BSE object for encoder 21-25 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): the BSD/MPC and MPR
 * row-store scratch buffers plus the VC-1 bitplane read buffer.
 * Invalid entries are padded with zero dwords.
 */
gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* MPR Row Store Scratch buffer 4-6 */
    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* Bitplane buffer (read-only by the GPU, hence no write domain). */
    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, 0,
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_QM_STATE (18 dwords): upload one quantization matrix of the
 * given qm_type.  The matrix is staged into a 16-dword local buffer and
 * always sent as 64 bytes.
 * NOTE(review): when qm_length < 64 the tail of qm_buffer is
 * uninitialized stack data — presumably ignored by the hardware for
 * shorter matrices, but worth confirming.
 */
gen8_mfd_qm_state(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int qm_buffer[16];
    assert(qm_length <= 16 * 4); /* payload is at most 64 bytes */
    memcpy(qm_buffer, qm, qm_length);
    BEGIN_BCS_BATCH(batch, 18);
    OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
    OUT_BCS_BATCH(batch, qm_type << 0);
    intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_IMG_STATE (17 dwords) from the VA-API H.264 picture
 * parameters: frame size in macroblocks, QP index offsets, prediction
 * flags, and the frame/field/MBAFF coding structure.  Only monochrome
 * and 4:2:0 chroma formats are supported by the MFX unit.
 */
gen8_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;
    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
    /* Derive the picture structure from the current picture's field flags. */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
    /* A field picture structure must agree with field_pic_flag. */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    /* MBAFF = sequence allows adaptive frame/field AND this is a frame picture. */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
    BEGIN_BCS_BATCH(batch, 17);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
                  (width_in_mbs * height_in_mbs - 1));
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * Upload the AVC scaling lists: 4x4 intra and inter matrices always
 * (3 lists of 16 bytes each), and the two 8x8 matrices only when
 * transform_8x8_mode_flag is set.  Falls back to the context's default
 * IQ matrix when the application supplied none.
 */
gen8_mfd_avc_qm_state(VADriverContextP ctx,
                      struct decode_state *decode_state,
                      struct gen7_mfd_context *gen7_mfd_context)
    VAIQMatrixBufferH264 *iq_matrix;
    VAPictureParameterBufferH264 *pic_param;
    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
        iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
        iq_matrix = &gen7_mfd_context->iq_matrix.h264;
    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
    gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
    if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
        gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
        gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
/* Emit the AVC picture-ID state; delegates to the shared gen75 helper. */
gen8_mfd_avc_picid_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
    gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
                               gen7_mfd_context->reference_surface);
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): the direct-mode motion
 * vector (DMV) buffer addresses for the 16 reference slots and the
 * current picture, followed by the top/bottom picture order counts
 * (POCs) for each reference and for the current picture.
 */
gen8_mfd_avc_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen7_mfd_context *gen7_mfd_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen7_avc_surface;
    VAPictureH264 *va_pic;
    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
    /* reference surfaces 0..15 */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
            gen7_avc_surface = obj_surface->private_data;
            /* Reference DMV buffers are read-only (no GPU write domain). */
            OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* the current decoding frame/field */
    va_pic = &pic_param->CurrPic;
    obj_surface = decode_state->render_object;
    assert(obj_surface->bo && obj_surface->private_data);
    gen7_avc_surface = obj_surface->private_data;
    /* The current picture's DMV buffer is written by the decoder. */
    OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* POC pairs (top/bottom) for each reference slot. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
            const VAPictureH264 * const va_pic = avc_find_picture(
                obj_surface->base.id, pic_param->ReferenceFrames,
                ARRAY_ELEMS(pic_param->ReferenceFrames));
            assert(va_pic != NULL);
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
    /* POC pair for the current picture. */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit a phantom slice covering macroblocks before the first real slice
 * (used when first_mb_in_slice != 0); delegates to the gen6 helper.
 */
gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
                                 VAPictureParameterBufferH264 *pic_param,
                                 VASliceParameterBufferH264 *next_slice_param,
                                 struct gen7_mfd_context *gen7_mfd_context)
    gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
/*
 * Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice: normalized slice
 * type, active reference counts, weighted-prediction denominators, QP
 * and deblocking offsets, and the start/end macroblock positions.  The
 * end position comes from the next slice, or from the bottom of the
 * picture for the last slice.
 */
gen8_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    /* Fold SI into I and SP into P; the hardware knows only I/P/B. */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    /* Active reference counts: I uses none, P only list 0, B both lists. */
    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    first_mb_in_slice = slice_param->first_mb_in_slice;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;
    /* MBAFF addresses macroblock pairs, so double the vertical position. */
        slice_ver_pos = slice_ver_pos << 1;
    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
            next_slice_ver_pos = next_slice_ver_pos << 1;
        /* Last slice: end position is the bottom of the picture
           (half the frame height for a field picture). */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/* Emit the AVC reference index lists; delegates to the shared gen6 helper. */
gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           VASliceParameterBufferH264 *slice_param,
                           struct gen7_mfd_context *gen7_mfd_context)
    gen6_send_avc_ref_idx_state(
        gen7_mfd_context->base.batch,
        gen7_mfd_context->reference_surface
/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE (98 dwords per table) when explicit
 * weighted prediction applies: one table (list 0) for weighted P/SP
 * slices, two tables (list 0 then list 1) for B slices with
 * weighted_bipred_idc == 1.  Each table packs 32 entries of
 * luma weight/offset plus Cb and Cr weight/offset pairs.
 */
gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
                                VAPictureParameterBufferH264 *pic_param,
                                VASliceParameterBufferH264 *slice_param,
                                struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i, j, num_weight_offset_table = 0;
    short weightoffsets[32 * 6];
    if ((slice_param->slice_type == SLICE_TYPE_P ||
         slice_param->slice_type == SLICE_TYPE_SP) &&
        (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
        num_weight_offset_table = 1;
    if ((slice_param->slice_type == SLICE_TYPE_B) &&
        (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
        num_weight_offset_table = 2;
    for (i = 0; i < num_weight_offset_table; i++) {
        BEGIN_BCS_BATCH(batch, 98);
        OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
        OUT_BCS_BATCH(batch, i); /* 0 = list 0, 1 = list 1 */
        /* Pack list-0 weights/offsets: luma, Cb, Cr per entry. */
        for (j = 0; j < 32; j++) {
            weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
            weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
            weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
            weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
            weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
            weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
        /* Pack list-1 weights/offsets (second pass, B slices only). */
        for (j = 0; j < 32; j++) {
            weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
            weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
            weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
            weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
            weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
            weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
        ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFD_AVC_BSD_OBJECT (6 dwords): kicks off decoding of one slice
 * from the indirect bitstream buffer.  The first-macroblock bit offset
 * is split into a byte offset and a residual bit offset.
 */
gen8_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
                                                            pic_param->pic_fields.bits.entropy_coding_mode_flag);
    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
                  (slice_param->slice_data_size));
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
                  ((slice_data_bit_offset >> 3) << 16) |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/* One-time AVC context setup: install default (flat) scaling lists. */
gen8_mfd_avc_context_init(
    VADriverContextP ctx,
    struct gen7_mfd_context *gen7_mfd_context
    /* Initialize flat scaling lists */
    avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/*
 * Per-picture AVC decode setup: detect whether in-loop deblocking (ILDB)
 * is enabled by any slice, refresh the frame-store indexes, mark and
 * prepare the render surface, and (re)allocate the row-store scratch
 * buffers sized from the picture width in macroblocks.  The decoded
 * output goes to the post-deblocking surface when ILDB is on, otherwise
 * to the pre-deblocking surface.
 */
gen8_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int i, j, enable_avc_ildb = 0;
    unsigned int width_in_mbs, height_in_mbs;
    /* ILDB is needed as soon as one slice does not disable deblocking. */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));
            if (slice_param->disable_deblocking_filter_idc != 1) {
    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
                                       gen7_mfd_context->reference_surface);
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);
    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    if (pic_param->pic_fields.bits.reference_pic_flag)
        obj_surface->flags |= SURFACE_REFERENCED;
        obj_surface->flags &= ~SURFACE_REFERENCED;
    avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
    gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
    /* Output goes post-deblocking when ILDB is enabled, pre- otherwise. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
    /* (Re)allocate the per-picture row-store scratch buffers. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      width_in_mbs * 64 * 2,
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      width_in_mbs * 64 * 2,
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
    /* Bitplane buffer is VC-1 only; unused for AVC. */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Top-level AVC picture decode: initialize per-picture state, then emit
 * the frame-level MFX states once, followed by per-slice states and a
 * BSD object for every slice element, and finally flush the batch.
 */
gen8_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
    /* Frame-level states, emitted once per picture. */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
    /* Per slice-parameter-buffer loop; each buffer may hold several slices. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
        /* Picture does not start at MB 0: emit a phantom slice first. */
        if (j == 0 && slice_param->first_mb_in_slice)
            gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));
            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
                next_slice_param = next_slice_group_param;
            gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
/*
 * gen8_mfd_mpeg2_decode_init: per-picture setup for MPEG-2 decode.
 * Binds the reference surfaces, makes the render target the pre-deblocking
 * output, and (re)allocates the BSD/MPC row-store scratch buffer.
 * NOTE(review): the embedded source line numbers below are non-contiguous;
 * some original lines (return type, braces, part of the dri_bo_alloc
 * argument list) are elided from this listing.
 */
970 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
971 struct decode_state *decode_state,
972 struct gen7_mfd_context *gen7_mfd_context)
974 VAPictureParameterBufferMPEG2 *pic_param;
975 struct i965_driver_data *i965 = i965_driver_data(ctx);
976 struct object_surface *obj_surface;
978 unsigned int width_in_mbs;
980 assert(decode_state->pic_param && decode_state->pic_param->buffer);
981 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
/* Frame width rounded up to whole 16x16 macroblocks. */
982 width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
984 mpeg2_set_reference_surfaces(
986 gen7_mfd_context->reference_surface,
991 /* Current decoded picture */
992 obj_surface = decode_state->render_object;
993 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Decoded pixels go straight to the render target as pre-deblocking output. */
995 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
996 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
997 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
998 gen7_mfd_context->pre_deblocking_output.valid = 1;
1000 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1001 bo = dri_bo_alloc(i965->intel.bufmgr,
1002 "bsd mpc row store",
1006 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1007 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
/* The remaining scratch/bitplane buffers are not used for MPEG-2. */
1009 gen7_mfd_context->post_deblocking_output.valid = 0;
1010 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1011 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1012 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1013 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * gen8_mfd_mpeg2_pic_state: emit the 13-dword MFX_MPEG2_PIC_STATE command
 * describing the current MPEG-2 picture to the BCS ring.
 * NOTE(review): non-contiguous line numbers — the condition that sets
 * slice_concealment_disable_bit (original line 1027) is elided here.
 */
1017 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1018 struct decode_state *decode_state,
1019 struct gen7_mfd_context *gen7_mfd_context)
1021 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1022 VAPictureParameterBufferMPEG2 *pic_param;
1023 unsigned int slice_concealment_disable_bit = 0;
1025 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1026 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1028 slice_concealment_disable_bit = 1;
1030 BEGIN_BCS_BATCH(batch, 13);
1031 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
/* DW1: the four 4-bit f_code values are unpacked from VA's packed 16-bit
 * f_code field, plus the picture_coding_extension flags. */
1032 OUT_BCS_BATCH(batch,
1033 (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1034 ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1035 ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1036 ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1037 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1038 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1039 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1040 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1041 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1042 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1043 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1044 pic_param->picture_coding_extension.bits.alternate_scan << 6);
1045 OUT_BCS_BATCH(batch,
1046 pic_param->picture_coding_type << 9);
/* DW3: frame dimensions in macroblocks, minus one, per the MFX command
 * layout; slice-concealment control in the top bit. */
1047 OUT_BCS_BATCH(batch,
1048 (slice_concealment_disable_bit << 31) |
1049 ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1050 ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
/* Remaining dwords are reserved/unused and emitted as zero. */
1051 OUT_BCS_BATCH(batch, 0);
1052 OUT_BCS_BATCH(batch, 0);
1053 OUT_BCS_BATCH(batch, 0);
1054 OUT_BCS_BATCH(batch, 0);
1055 OUT_BCS_BATCH(batch, 0);
1056 OUT_BCS_BATCH(batch, 0);
1057 OUT_BCS_BATCH(batch, 0);
1058 OUT_BCS_BATCH(batch, 0);
1059 OUT_BCS_BATCH(batch, 0);
1060 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_mpeg2_qm_state: refresh the cached MPEG-2 inverse-quantization
 * matrices from the VA buffer (converting zig-zag order to raster order via
 * zigzag_direct), then commit both matrices to hardware.
 * NOTE(review): non-contiguous line numbers — closing braces and parts of
 * the commit loop (qm_type declaration, i==0/1 dispatch) are elided.
 */
1064 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1065 struct decode_state *decode_state,
1066 struct gen7_mfd_context *gen7_mfd_context)
1068 VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1071 /* Update internal QM state */
1072 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1073 VAIQMatrixBufferMPEG2 * const iq_matrix =
1074 (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
/* A cached load flag of -1 appears to mark "never loaded"; in that case or
 * when the app supplies a new matrix, take the incoming one — TODO confirm
 * where -1 is initialized (outside this listing). */
1076 if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1077 iq_matrix->load_intra_quantiser_matrix) {
1078 gen_iq_matrix->load_intra_quantiser_matrix =
1079 iq_matrix->load_intra_quantiser_matrix;
1080 if (iq_matrix->load_intra_quantiser_matrix) {
/* De-zigzag: VA supplies coefficients in zig-zag scan order. */
1081 for (j = 0; j < 64; j++)
1082 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1083 iq_matrix->intra_quantiser_matrix[j];
1087 if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1088 iq_matrix->load_non_intra_quantiser_matrix) {
1089 gen_iq_matrix->load_non_intra_quantiser_matrix =
1090 iq_matrix->load_non_intra_quantiser_matrix;
1091 if (iq_matrix->load_non_intra_quantiser_matrix) {
1092 for (j = 0; j < 64; j++)
1093 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1094 iq_matrix->non_intra_quantiser_matrix[j];
1099 /* Commit QM state to HW */
1100 for (i = 0; i < 2; i++) {
1101 unsigned char *qm = NULL;
1105 if (gen_iq_matrix->load_intra_quantiser_matrix) {
1106 qm = gen_iq_matrix->intra_quantiser_matrix;
1107 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1110 if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1111 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1112 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
/* Each matrix is sent as a 64-byte MFX QM payload. */
1119 gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
/*
 * gen8_mfd_mpeg2_bsd_object: emit one MFD_MPEG2_BSD_OBJECT command for a
 * slice. Computes the macroblock count from this slice's start position to
 * the next slice's start (or to the end of the picture for the last slice).
 * NOTE(review): non-contiguous line numbers — the statement setting
 * is_field_pic = 1, an else, and closing braces are elided from this listing.
 */
1124 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1125 VAPictureParameterBufferMPEG2 *pic_param,
1126 VASliceParameterBufferMPEG2 *slice_param,
1127 VASliceParameterBufferMPEG2 *next_slice_param,
1128 struct gen7_mfd_context *gen7_mfd_context)
1130 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1131 unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1132 int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1134 if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1135 pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
/* Workaround: some apps report slice_vertical_position in frame units for
 * field pictures; wa_mpeg2_slice_vertical_position > 0 enables halving. */
1137 is_field_pic_wa = is_field_pic &&
1138 gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1140 vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1141 hpos0 = slice_param->slice_horizontal_position;
1143 if (next_slice_param == NULL) {
/* Last slice: extend to the bottom of the (field) picture. */
1144 vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1147 vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1148 hpos1 = next_slice_param->slice_horizontal_position;
1151 mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1153 BEGIN_BCS_BATCH(batch, 5);
1154 OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
/* macroblock_offset is in bits; >>3 converts the whole-byte part, the low
 * 3 bits carry the remaining bit offset (DW4 below). */
1155 OUT_BCS_BATCH(batch,
1156 slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1157 OUT_BCS_BATCH(batch,
1158 slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1159 OUT_BCS_BATCH(batch,
1163 (next_slice_param == NULL) << 5 |
1164 (next_slice_param == NULL) << 3 |
1165 (slice_param->macroblock_offset & 0x7));
1166 OUT_BCS_BATCH(batch,
1167 (slice_param->quantiser_scale_code << 24) |
1168 (vpos1 << 8 | hpos1));
1169 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_mpeg2_decode_picture: top-level MPEG-2 decode entry point.
 * Builds the whole per-picture batch: pipeline state, picture/QM state,
 * then one BSD object per slice, and flushes the batch.
 * NOTE(review): non-contiguous line numbers — loop variable declarations,
 * else keywords and closing braces are elided from this listing.
 */
1173 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1174 struct decode_state *decode_state,
1175 struct gen7_mfd_context *gen7_mfd_context)
1177 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1178 VAPictureParameterBufferMPEG2 *pic_param;
1179 VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1180 dri_bo *slice_data_bo;
1183 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1184 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1186 gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1187 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1188 intel_batchbuffer_emit_mi_flush(batch);
/* Fixed MFX state-emission order: mode select, surface, buffer addresses,
 * then codec-specific picture and QM state. */
1189 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1190 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1191 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1192 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1193 gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1194 gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
/* Lazily probe the slice-vertical-position workaround once (< 0 = unknown). */
1196 if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1197 gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1198 mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1200 for (j = 0; j < decode_state->num_slice_params; j++) {
1201 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1202 slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1203 slice_data_bo = decode_state->slice_datas[j]->bo;
1204 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
/* Look ahead to the first slice of the next slice group (NULL at the end)
 * so each BSD object knows where the following slice starts. */
1206 if (j == decode_state->num_slice_params - 1)
1207 next_slice_group_param = NULL;
1209 next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1211 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1212 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1214 if (i < decode_state->slice_params[j]->num_elements - 1)
1215 next_slice_param = slice_param + 1;
1217 next_slice_param = next_slice_group_param;
1219 gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1224 intel_batchbuffer_end_atomic(batch);
1225 intel_batchbuffer_flush(batch);
/*
 * Translation tables from VA-API VC-1 enum values to the Gen7+ MFX
 * hardware encodings.
 * NOTE(review): non-contiguous line numbers — several table entries and
 * closing "};" lines are elided from this listing (e.g. the I/P/B entries
 * of va_to_gen7_vc1_pic_type and the condover values).
 */
1228 static const int va_to_gen7_vc1_pic_type[5] = {
1232 GEN7_VC1_BI_PICTURE,
/* VA mv_mode / mv_mode2 → hardware unified MV mode. */
1236 static const int va_to_gen7_vc1_mv[4] = {
1238 2, /* 1-MV half-pel */
1239 3, /* 1-MV half-pef bilinear */
/* Fixed-point (x/256) scale factors indexed by b_picture_fraction,
 * used for B-picture reference-distance scaling. */
1243 static const int b_picture_scale_factor[21] = {
1244 128, 85, 170, 64, 192,
1245 51, 102, 153, 204, 43,
1246 215, 37, 74, 111, 148,
1247 185, 222, 32, 96, 160,
1251 static const int va_to_gen7_vc1_condover[3] = {
/* VA sequence profile (0..3) → hardware profile encoding. */
1257 static const int va_to_gen7_vc1_profile[4] = {
1258 GEN7_VC1_SIMPLE_PROFILE,
1259 GEN7_VC1_MAIN_PROFILE,
1260 GEN7_VC1_RESERVED_PROFILE,
1261 GEN7_VC1_ADVANCED_PROFILE
/*
 * gen8_mfd_free_vc1_surface: destructor for the per-surface VC-1 private
 * data (installed via obj_surface->free_private_data). Releases the
 * direct-MV buffer and the struct itself.
 * NOTE(review): elided lines here include the early "return" and the
 * "*data = NULL" reset (original lines 1270/1274).
 */
1265 gen8_mfd_free_vc1_surface(void **data)
1267 struct gen7_vc1_surface *gen7_vc1_surface = *data;
1269 if (!gen7_vc1_surface)
1272 dri_bo_unreference(gen7_vc1_surface->dmv);
1273 free(gen7_vc1_surface);
/*
 * gen8_mfd_init_vc1_surface: lazily attach VC-1 private data to a surface.
 * Allocates the gen7_vc1_surface struct on first use, records the picture
 * type, and allocates the direct-MV write buffer (64 bytes per macroblock).
 * NOTE(review): non-contiguous line numbers — braces and the tail of the
 * dri_bo_alloc argument list are elided from this listing.
 */
1278 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1279 VAPictureParameterBufferVC1 *pic_param,
1280 struct object_surface *obj_surface)
1282 struct i965_driver_data *i965 = i965_driver_data(ctx);
1283 struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1284 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1285 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
/* Destructor is (re)installed unconditionally so the surface can free us. */
1287 obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1289 if (!gen7_vc1_surface) {
1290 gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1292 if (!gen7_vc1_surface)
1295 assert((obj_surface->size & 0x3f) == 0);
1296 obj_surface->private_data = gen7_vc1_surface;
1299 gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
/* Direct-MV buffer is allocated once per surface and reused. */
1301 if (gen7_vc1_surface->dmv == NULL) {
1302 gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1303 "direct mv w/r buffer",
1304 width_in_mbs * height_in_mbs * 64,
/*
 * gen8_mfd_vc1_decode_init: per-picture setup for VC-1 decode.
 * Binds references, attaches VC-1 private data to the render target,
 * selects pre- vs post-deblocking output based on the loopfilter flag,
 * allocates the row-store scratch buffers, and repacks the VA bitplane
 * buffer into the hardware layout.
 * NOTE(review): non-contiguous line numbers — parts of dri_bo_alloc
 * argument lists, loop braces and several statements (e.g. the dst
 * initialization for the bitplane copy) are elided from this listing.
 */
1310 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1311 struct decode_state *decode_state,
1312 struct gen7_mfd_context *gen7_mfd_context)
1314 VAPictureParameterBufferVC1 *pic_param;
1315 struct i965_driver_data *i965 = i965_driver_data(ctx);
1316 struct object_surface *obj_surface;
1321 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1322 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1323 width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1324 picture_type = pic_param->picture_fields.bits.picture_type;
1326 intel_update_vc1_frame_store_index(ctx,
1329 gen7_mfd_context->reference_surface);
1331 /* Current decoded picture */
1332 obj_surface = decode_state->render_object;
1333 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1334 gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
/* Exactly one of post-/pre-deblocking output is valid, chosen by the
 * in-loop filter flag: loopfilter on → post-deblocking path. */
1336 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1337 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1338 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1339 gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1341 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1342 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1343 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1344 gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1346 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1347 bo = dri_bo_alloc(i965->intel.bufmgr,
1352 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1353 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1355 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1356 bo = dri_bo_alloc(i965->intel.bufmgr,
1357 "deblocking filter row store",
1358 width_in_mbs * 7 * 64,
1361 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1362 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1364 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1365 bo = dri_bo_alloc(i965->intel.bufmgr,
1366 "bsd mpc row store",
1370 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1371 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1373 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1375 gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1376 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1378 if (gen7_mfd_context->bitplane_read_buffer.valid) {
1379 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1380 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
/* Two macroblocks share one byte in the hardware bitplane layout. */
1381 int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1383 uint8_t *src = NULL, *dst = NULL;
1385 assert(decode_state->bit_plane->buffer);
1386 src = decode_state->bit_plane->buffer;
1388 bo = dri_bo_alloc(i965->intel.bufmgr,
1390 bitplane_width * height_in_mbs,
1393 gen7_mfd_context->bitplane_read_buffer.bo = bo;
1395 dri_bo_map(bo, True);
1396 assert(bo->virtual);
/* Repack: VA packs one 4-bit bitplane value per MB, two MBs per byte;
 * the copy below re-nibbles them row by row into the bo. */
1399 for (src_h = 0; src_h < height_in_mbs; src_h++) {
1400 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1401 int src_index, dst_index;
1405 src_index = (src_h * width_in_mbs + src_w) / 2;
1406 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1407 src_value = ((src[src_index] >> src_shift) & 0xf);
/* For skipped pictures the bitplane is synthesized rather than copied
 * from src — presumably forcing per-MB "skipped" flags; TODO confirm
 * against the elided lines. */
1409 if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1413 dst_index = src_w / 2;
1414 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
/* Odd trailing macroblock: shift the final half-filled byte down. */
1418 dst[src_w / 2] >>= 4;
1420 dst += bitplane_width;
1425 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
/*
 * gen8_mfd_vc1_pic_state: derive all per-picture VC-1 parameters (alternate
 * pquant configuration, unified MV mode, B-frame scale factor, picture type,
 * interpolation mode, overlap smoothing, bitplane flags) and emit the
 * 6-dword MFD_VC1_LONG_PIC_STATE command.
 * NOTE(review): non-contiguous line numbers — many lines are elided here
 * (several local declarations, `if (dquant == 0) {`, case labels and braces
 * of the dqprofile switch, the else branches, the overlap/condover logic,
 * and parts of the batch dwords). The comments below only describe what the
 * visible lines establish.
 */
1429 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1430 struct decode_state *decode_state,
1431 struct gen7_mfd_context *gen7_mfd_context)
1433 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1434 VAPictureParameterBufferVC1 *pic_param;
1435 struct object_surface *obj_surface;
1436 int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1437 int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1438 int unified_mv_mode;
1439 int ref_field_pic_polarity = 0;
1440 int scale_factor = 0;
1442 int dmv_surface_valid = 0;
1448 int interpolation_mode = 0;
1450 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1451 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1453 profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1454 dquant = pic_param->pic_quantizer_fields.bits.dquant;
1455 dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1456 dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1457 dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1458 dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1459 dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1460 alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
/* Map the VC-1 DQUANT syntax (dquant/dquantfrm/dqprofile/dq*edge/dqbilevel)
 * onto the hardware's alt-pquant config + per-edge mask. */
1463 alt_pquant_config = 0;
1464 alt_pquant_edge_mask = 0;
1465 } else if (dquant == 2) {
1466 alt_pquant_config = 1;
1467 alt_pquant_edge_mask = 0xf;
1469 assert(dquant == 1);
1470 if (dquantfrm == 0) {
1471 alt_pquant_config = 0;
1472 alt_pquant_edge_mask = 0;
1475 assert(dquantfrm == 1);
1476 alt_pquant_config = 1;
1478 switch (dqprofile) {
1480 if (dqbilevel == 0) {
1481 alt_pquant_config = 2;
1482 alt_pquant_edge_mask = 0;
1484 assert(dqbilevel == 1);
1485 alt_pquant_config = 3;
1486 alt_pquant_edge_mask = 0;
1491 alt_pquant_edge_mask = 0xf;
1496 alt_pquant_edge_mask = 0x9;
1498 alt_pquant_edge_mask = (0x3 << dqdbedge);
1503 alt_pquant_edge_mask = (0x1 << dqsbedge);
/* Intensity compensation replaces mv_mode with mv_mode2 as the effective
 * motion-vector mode. */
1512 if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1513 assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1514 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1516 assert(pic_param->mv_fields.bits.mv_mode < 4);
1517 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1520 if (pic_param->sequence_fields.bits.interlace == 1 &&
1521 pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1522 /* FIXME: calculate reference field picture polarity */
1524 ref_field_pic_polarity = 0;
1527 if (pic_param->b_picture_fraction < 21)
1528 scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1530 picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
/* Advanced-profile I pictures are fed to the hardware as BI pictures. */
1532 if (profile == GEN7_VC1_ADVANCED_PROFILE &&
1533 picture_type == GEN7_VC1_I_PICTURE)
1534 picture_type = GEN7_VC1_BI_PICTURE;
1536 if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1537 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1539 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1542 * 8.3.6.2.1 Transform Type Selection
1543 * If variable-sized transform coding is not enabled,
1544 * then the 8x8 transform shall be used for all blocks.
1545 * it is also MFX_VC1_PIC_STATE requirement.
1547 if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1548 pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
1549 pic_param->transform_fields.bits.frame_level_transform_type = 0;
/* B pictures read direct MVs written while decoding the backward reference;
 * the DMV surface is only valid when that reference carried motion data
 * (i.e. was not an I/BI picture). */
1553 if (picture_type == GEN7_VC1_B_PICTURE) {
1554 struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1556 obj_surface = decode_state->reference_objects[1];
1559 gen7_vc1_surface = obj_surface->private_data;
1561 if (!gen7_vc1_surface ||
1562 (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1563 va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1564 dmv_surface_valid = 0;
1566 dmv_surface_valid = 1;
1569 assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1571 if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1572 fcm = pic_param->picture_fields.bits.frame_coding_mode;
1574 if (pic_param->picture_fields.bits.top_field_first)
/* BRFD: scaled backward reference distance for B pictures (x/256 fixed
 * point via b_picture_scale_factor). */
1580 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1581 brfd = pic_param->reference_fields.bits.reference_distance;
1582 brfd = (scale_factor * brfd) >> 8;
1583 brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1589 overlap = pic_param->sequence_fields.bits.overlap;
/* Overlap smoothing eligibility differs between simple/main and advanced
 * profile; parts of this decision tree are elided from this listing. */
1593 if (profile != GEN7_VC1_ADVANCED_PROFILE) {
1594 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1595 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1599 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1600 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1603 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1604 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
1605 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1607 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1608 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1615 assert(pic_param->conditional_overlap_flag < 3);
1616 assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1618 if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1619 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1620 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1621 interpolation_mode = 9; /* Half-pel bilinear */
1622 else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1623 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1624 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1625 interpolation_mode = 1; /* Half-pel bicubic */
1627 interpolation_mode = 0; /* Quarter-pel bicubic */
1629 BEGIN_BCS_BATCH(batch, 6);
1630 OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
/* DW1: picture size in macroblocks, minus one. */
1631 OUT_BCS_BATCH(batch,
1632 (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1633 ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1634 OUT_BCS_BATCH(batch,
1635 ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1636 dmv_surface_valid << 15 |
1637 (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1638 pic_param->rounding_control << 13 |
1639 pic_param->sequence_fields.bits.syncmarker << 12 |
1640 interpolation_mode << 8 |
1641 0 << 7 | /* FIXME: scale up or down ??? */
1642 pic_param->range_reduction_frame << 6 |
1643 pic_param->entrypoint_fields.bits.loopfilter << 5 |
1645 !pic_param->picture_fields.bits.is_first_field << 3 |
1646 (pic_param->sequence_fields.bits.profile == 3) << 0);
1647 OUT_BCS_BATCH(batch,
1648 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1649 picture_type << 26 |
1652 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1654 OUT_BCS_BATCH(batch,
1655 unified_mv_mode << 28 |
1656 pic_param->mv_fields.bits.four_mv_switch << 27 |
1657 pic_param->fast_uvmc_flag << 26 |
1658 ref_field_pic_polarity << 25 |
1659 pic_param->reference_fields.bits.num_reference_pictures << 24 |
1660 pic_param->reference_fields.bits.reference_distance << 20 |
1661 pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1662 pic_param->mv_fields.bits.extended_dmv_range << 10 |
1663 pic_param->mv_fields.bits.extended_mv_range << 8 |
1664 alt_pquant_edge_mask << 4 |
1665 alt_pquant_config << 2 |
1666 pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1667 pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
/* DW5: bitplane presence is inverted per flag — a set "raw mode" bit tells
 * the hardware the corresponding bitplane is NOT in the bitplane buffer. */
1668 OUT_BCS_BATCH(batch,
1669 !!pic_param->bitplane_present.value << 31 |
1670 !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1671 !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1672 !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1673 !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1674 !pic_param->bitplane_present.flags.bp_overflags << 26 |
1675 !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1676 !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1677 pic_param->mv_fields.bits.mv_table << 20 |
1678 pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1679 pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1680 pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1681 pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1682 pic_param->mb_mode_table << 8 |
1684 pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1685 pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1686 pic_param->cbp_table << 0);
1687 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_vc1_pred_pipe_state: emit the 6-dword MFX_VC1_PRED_PIPE_STATE
 * command, programming intensity compensation and the luma scale/shift
 * used for reference-picture prediction.
 * NOTE(review): line numbers are non-contiguous; a few interior lines
 * (closing brace, one bitfield line of DW1) are elided from this listing.
 */
1691 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1692 struct decode_state *decode_state,
1693 struct gen7_mfd_context *gen7_mfd_context)
1695 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1696 VAPictureParameterBufferVC1 *pic_param;
1697 int intensitycomp_single;
1699 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1700 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
/* Intensity compensation is signalled through the MV mode. */
1701 intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1703 BEGIN_BCS_BATCH(batch, 6);
1704 OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1705 OUT_BCS_BATCH(batch,
1706 0 << 14 | /* FIXME: double ??? */
1708 intensitycomp_single << 10 |
1709 intensitycomp_single << 8 |
1710 0 << 4 | /* FIXME: interlace mode */
1712 OUT_BCS_BATCH(batch,
1713 pic_param->luma_shift << 16 |
1714 pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1715 OUT_BCS_BATCH(batch, 0);
1716 OUT_BCS_BATCH(batch, 0);
1717 OUT_BCS_BATCH(batch, 0);
1718 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_vc1_directmode_state: emit MFX_VC1_DIRECTMODE_STATE (7 dwords)
 * with 64-bit relocations for the direct-MV write buffer (current picture)
 * and the direct-MV read buffer (backward reference), or zero addresses
 * when a buffer is absent.
 * NOTE(review): non-contiguous line numbers — the "else" lines pairing
 * with each relocation and some braces are elided from this listing.
 */
1722 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1723 struct decode_state *decode_state,
1724 struct gen7_mfd_context *gen7_mfd_context)
1726 struct i965_driver_data *i965 = i965_driver_data(ctx);
1727 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1728 struct object_surface *obj_surface;
1729 dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
/* Write buffer: current render target's per-surface DMV bo. */
1731 obj_surface = decode_state->render_object;
1733 if (obj_surface && obj_surface->private_data) {
1734 dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
/* Read buffer: backward reference's DMV bo (reference_objects[1]). */
1737 obj_surface = decode_state->reference_objects[1];
1739 if (obj_surface && obj_surface->private_data) {
1740 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1743 BEGIN_BCS_BATCH(batch, 7);
1744 OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1746 if (dmv_write_buffer)
1747 OUT_BCS_RELOC64(batch, dmv_write_buffer,
1748 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
/* No write buffer: emit a zero 64-bit address instead of a relocation. */
1751 OUT_BCS_BATCH(batch, 0);
1752 OUT_BCS_BATCH(batch, 0);
1755 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
1757 if (dmv_read_buffer)
1758 OUT_BCS_RELOC64(batch, dmv_read_buffer,
1759 I915_GEM_DOMAIN_INSTRUCTION, 0,
1762 OUT_BCS_BATCH(batch, 0);
1763 OUT_BCS_BATCH(batch, 0);
1766 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
1768 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_vc1_get_macroblock_bit_offset: adjust a slice's macroblock bit
 * offset by scanning the slice-header bytes for the 0x00 0x00 0x03 pattern
 * (VC-1 start-code emulation prevention) so the hardware sees the offset
 * within the raw (unescaped) bitstream.
 * NOTE(review): non-contiguous line numbers — the declarations of i/j, the
 * profile check, and loop braces are elided; the exact role of `profile`
 * cannot be confirmed from the visible lines.
 */
1772 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1774 int out_slice_data_bit_offset;
/* Whole-byte count of the header preceding macroblock data. */
1775 int slice_header_size = in_slice_data_bit_offset / 8;
1779 out_slice_data_bit_offset = in_slice_data_bit_offset;
1781 for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1782 if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
/* Recompute: byte position j plus the original sub-byte bit offset. */
1787 out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1790 return out_slice_data_bit_offset;
/*
 * gen8_mfd_vc1_bsd_object: emit one MFD_VC1_BSD_OBJECT command for a slice.
 * Maps the slice data to correct the macroblock bit offset for emulation-
 * prevention bytes, then programs data size/offset and the vertical span
 * of the slice.
 * NOTE(review): line numbers are non-contiguous; braces and an "else" are
 * elided from this listing.
 */
1794 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1795 VAPictureParameterBufferVC1 *pic_param,
1796 VASliceParameterBufferVC1 *slice_param,
1797 VASliceParameterBufferVC1 *next_slice_param,
1798 dri_bo *slice_data_bo,
1799 struct gen7_mfd_context *gen7_mfd_context)
1801 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1802 int next_slice_start_vert_pos;
1803 int macroblock_offset;
1804 uint8_t *slice_data = NULL;
/* CPU-map the slice data just long enough to scan the header bytes. */
1806 dri_bo_map(slice_data_bo, 0);
1807 slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1808 macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1809 slice_param->macroblock_offset,
1810 pic_param->sequence_fields.bits.profile);
1811 dri_bo_unmap(slice_data_bo);
/* Last slice extends to the bottom of the picture (in macroblock rows). */
1813 if (next_slice_param)
1814 next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1816 next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1818 BEGIN_BCS_BATCH(batch, 5);
1819 OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
/* Bit offset split into whole bytes (size/offset) and a 3-bit remainder. */
1820 OUT_BCS_BATCH(batch,
1821 slice_param->slice_data_size - (macroblock_offset >> 3));
1822 OUT_BCS_BATCH(batch,
1823 slice_param->slice_data_offset + (macroblock_offset >> 3));
1824 OUT_BCS_BATCH(batch,
1825 slice_param->slice_vertical_position << 16 |
1826 next_slice_start_vert_pos << 0);
1827 OUT_BCS_BATCH(batch,
1828 (macroblock_offset & 0x7));
1829 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_vc1_decode_picture: top-level VC-1 decode entry point.
 * Builds the per-picture batch: pipeline state, VC-1 picture / prediction-
 * pipe / direct-mode state, then one BSD object per slice, and flushes.
 * NOTE(review): non-contiguous line numbers — loop variable declarations,
 * else keywords and closing braces are elided from this listing.
 */
1833 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1834 struct decode_state *decode_state,
1835 struct gen7_mfd_context *gen7_mfd_context)
1837 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1838 VAPictureParameterBufferVC1 *pic_param;
1839 VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1840 dri_bo *slice_data_bo;
1843 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1844 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1846 gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1847 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1848 intel_batchbuffer_emit_mi_flush(batch);
/* Fixed MFX state-emission order, then VC-1 specific state. */
1849 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1850 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1851 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1852 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1853 gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1854 gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1855 gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1857 for (j = 0; j < decode_state->num_slice_params; j++) {
1858 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1859 slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1860 slice_data_bo = decode_state->slice_datas[j]->bo;
1861 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
/* Look ahead to the next slice group's first slice (NULL at the end). */
1863 if (j == decode_state->num_slice_params - 1)
1864 next_slice_group_param = NULL;
1866 next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1868 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1869 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1871 if (i < decode_state->slice_params[j]->num_elements - 1)
1872 next_slice_param = slice_param + 1;
1874 next_slice_param = next_slice_group_param;
1876 gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1881 intel_batchbuffer_end_atomic(batch);
1882 intel_batchbuffer_flush(batch);
/*
 * gen8_mfd_jpeg_decode_init: per-picture setup for JPEG/baseline decode.
 * Derives the output fourcc and chroma subsampling from the component
 * sampling factors, allocates the render target accordingly, and marks
 * every non-JPEG scratch buffer invalid.
 * NOTE(review): line numbers are non-contiguous; a few braces / assert
 * lines are elided from this listing.
 */
1886 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1887 struct decode_state *decode_state,
1888 struct gen7_mfd_context *gen7_mfd_context)
1890 struct object_surface *obj_surface;
1891 VAPictureParameterBufferJPEGBaseline *pic_param;
1892 int subsampling = SUBSAMPLE_YUV420;
1893 int fourcc = VA_FOURCC_IMC3;
1895 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
/* Single component → grayscale output. */
1897 if (pic_param->num_components == 1) {
1898 subsampling = SUBSAMPLE_YUV400;
1899 fourcc = VA_FOURCC_Y800;
1900 } else if (pic_param->num_components == 3) {
/* Classify subsampling from the per-component h/v sampling factors. */
1901 int h1 = pic_param->components[0].h_sampling_factor;
1902 int h2 = pic_param->components[1].h_sampling_factor;
1903 int h3 = pic_param->components[2].h_sampling_factor;
1904 int v1 = pic_param->components[0].v_sampling_factor;
1905 int v2 = pic_param->components[1].v_sampling_factor;
1906 int v3 = pic_param->components[2].v_sampling_factor;
1908 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1909 v1 == 2 && v2 == 1 && v3 == 1) {
1910 subsampling = SUBSAMPLE_YUV420;
1911 fourcc = VA_FOURCC_IMC3;
1912 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1913 v1 == 1 && v2 == 1 && v3 == 1) {
1914 subsampling = SUBSAMPLE_YUV422H;
1915 fourcc = VA_FOURCC_422H;
1916 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1917 v1 == 1 && v2 == 1 && v3 == 1) {
1918 subsampling = SUBSAMPLE_YUV444;
1919 fourcc = VA_FOURCC_444P;
1920 } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1921 v1 == 1 && v2 == 1 && v3 == 1) {
1922 subsampling = SUBSAMPLE_YUV411;
1923 fourcc = VA_FOURCC_411P;
1924 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1925 v1 == 2 && v2 == 1 && v3 == 1) {
1926 subsampling = SUBSAMPLE_YUV422V;
1927 fourcc = VA_FOURCC_422V;
1928 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1929 v1 == 2 && v2 == 2 && v3 == 2) {
1930 subsampling = SUBSAMPLE_YUV422H;
1931 fourcc = VA_FOURCC_422H;
1932 } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1933 v1 == 2 && v2 == 1 && v3 == 1) {
1934 subsampling = SUBSAMPLE_YUV422V;
1935 fourcc = VA_FOURCC_422V;
1942 /* Current decoded picture */
1943 obj_surface = decode_state->render_object;
1944 i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
/* JPEG output bypasses deblocking: only the pre-deblocking slot is used. */
1946 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1947 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1948 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1949 gen7_mfd_context->pre_deblocking_output.valid = 1;
/* No scratch buffers are required for JPEG decode. */
1951 gen7_mfd_context->post_deblocking_output.bo = NULL;
1952 gen7_mfd_context->post_deblocking_output.valid = 0;
1954 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1955 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1957 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1958 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1960 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1961 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1963 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1964 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1966 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1967 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Map VA rotation indices (0/90/180/270) to the encodings used in the
 * rotation field of MFX_JPEG_PIC_STATE. Only entry 0 is used below. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
1978 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1979 struct decode_state *decode_state,
1980 struct gen7_mfd_context *gen7_mfd_context)
1982 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1983 VAPictureParameterBufferJPEGBaseline *pic_param;
1984 int chroma_type = GEN7_YUV420;
1985 int frame_width_in_blks;
1986 int frame_height_in_blks;
1988 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1989 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1991 if (pic_param->num_components == 1)
1992 chroma_type = GEN7_YUV400;
1993 else if (pic_param->num_components == 3) {
1994 int h1 = pic_param->components[0].h_sampling_factor;
1995 int h2 = pic_param->components[1].h_sampling_factor;
1996 int h3 = pic_param->components[2].h_sampling_factor;
1997 int v1 = pic_param->components[0].v_sampling_factor;
1998 int v2 = pic_param->components[1].v_sampling_factor;
1999 int v3 = pic_param->components[2].v_sampling_factor;
2001 if (h1 == 2 && h2 == 1 && h3 == 1 &&
2002 v1 == 2 && v2 == 1 && v3 == 1)
2003 chroma_type = GEN7_YUV420;
2004 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2005 v1 == 1 && v2 == 1 && v3 == 1)
2006 chroma_type = GEN7_YUV422H_2Y;
2007 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2008 v1 == 1 && v2 == 1 && v3 == 1)
2009 chroma_type = GEN7_YUV444;
2010 else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2011 v1 == 1 && v2 == 1 && v3 == 1)
2012 chroma_type = GEN7_YUV411;
2013 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2014 v1 == 2 && v2 == 1 && v3 == 1)
2015 chroma_type = GEN7_YUV422V_2Y;
2016 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2017 v1 == 2 && v2 == 2 && v3 == 2)
2018 chroma_type = GEN7_YUV422H_4Y;
2019 else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2020 v1 == 2 && v2 == 1 && v3 == 1)
2021 chroma_type = GEN7_YUV422V_4Y;
2026 if (chroma_type == GEN7_YUV400 ||
2027 chroma_type == GEN7_YUV444 ||
2028 chroma_type == GEN7_YUV422V_2Y) {
2029 frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2030 frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2031 } else if (chroma_type == GEN7_YUV411) {
2032 frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2033 frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2035 frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2036 frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2039 BEGIN_BCS_BATCH(batch, 3);
2040 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2041 OUT_BCS_BATCH(batch,
2042 (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
2043 (chroma_type << 0));
2044 OUT_BCS_BATCH(batch,
2045 ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
2046 ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
2047 ADVANCE_BCS_BATCH(batch);
/* Hardware Huffman-table IDs for the two VA table slots (index 0 and 1).
 * NOTE(review): initializer entries fall outside this extract —
 * presumably the Y and UV MFX table IDs; confirm against the file. */
static const int va_to_gen7_jpeg_hufftable[2] = {
/*
 * Load the JPEG Huffman tables requested by the application.
 *
 * Emits one MFX_JPEG_HUFF_TABLE_STATE command (53 DWORDs: 2 header +
 * 204 bytes of payload) per table slot that has load_huffman_table set.
 * Payload layout is num_dc_codes (12B), dc_values (12B), num_ac_codes
 * (16B), ac_values (162B padded to 164B for DWORD alignment).
 */
static void
gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct gen7_mfd_context *gen7_mfd_context,
                               int num_tables)
{
    VAHuffmanTableBufferJPEGBaseline *huffman_table;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int index;

    /* Nothing to do if the app supplied no Huffman table buffer. */
    if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
        return;

    huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;

    for (index = 0; index < num_tables; index++) {
        int id = va_to_gen7_jpeg_hufftable[index];

        if (!huffman_table->load_huffman_table[index])
            continue;

        BEGIN_BCS_BATCH(batch, 53);
        OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
        OUT_BCS_BATCH(batch, id);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
        ADVANCE_BCS_BATCH(batch);
    }
}
/* Map 1-based JPEG component ids (1=Y, 2=Cb, 3=Cr, 4=alpha) to MFX
 * quantizer-matrix types; entry 0 is an unused placeholder since
 * component ids are normalized to start at 1. */
static const int va_to_gen7_jpeg_qm[5] = {
    -1, /* index 0 unused */
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
/*
 * Load the JPEG quantization matrices for every picture component.
 *
 * The VA buffer stores each 64-entry table in zig-zag scan order; the
 * hardware expects raster order, so the table is de-zigzagged before
 * being handed to gen8_mfd_qm_state().
 */
static void
gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferJPEGBaseline *pic_param;
    VAIQMatrixBufferJPEGBaseline *iq_matrix;
    int index;

    if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
        return;

    iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    assert(pic_param->num_components <= 3);

    for (index = 0; index < pic_param->num_components; index++) {
        /* Normalize the component id so the first component maps to 1,
         * regardless of the ids the stream actually uses. */
        int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
        int qm_type;
        unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
        unsigned char raster_qm[64];
        int j;

        /* Skip ids outside the 1..4 range covered by va_to_gen7_jpeg_qm. */
        if (id > 4 || id < 1)
            continue;

        if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
            continue;

        qm_type = va_to_gen7_jpeg_qm[id];

        /* Convert from zig-zag to raster order. */
        for (j = 0; j < 64; j++)
            raster_qm[zigzag_direct[j]] = qm[j];

        gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
    }
}
/*
 * Emit one MFD_JPEG_BSD_OBJECT command for a single JPEG scan.
 *
 * Builds the 3-bit scan-component mask (bit 0 = Y, 1 = Cb, 2 = Cr) by
 * normalizing each scan component selector against the first picture
 * component id, then programs the scan's data location, position, MCU
 * count, and restart interval.
 */
static void
gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
                         VAPictureParameterBufferJPEGBaseline *pic_param,
                         VASliceParameterBufferJPEGBaseline *slice_param,
                         VASliceParameterBufferJPEGBaseline *next_slice_param,
                         dri_bo *slice_data_bo,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int scan_component_mask = 0;
    int i;

    assert(slice_param->num_components > 0);
    assert(slice_param->num_components < 4);
    assert(slice_param->num_components <= pic_param->num_components);

    for (i = 0; i < slice_param->num_components; i++) {
        switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
        case 1:
            scan_component_mask |= (1 << 0);
            break;
        case 2:
            scan_component_mask |= (1 << 1);
            break;
        case 3:
            scan_component_mask |= (1 << 2);
            break;
        default:
            assert(0);
            break;
        }
    }

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size);
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  slice_param->slice_horizontal_position << 16 |
                  slice_param->slice_vertical_position << 0);
    OUT_BCS_BATCH(batch,
                  ((slice_param->num_components != 1) << 30) |  /* interleaved */
                  (scan_component_mask << 27) |                 /* scan components */
                  (0 << 26) |   /* disable interrupt allowed */
                  (slice_param->num_mcus << 0));                /* MCU count */
    OUT_BCS_BATCH(batch,
                  (slice_param->restart_interval << 0));        /* RestartInterval */
    ADVANCE_BCS_BATCH(batch);
}
/* Workaround for JPEG decoding on Ivybridge */
/* A tiny pre-canned AVC clip that is decoded before every real JPEG
 * picture to put the MFX pipe into a known-good state.
 * NOTE(review): the struct member list and the trailing initializers
 * (dimensions, data_size, bit offset, qp) fall outside this extract —
 * restored from context; confirm against the file. */
static struct {
    int width;
    int height;
    unsigned char data[32];   /* pre-encoded AVC bitstream payload */
    int data_size;
    int data_bit_offset;      /* bit offset of the slice data proper */
    int qp;
} gen7_jpeg_wa_clip = {
    16,
    16,
    {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    14,
    40,
    28,
};
/*
 * Allocate (or re-allocate) the scratch surface and slice-data BO used
 * by the JPEG workaround clip. The surface is recreated each time so
 * it always matches the clip dimensions; the slice-data BO is created
 * once and the canned bitstream uploaded into it.
 */
static void
gen8_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAStatus status;
    struct object_surface *obj_surface;

    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
                             1);

    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 1,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);

    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen7_mfd_context->jpeg_wa_surface_object = obj_surface;

    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
                                                               "JPEG WA data",
                                                               0x1000,
                                                               0x1000);
        /* Upload the canned AVC bitstream once; it never changes. */
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       0,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
    }
}
/*
 * MFX_PIPE_MODE_SELECT for the workaround clip: AVC VLD decode with
 * pre-deblocking output only and all error-termination bits cleared.
 */
static void
gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) |  /* VLD mode */
                  (0 << 10) |             /* disable Stream-Out */
                  (0 << 9)  |             /* Post Deblocking Output */
                  (1 << 8)  |             /* Pre Deblocking Output */
                  (0 << 5)  |             /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4) | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));   /* WA clip is an AVC stream */
    OUT_BCS_BATCH(batch,
                  (0 << 4) | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3) | /* terminate if AVC mbdata error occurs */
                  (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1) |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
/*
 * MFX_SURFACE_STATE for the workaround clip's NV12 scratch surface:
 * planar 4:2:0, interleaved chroma, Y-major tiled.
 */
static void
gen8_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
/*
 * MFX_PIPE_BUF_ADDR_STATE for the workaround clip. Only two addresses
 * are live: the scratch surface as the pre-deblocking output and a
 * throw-away intra-row-store BO (freed again after emission — the
 * kernel keeps it alive while the batch references it). Every other
 * address slot is programmed to zero.
 */
static void
gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *intra_bo;
    int i;

    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
                            "intra row store",
                            128 * 64,
                            0x1000);

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* DW1-3: pre-deblocking output = WA scratch surface */
    OUT_BCS_RELOC64(batch,
                    obj_surface->bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    OUT_BCS_BATCH(batch, 0); /* post deblocking */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 13-15 is for intra row store scratch */
    OUT_BCS_RELOC64(batch,
                    intra_bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW 16-18 is for deblocking filter */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 19..50: reference picture addresses (unused for the WA clip) */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);

    /* the DW52-54 is for mb status address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* the DW56-60 is for ILDB & second ILDB address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* The batch holds its own reference via the relocation. */
    dri_bo_unreference(intra_bo);
}
/*
 * MFX_BSP_BUF_BASE_ADDR_STATE for the workaround clip: temporary
 * BSD/MPC and MPR row-store BOs sized for a 120-MB-wide frame. Both
 * BOs are released right after emission; the batch relocation keeps
 * them alive until execution completes.
 */
static void
gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1. 0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    OUT_BCS_RELOC64(batch,
                    bsd_mpc_bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    OUT_BCS_RELOC64(batch,
                    mpr_bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
/* Intentionally empty: the workaround clip needs no AVC quantizer
 * matrices; kept as a placeholder to mirror the real decode sequence. */
static void
gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{
}
/*
 * MFX_AVC_IMG_STATE for the 1x1-MB workaround clip: a single 4:2:0
 * CABAC macroblock, no MBAFF, everything else defaulted to zero.
 * NOTE(review): several all-zero bit-field lines were elided in this
 * extract and are restored below — confirm against the file.
 */
static void
gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct = 0;
    int mbaff_frame_flag = 0;
    unsigned int width_in_mbs = 1, height_in_mbs = 1;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  width_in_mbs * height_in_mbs);
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 24) |
                  (0 << 16) |
                  (0 << 14) |
                  (0 << 13) |
                  (0 << 12) | /* differ from GEN6 */
                  (0 << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7)  | /* CABAC */
                  (0 << 6)  |
                  (0 << 5)  |
                  (0 << 4)  |
                  (0 << 3)  |
                  (1 << 2)  | /* frame_mbs_only_flag */
                  (mbaff_frame_flag << 1) |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
/*
 * MFX_AVC_DIRECTMODE_STATE for the workaround clip: an intra-only
 * stream uses no direct-mode references, so all 71 DWORDs of surface
 * addresses and POC entries are programmed to zero.
 */
static void
gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
                                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0); /* top */
        OUT_BCS_BATCH(batch, 0); /* bottom */
    }

    OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field */
    OUT_BCS_BATCH(batch, 0); /* top */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC list */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
/*
 * MFX_IND_OBJ_BASE_ADDR_STATE: point the bitstream fetcher at the BO
 * holding the canned workaround clip; the remaining base addresses are
 * unused in VLD mode.
 */
static void
gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC64(batch,
                    gen7_mfd_context->jpeg_wa_slice_data_bo,
                    I915_GEM_DOMAIN_INSTRUCTION, 0,
                    0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
/*
 * MFD_AVC_BSD_OBJECT for the workaround clip: decode the whole canned
 * bitstream as one last slice, with the slice-data start expressed as
 * a byte offset (bits 16..) plus a residual bit offset (bits 0-2).
 */
static void
gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitsteam format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  /* NOTE(review): individual zero control bits elided in
                   * this extract; net DWORD value is 0 — confirm. */
                  0);
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
                  (0 << 5) |
                  (0 << 4) |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
/*
 * MFX_AVC_SLICE_STATE for the workaround clip: one intra (I) slice
 * covering the single macroblock, deblocking disabled, QP taken from
 * the canned clip parameters, flagged as the last slice.
 */
static void
gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (1 << 27) | /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
/*
 * Run the full JPEG workaround sequence: decode the tiny canned AVC
 * clip to flush/reset the MFX pipe before programming it for JPEG.
 * The command order mirrors a regular AVC decode pass.
 */
static void
gen8_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    gen8_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
/*
 * Top-level JPEG Baseline picture decode.
 *
 * After init and the hardware workaround, the picture-level state is
 * emitted once; then the slice parameters are walked twice: a first
 * pass to find the highest Huffman table selector actually referenced
 * (so only the needed tables are loaded), and a second pass emitting
 * one BSD object per scan.
 */
static void
gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferJPEGBaseline *pic_param;
    VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j, max_selector = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    /* Currently only support Baseline DCT */
    gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);

    gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);

    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Pass 1: find the highest DC/AC table selector in use. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            int component;

            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            for (component = 0; component < slice_param->num_components; component++) {
                if (max_selector < slice_param->components[component].dc_table_selector)
                    max_selector = slice_param->components[component].dc_table_selector;

                if (max_selector < slice_param->components[component].ac_table_selector)
                    max_selector = slice_param->components[component].ac_table_selector;
            }

            slice_param++;
        }
    }

    assert(max_selector < 2);
    gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);

    /* Pass 2: emit one BSD object per scan. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
/* VP8 DC dequantization lookup (RFC 6386 §14.1), indexed by a clipped
 * quantization index in [0, 127]. */
static const int vp8_dc_qlookup[128] = {
    4,   5,   6,   7,   8,   9,   10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
    18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
    29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
    44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
    59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
    75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
    91,  93,  95,  96,  98,  100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
};
/* VP8 AC dequantization lookup (RFC 6386 §14.1), indexed by a clipped
 * quantization index in [0, 127]. */
static const int vp8_ac_qlookup[128] = {
    4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
    20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
    36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
    52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
    78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98,  100, 102, 104, 106, 108,
    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
};
/* Clamp a VP8 quantization index into [0, 127], the valid index range
 * of the vp8_dc_qlookup / vp8_ac_qlookup tables (RFC 6386 §9.6). */
static inline unsigned int vp8_clip_quantization_index(int index)
{
    if (index < 0)
        return 0;
    if (index > 127)
        return 127;
    return index;
}
/*
 * Per-picture setup for VP8 decoding.
 *
 * Updates the reference frame-store slots, allocates the NV12 render
 * surface, routes the decoded output through the loop filter (post-
 * deblocking) or around it depending on loop_filter_disable, ensures
 * the segmentation map buffer, and (re)allocates the per-row scratch
 * buffers whose sizes scale with the frame width in macroblocks.
 */
static void
gen8_mfd_vp8_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;
    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    int width_in_mbs = (pic_param->frame_width + 15) / 16;
    int height_in_mbs = (pic_param->frame_height + 15) / 16;

    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    intel_update_vp8_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* Exactly one of the two outputs is valid: post-deblocking when the
     * loop filter runs, pre-deblocking when it is disabled. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;

    intel_ensure_vp8_segmentation_buffer(ctx,
                                         &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);

    /* The same as AVC */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* VP8 has no bitplane buffer. */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
2822 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2823 struct decode_state *decode_state,
2824 struct gen7_mfd_context *gen7_mfd_context)
2826 struct i965_driver_data *i965 = i965_driver_data(ctx);
2827 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2828 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2829 VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2830 VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2831 dri_bo *probs_bo = decode_state->probability_data->bo;
2833 unsigned int quantization_value[4][6];
2835 /* There is no safe way to error out if the segmentation buffer
2836 could not be allocated. So, instead of aborting, simply decode
2837 something even if the result may look totally inacurate */
2838 const unsigned int enable_segmentation =
2839 pic_param->pic_fields.bits.segmentation_enabled &&
2840 gen7_mfd_context->segmentation_buffer.valid;
2842 log2num = (int)log2(slice_param->num_of_partitions - 1);
2844 BEGIN_BCS_BATCH(batch, 38);
2845 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2846 OUT_BCS_BATCH(batch,
2847 (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2848 (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2849 OUT_BCS_BATCH(batch,
2851 pic_param->pic_fields.bits.sharpness_level << 16 |
2852 pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2853 pic_param->pic_fields.bits.sign_bias_golden << 12 |
2854 pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2855 pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2856 (enable_segmentation &&
2857 pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
2858 pic_param->pic_fields.bits.segmentation_enabled << 8 |
2859 (enable_segmentation &&
2860 !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2861 (enable_segmentation &&
2862 pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2863 (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2864 pic_param->pic_fields.bits.filter_type << 4 |
2865 (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2866 !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2868 OUT_BCS_BATCH(batch,
2869 pic_param->loop_filter_level[3] << 24 |
2870 pic_param->loop_filter_level[2] << 16 |
2871 pic_param->loop_filter_level[1] << 8 |
2872 pic_param->loop_filter_level[0] << 0);
2874 /* Quantizer Value for 4 segmetns, DW4-DW15 */
2875 for (i = 0; i < 4; i++) {
2876 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2877 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2878 quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /*y2dc*/
2879 /* 101581>>16 is equivalent to 155/100 */
2880 quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /*y2ac*/
2881 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2882 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2884 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2885 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2887 OUT_BCS_BATCH(batch,
2888 quantization_value[i][0] << 16 | /* Y1AC */
2889 quantization_value[i][1] << 0); /* Y1DC */
2890 OUT_BCS_BATCH(batch,
2891 quantization_value[i][5] << 16 | /* UVAC */
2892 quantization_value[i][4] << 0); /* UVDC */
2893 OUT_BCS_BATCH(batch,
2894 quantization_value[i][3] << 16 | /* Y2AC */
2895 quantization_value[i][2] << 0); /* Y2DC */
2898 /* CoeffProbability table for non-key frame, DW16-DW18 */
2900 OUT_BCS_RELOC64(batch, probs_bo,
2901 0, I915_GEM_DOMAIN_INSTRUCTION,
2903 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2905 OUT_BCS_BATCH(batch, 0);
2906 OUT_BCS_BATCH(batch, 0);
2907 OUT_BCS_BATCH(batch, 0);
2910 OUT_BCS_BATCH(batch,
2911 pic_param->mb_segment_tree_probs[2] << 16 |
2912 pic_param->mb_segment_tree_probs[1] << 8 |
2913 pic_param->mb_segment_tree_probs[0] << 0);
2915 OUT_BCS_BATCH(batch,
2916 pic_param->prob_skip_false << 24 |
2917 pic_param->prob_intra << 16 |
2918 pic_param->prob_last << 8 |
2919 pic_param->prob_gf << 0);
2921 OUT_BCS_BATCH(batch,
2922 pic_param->y_mode_probs[3] << 24 |
2923 pic_param->y_mode_probs[2] << 16 |
2924 pic_param->y_mode_probs[1] << 8 |
2925 pic_param->y_mode_probs[0] << 0);
2927 OUT_BCS_BATCH(batch,
2928 pic_param->uv_mode_probs[2] << 16 |
2929 pic_param->uv_mode_probs[1] << 8 |
2930 pic_param->uv_mode_probs[0] << 0);
2932 /* MV update value, DW23-DW32 */
2933 for (i = 0; i < 2; i++) {
2934 for (j = 0; j < 20; j += 4) {
2935 OUT_BCS_BATCH(batch,
2936 (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2937 pic_param->mv_probs[i][j + 2] << 16 |
2938 pic_param->mv_probs[i][j + 1] << 8 |
2939 pic_param->mv_probs[i][j + 0] << 0);
2943 OUT_BCS_BATCH(batch,
2944 (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2945 (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2946 (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) << 8 |
2947 (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) << 0);
2949 OUT_BCS_BATCH(batch,
2950 (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2951 (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2952 (pic_param->loop_filter_deltas_mode[1] & 0x7f) << 8 |
2953 (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0);
2955 /* segmentation id stream base address, DW35-DW37 */
2956 if (enable_segmentation) {
2957 OUT_BCS_RELOC64(batch, gen7_mfd_context->segmentation_buffer.bo,
2958 0, I915_GEM_DOMAIN_INSTRUCTION,
2960 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2962 OUT_BCS_BATCH(batch, 0);
2963 OUT_BCS_BATCH(batch, 0);
2964 OUT_BCS_BATCH(batch, 0);
2966 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the MFD_VP8_BSD_OBJECT command (22 DWs) that hands the BSD unit
 * the VP8 bool-coder entropy state and the byte layout of the token
 * partitions inside slice_data_bo.
 */
gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVP8 *pic_param,
                        VASliceParameterBufferVP8 *slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* Byte offset of the first token partition: macroblock_offset is in
     * bits, so round it up to whole bytes before adding. */
    unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
    /* Number of bits already consumed from the current bool-decoder byte. */
    unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
    unsigned int partition_size_0 = slice_param->partition_size[0];

    assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
    if (used_bits == 8) {
        /* Whole byte consumed: exclude it from partition 0's size. */
        partition_size_0 -= 1;

    /* VP8 carries the first (mode/MV) partition plus 1..8 token
     * partitions, so 2..9 in total. */
    assert(slice_param->num_of_partitions >= 2);
    assert(slice_param->num_of_partitions <= 9);

    log2num = (int)log2(slice_param->num_of_partitions - 1);

    BEGIN_BCS_BATCH(batch, 22);
    OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
    OUT_BCS_BATCH(batch,
                  used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
                  pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
    OUT_BCS_BATCH(batch, partition_size_0 + 1);
    OUT_BCS_BATCH(batch, offset);
    /* Partition sizes in bytes are present after the above first partition
     * when there is more than one token partition (3 bytes per size). */
    offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
    for (i = 1; i < 9; i++) {
        if (i < slice_param->num_of_partitions) {
            OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
            OUT_BCS_BATCH(batch, offset);
            /* Zeros fill the size/offset slots of absent partitions —
             * NOTE(review): the else-branch marker is not visible in this
             * chunk; confirm against the full source. */
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        offset += slice_param->partition_size[i];

    OUT_BCS_BATCH(batch, 0); /* concealment method */

    ADVANCE_BCS_BATCH(batch);
/*
 * Decode one VP8 frame: validate the buffers attached to decode_state,
 * then emit the full MFX command sequence (pipe-mode select through the
 * BSD object) into the BCS batch and flush it to the GPU.
 */
gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVP8 *pic_param;
    VASliceParameterBufferVP8 *slice_param;
    dri_bo *slice_data_bo;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;

    /* one slice per frame: require exactly one slice parameter element
     * with attached slice data and a probability table, otherwise bail. */
    if (decode_state->num_slice_params != 1 ||
        (!decode_state->slice_params ||
         !decode_state->slice_params[0] ||
         (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
        (!decode_state->slice_datas ||
         !decode_state->slice_datas[0] ||
         !decode_state->slice_datas[0]->bo) ||
        !decode_state->probability_data) {
        WARN_ONCE("Wrong parameters for VP8 decoding\n");

    slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
    slice_data_bo = decode_state->slice_datas[0]->bo;

    gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
    /* All MFX state + object commands are emitted atomically so the
     * kernel cannot split the frame's command sequence. */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
/*
 * hw_context::run entry point for gen8 decoding: sanity-check the input
 * buffers, then dispatch to the per-codec decode routine based on the
 * requested profile.
 */
gen8_mfd_decode_picture(VADriverContextP ctx,
                        union codec_state *codec_state,
                        struct hw_context *hw_context)
    struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
    struct decode_state *decode_state = &codec_state->decode;

    assert(gen7_mfd_context);

    vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);

    /* Propagate validation failures without touching the hardware. */
    if (vaStatus != VA_STATUS_SUCCESS)

    /* Reset the MPEG-2 slice-vertical-position workaround before each frame. */
    gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;

    case VAProfileMPEG2Simple:
    case VAProfileMPEG2Main:
        gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);

    case VAProfileH264ConstrainedBaseline:
    case VAProfileH264Main:
    case VAProfileH264High:
    case VAProfileH264StereoHigh:
    case VAProfileH264MultiviewHigh:
        gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);

    case VAProfileVC1Simple:
    case VAProfileVC1Main:
    case VAProfileVC1Advanced:
        gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);

    case VAProfileJPEGBaseline:
        gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);

    case VAProfileVP8Version0_3:
        gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);

    vaStatus = VA_STATUS_SUCCESS;
/*
 * hw_context::destroy: drop every buffer object owned by the decoder
 * context (output and scratch surfaces, segmentation buffer, JPEG
 * workaround data), free the batchbuffer, then the context itself.
 */
gen8_mfd_context_destroy(void *hw_context)
    VADriverContextP ctx;
    struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;

    ctx = (VADriverContextP)(gen7_mfd_context->driver_context);

    /* NULL each pointer after unreferencing to guard against reuse;
     * dri_bo_unreference() accepts NULL, so these are safe regardless of
     * which buffers were ever allocated. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
    gen7_mfd_context->bitplane_read_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
    gen7_mfd_context->segmentation_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);

    /* The JPEG workaround surface is a driver-created VA surface, so it
     * must go through the surface-destruction path, not dri_bo_unreference. */
    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
        gen7_mfd_context->jpeg_wa_surface_object = NULL;

    intel_batchbuffer_free(gen7_mfd_context->base.batch);
    free(gen7_mfd_context);
/* Initialize the MPEG-2-specific decoder state.
 * NOTE(review): -1 presumably marks each IQ matrix as "not yet supplied"
 * so the first frame forces it to be programmed — confirm against the
 * MPEG-2 qm/pic-state path. */
static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
                                        struct gen7_mfd_context *gen7_mfd_context)
    gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
/*
 * Allocate and initialize a gen8 decoder hw_context for the given
 * config: set the vtable (destroy/run), create the BCS batchbuffer,
 * invalidate the reference-surface slots, and run any codec-specific
 * init for the configured profile.  Returns the context cast to the
 * generic hw_context, or bails when allocation fails.
 */
gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
    struct intel_driver_data *intel = intel_driver_data(ctx);
    struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));

    if (!gen7_mfd_context)

    gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
    gen7_mfd_context->base.run = gen8_mfd_decode_picture;
    gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);

    /* No reference frames yet: invalidate every frame-store slot. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
        gen7_mfd_context->reference_surface[i].frame_store_id = -1;

    gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
    gen7_mfd_context->segmentation_buffer.valid = 0;

    switch (obj_config->profile) {
    case VAProfileMPEG2Simple:
    case VAProfileMPEG2Main:
        gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);

    case VAProfileH264ConstrainedBaseline:
    case VAProfileH264Main:
    case VAProfileH264High:
    case VAProfileH264StereoHigh:
    case VAProfileH264MultiviewHigh:
        gen8_mfd_avc_context_init(ctx, gen7_mfd_context);

    /* Keep a back-pointer so destroy() can reach the driver context. */
    gen7_mfd_context->driver_context = ctx;
    return (struct hw_context *)gen7_mfd_context;