2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
/* True when the GPU stepping is B0 or later (revision comes from intel_driver). */
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/* 8x8 zig-zag scan order: maps scan position -> raster-order coefficient index
 * (used to reorder quantization matrices before uploading to the MFX unit).
 * NOTE(review): the closing "};" line is not visible in this sampled listing. */
51 static const uint32_t zigzag_direct[64] = {
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
54 12, 19, 26, 33, 40, 48, 41, 34,
55 27, 20, 13, 6, 7, 14, 21, 28,
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
/*
 * Attach AVC per-surface private data (GenAvcSurface) to the decode surface,
 * allocating it on first use, and ensure the direct-MV (DMV) scratch buffer
 * exists, sized for the whole frame (width_in_mbs * height_in_mbs * 128 bytes).
 * NOTE(review): this listing is line-sampled; braces/early-return paths between
 * the visible statements are not shown here.
 */
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64 VAPictureParameterBufferH264 *pic_param,
65 struct object_surface *obj_surface)
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
78         if (!gen7_avc_surface)
81         gen7_avc_surface->base.frame_store_id = -1;
82         assert((obj_surface->size & 0x3f) == 0);
83         obj_surface->private_data = gen7_avc_surface;
86     /* DMV buffers now relate to the whole frame, irrespective of
88     if (gen7_avc_surface->dmv_top == NULL) {
89         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
90                                                  "direct mv w/r buffer",
91                                                  width_in_mbs * height_in_mbs * 128,
93         assert(gen7_avc_surface->dmv_top);
/*
 * Emit MFX_PIPE_MODE_SELECT (5 dwords): select the codec (standard_select),
 * long-format VLD decode mode, and whether the pre- or post-deblocking output
 * buffer is active (bits 8/9 mirror the context's *_deblocking_output.valid).
 */
98 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
99 struct decode_state *decode_state,
101 struct gen7_mfd_context *gen7_mfd_context)
103     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
105     assert(standard_select == MFX_FORMAT_MPEG2 ||
106            standard_select == MFX_FORMAT_AVC ||
107            standard_select == MFX_FORMAT_VC1 ||
108            standard_select == MFX_FORMAT_JPEG ||
109            standard_select == MFX_FORMAT_VP8);
111     BEGIN_BCS_BATCH(batch, 5);
112     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
114                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
115                  (MFD_MODE_VLD << 15) | /* VLD mode */
116                  (0 << 10) | /* disable Stream-Out */
117                  (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
118                  (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
119                  (0 << 5) | /* not in stitch mode */
120                  (MFX_CODEC_DECODE << 4) | /* decoding mode */
121                  (standard_select << 0));
123                  (0 << 4) | /* terminate if AVC motion and POC table error occurs */
124                  (0 << 3) | /* terminate if AVC mbdata error occurs */
125                  (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
128     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
129     OUT_BCS_BATCH(batch, 0); /* reserved */
130     ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE (6 dwords) describing the decode render target:
 * dimensions, pitch, Y-major tiling, and the Cb/Cr plane Y-offsets.
 * Y800 surfaces are programmed as monochrome, everything else as
 * interleaved-chroma planar 4:2:0 (chroma interleave is disabled for JPEG).
 */
134 gen8_mfd_surface_state(VADriverContextP ctx,
135 struct decode_state *decode_state,
137 struct gen7_mfd_context *gen7_mfd_context)
139     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
140     struct object_surface *obj_surface = decode_state->render_object;
141     unsigned int y_cb_offset;
142     unsigned int y_cr_offset;
143     unsigned int surface_format;
147     y_cb_offset = obj_surface->y_cb_offset;
148     y_cr_offset = obj_surface->y_cr_offset;
150     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
151                      MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
153     BEGIN_BCS_BATCH(batch, 6);
154     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
155     OUT_BCS_BATCH(batch, 0);
157                  ((obj_surface->orig_height - 1) << 18) |
158                  ((obj_surface->orig_width - 1) << 4));
160                  (surface_format << 28) | /* 420 planar YUV surface */
161                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
162                  (0 << 22) | /* surface object control state, ignored */
163                  ((obj_surface->width - 1) << 3) | /* pitch */
164                  (0 << 2) | /* must be 0 */
165                  (1 << 1) | /* must be tiled */
166                  (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
168                  (0 << 16) | /* X offset for U(Cb), must be 0 */
169                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
171                  (0 << 16) | /* X offset for V(Cr), must be 0 */
172                  (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
173     ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): pre-/post-deblocking outputs,
 * intra and deblocking-filter row-store scratch buffers, the 16 reference
 * picture base addresses, and the macroblock-status sections. Each address
 * group is followed by the platform MOCS dword; invalid/absent buffers are
 * programmed as zero.
 */
177 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
178 struct decode_state *decode_state,
180 struct gen7_mfd_context *gen7_mfd_context)
182     struct i965_driver_data *i965 = i965_driver_data(ctx);
183     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
186     BEGIN_BCS_BATCH(batch, 61);
187     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
188     /* Pre-deblock 1-3 */
189     if (gen7_mfd_context->pre_deblocking_output.valid)
190         OUT_BCS_RELOC64(batch, gen7_mfd_context->pre_deblocking_output.bo,
191                         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
194         OUT_BCS_BATCH(batch, 0);
196         OUT_BCS_BATCH(batch, 0);
198     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
200     /* Post-debloing 4-6 */
201     if (gen7_mfd_context->post_deblocking_output.valid)
202         OUT_BCS_RELOC64(batch, gen7_mfd_context->post_deblocking_output.bo,
203                         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
206         OUT_BCS_BATCH(batch, 0);
208         OUT_BCS_BATCH(batch, 0);
210     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
212     /* uncompressed-video & stream out 7-12 */
213     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
214     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
215     OUT_BCS_BATCH(batch, 0);
216     OUT_BCS_BATCH(batch, 0);
217     OUT_BCS_BATCH(batch, 0);
218     OUT_BCS_BATCH(batch, 0);
220     /* intra row-store scratch 13-15 */
221     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
222         OUT_BCS_RELOC64(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
223                         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
226         OUT_BCS_BATCH(batch, 0);
228         OUT_BCS_BATCH(batch, 0);
230     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
232     /* deblocking-filter-row-store 16-18 */
233     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
234         OUT_BCS_RELOC64(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
235                         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
238         OUT_BCS_BATCH(batch, 0);
239         OUT_BCS_BATCH(batch, 0);
242     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
245     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
246         struct object_surface *obj_surface;
248         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
249             gen7_mfd_context->reference_surface[i].obj_surface &&
250             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
251             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
253             OUT_BCS_RELOC64(batch, obj_surface->bo,
254                             I915_GEM_DOMAIN_INSTRUCTION, 0,
257             OUT_BCS_BATCH(batch, 0);
258             OUT_BCS_BATCH(batch, 0);
263     /* reference property 51 */
264     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
266     /* Macroblock status & ILDB 52-57 */
267     OUT_BCS_BATCH(batch, 0);
268     OUT_BCS_BATCH(batch, 0);
269     OUT_BCS_BATCH(batch, 0);
270     OUT_BCS_BATCH(batch, 0);
271     OUT_BCS_BATCH(batch, 0);
272     OUT_BCS_BATCH(batch, 0);
274     /* the second Macroblock status 58-60 */
275     OUT_BCS_BATCH(batch, 0);
276     OUT_BCS_BATCH(batch, 0);
277     OUT_BCS_BATCH(batch, 0);
279     ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords). Only the indirect bitstream
 * object base (slice_data_bo) is programmed; the MV, IT-COFF, IT-DBLK and
 * encoder PAK-BSE sections are zeroed, as expected for VLD decode.
 */
283 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
284 dri_bo *slice_data_bo,
286 struct gen7_mfd_context *gen7_mfd_context)
288     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
289     struct i965_driver_data *i965 = i965_driver_data(ctx);
291     BEGIN_BCS_BATCH(batch, 26);
292     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
294     OUT_BCS_RELOC64(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
295     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
296     /* Upper bound 4-5 */
297     OUT_BCS_BATCH(batch, 0);
298     OUT_BCS_BATCH(batch, 0);
300     /* MFX indirect MV 6-10 */
301     OUT_BCS_BATCH(batch, 0);
302     OUT_BCS_BATCH(batch, 0);
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305     OUT_BCS_BATCH(batch, 0);
307     /* MFX IT_COFF 11-15 */
308     OUT_BCS_BATCH(batch, 0);
309     OUT_BCS_BATCH(batch, 0);
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312     OUT_BCS_BATCH(batch, 0);
314     /* MFX IT_DBLK 16-20 */
315     OUT_BCS_BATCH(batch, 0);
316     OUT_BCS_BATCH(batch, 0);
317     OUT_BCS_BATCH(batch, 0);
318     OUT_BCS_BATCH(batch, 0);
319     OUT_BCS_BATCH(batch, 0);
321     /* MFX PAK_BSE object for encoder 21-25 */
322     OUT_BCS_BATCH(batch, 0);
323     OUT_BCS_BATCH(batch, 0);
324     OUT_BCS_BATCH(batch, 0);
325     OUT_BCS_BATCH(batch, 0);
326     OUT_BCS_BATCH(batch, 0);
328     ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): BSD/MPC row-store scratch,
 * MPR row-store scratch, and the bitplane read buffer (VC-1), each only when
 * the context marks it valid; absent buffers are programmed as zero.
 */
332 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
333 struct decode_state *decode_state,
335 struct gen7_mfd_context *gen7_mfd_context)
337     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
338     struct i965_driver_data *i965 = i965_driver_data(ctx);
340     BEGIN_BCS_BATCH(batch, 10);
341     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
343     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
344         OUT_BCS_RELOC64(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
345                         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
348         OUT_BCS_BATCH(batch, 0);
349         OUT_BCS_BATCH(batch, 0);
352     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
353     /* MPR Row Store Scratch buffer 4-6 */
354     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
355         OUT_BCS_RELOC64(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
356                         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
359         OUT_BCS_BATCH(batch, 0);
360         OUT_BCS_BATCH(batch, 0);
363     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
366     if (gen7_mfd_context->bitplane_read_buffer.valid)
367         OUT_BCS_RELOC64(batch, gen7_mfd_context->bitplane_read_buffer.bo,
368                         I915_GEM_DOMAIN_INSTRUCTION, 0,
371         OUT_BCS_BATCH(batch, 0);
372         OUT_BCS_BATCH(batch, 0);
374     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
375     ADVANCE_BCS_BATCH(batch);
/*
 * Upload one quantization matrix via MFX_QM_STATE (18 dwords): copies
 * qm_length bytes (at most 64) into a 16-dword staging buffer and emits it.
 * NOTE(review): no memset of qm_buffer is visible before the partial memcpy,
 * so trailing bytes look uninitialized when qm_length < 64 — confirm against
 * the full source.
 */
379 gen8_mfd_qm_state(VADriverContextP ctx,
383 struct gen7_mfd_context *gen7_mfd_context)
385     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
386     unsigned int qm_buffer[16];
388     assert(qm_length <= 16 * 4);
389     memcpy(qm_buffer, qm, qm_length);
391     BEGIN_BCS_BATCH(batch, 18);
392     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
393     OUT_BCS_BATCH(batch, qm_type << 0);
394     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
395     ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_IMG_STATE (17 dwords) from the VA H.264 picture parameters:
 * total MB count, frame size in MBs, chroma QP offsets, weighted prediction,
 * entropy coding, transform-8x8, MBAFF and field flags. Several asserts
 * sanity-check spec constraints (4:2:0/monochrome only, frame_mbs_only
 * implies no MBAFF/fields, etc.).
 */
399 gen8_mfd_avc_img_state(VADriverContextP ctx,
400 struct decode_state *decode_state,
401 struct gen7_mfd_context *gen7_mfd_context)
403     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
405     int mbaff_frame_flag;
406     unsigned int width_in_mbs, height_in_mbs;
407     VAPictureParameterBufferH264 *pic_param;
409     assert(decode_state->pic_param && decode_state->pic_param->buffer);
410     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
411     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
413     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
415     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
420     if ((img_struct & 0x1) == 0x1) {
421         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
423         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
426     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
427         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
428         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
430         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
433     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
434                         !pic_param->pic_fields.bits.field_pic_flag);
436     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
437     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
439     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
440     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
441            pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
442     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
444     BEGIN_BCS_BATCH(batch, 17);
445     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
447                  (width_in_mbs * height_in_mbs - 1));
449                  ((height_in_mbs - 1) << 16) |
450                  ((width_in_mbs - 1) << 0));
452                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
453                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
454                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
455                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
456                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
457                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
460                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
461                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
462                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
463                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
464                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
465                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
466                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
467                  (mbaff_frame_flag << 1) |
468                  (pic_param->pic_fields.bits.field_pic_flag << 0));
469     OUT_BCS_BATCH(batch, 0);
470     OUT_BCS_BATCH(batch, 0);
471     OUT_BCS_BATCH(batch, 0);
472     OUT_BCS_BATCH(batch, 0);
473     OUT_BCS_BATCH(batch, 0);
474     OUT_BCS_BATCH(batch, 0);
475     OUT_BCS_BATCH(batch, 0);
476     OUT_BCS_BATCH(batch, 0);
477     OUT_BCS_BATCH(batch, 0);
478     OUT_BCS_BATCH(batch, 0);
479     OUT_BCS_BATCH(batch, 0);
480     OUT_BCS_BATCH(batch, 0);
481     ADVANCE_BCS_BATCH(batch);
/*
 * Load the AVC scaling lists: 4x4 intra (lists 0-2) and inter (lists 3-5)
 * always, plus the two 8x8 lists when transform_8x8_mode_flag is set.
 * Uses the app-supplied IQ matrix when present, otherwise the context's
 * default (flat) matrix initialized in gen8_mfd_avc_context_init().
 */
485 gen8_mfd_avc_qm_state(VADriverContextP ctx,
486 struct decode_state *decode_state,
487 struct gen7_mfd_context *gen7_mfd_context)
489     VAIQMatrixBufferH264 *iq_matrix;
490     VAPictureParameterBufferH264 *pic_param;
492     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
493         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
495         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
497     assert(decode_state->pic_param && decode_state->pic_param->buffer);
498     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
500     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
501     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
503     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
504         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
505         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
/* Thin wrapper: delegate AVC picture-ID state emission to the shared Gen7.5
 * helper, passing this context's batch and reference-surface table. */
510 gen8_mfd_avc_picid_state(VADriverContextP ctx,
511 struct decode_state *decode_state,
512 struct gen7_mfd_context *gen7_mfd_context)
514     gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
515                                gen7_mfd_context->reference_surface);
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): the direct-MV (dmv_top) buffer
 * address for each valid reference surface and for the current picture,
 * followed by the Top/Bottom picture-order-count pairs for references and
 * the current picture (used for B-slice direct/temporal prediction).
 */
519 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
520 struct decode_state *decode_state,
521 VAPictureParameterBufferH264 *pic_param,
522 VASliceParameterBufferH264 *slice_param,
523 struct gen7_mfd_context *gen7_mfd_context)
525     struct i965_driver_data *i965 = i965_driver_data(ctx);
526     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
527     struct object_surface *obj_surface;
528     GenAvcSurface *gen7_avc_surface;
529     VAPictureH264 *va_pic;
532     BEGIN_BCS_BATCH(batch, 71);
533     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
535     /* reference surfaces 0..15 */
536     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
537         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
538             gen7_mfd_context->reference_surface[i].obj_surface &&
539             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
541             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
542             gen7_avc_surface = obj_surface->private_data;
544             OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
545                             I915_GEM_DOMAIN_INSTRUCTION, 0,
548             OUT_BCS_BATCH(batch, 0);
549             OUT_BCS_BATCH(batch, 0);
553     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
555     /* the current decoding frame/field */
556     va_pic = &pic_param->CurrPic;
557     obj_surface = decode_state->render_object;
558     assert(obj_surface->bo && obj_surface->private_data);
559     gen7_avc_surface = obj_surface->private_data;
561     OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
562                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
565     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
568     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
569         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
572             const VAPictureH264 * const va_pic = avc_find_picture(
573                 obj_surface->base.id, pic_param->ReferenceFrames,
574                 ARRAY_ELEMS(pic_param->ReferenceFrames));
576             assert(va_pic != NULL);
577             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
578             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
580             OUT_BCS_BATCH(batch, 0);
581             OUT_BCS_BATCH(batch, 0);
585     va_pic = &pic_param->CurrPic;
586     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
587     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
589     ADVANCE_BCS_BATCH(batch);
/* Thin wrapper: emit a phantom (error-concealment) slice covering the MBs
 * before the first real slice, via the shared Gen6 helper. */
593 gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
594 VAPictureParameterBufferH264 *pic_param,
595 VASliceParameterBufferH264 *next_slice_param,
596 struct gen7_mfd_context *gen7_mfd_context)
598     gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
/*
 * Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice: normalized slice type
 * (SI->I, SP->P), active reference counts per list, weight denominators,
 * QP/deblocking parameters, the slice's starting MB position and the next
 * slice's starting position (or end-of-picture when this is the last slice).
 * For MBAFF pictures the vertical MB position is doubled (MB-pair rows).
 */
602 gen8_mfd_avc_slice_state(VADriverContextP ctx,
603 VAPictureParameterBufferH264 *pic_param,
604 VASliceParameterBufferH264 *slice_param,
605 VASliceParameterBufferH264 *next_slice_param,
606 struct gen7_mfd_context *gen7_mfd_context)
608     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
609     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
610     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
611     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
612     int num_ref_idx_l0, num_ref_idx_l1;
613     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
614                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
615     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
618     if (slice_param->slice_type == SLICE_TYPE_I ||
619         slice_param->slice_type == SLICE_TYPE_SI) {
620         slice_type = SLICE_TYPE_I;
621     } else if (slice_param->slice_type == SLICE_TYPE_P ||
622                slice_param->slice_type == SLICE_TYPE_SP) {
623         slice_type = SLICE_TYPE_P;
625         assert(slice_param->slice_type == SLICE_TYPE_B);
626         slice_type = SLICE_TYPE_B;
629     if (slice_type == SLICE_TYPE_I) {
630         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
631         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
634     } else if (slice_type == SLICE_TYPE_P) {
635         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
636         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
639         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
640         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
643     first_mb_in_slice = slice_param->first_mb_in_slice;
644     slice_hor_pos = first_mb_in_slice % width_in_mbs;
645     slice_ver_pos = first_mb_in_slice / width_in_mbs;
648         slice_ver_pos = slice_ver_pos << 1;
649     if (next_slice_param) {
650         first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
651         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
652         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
655             next_slice_ver_pos = next_slice_ver_pos << 1;
657         next_slice_hor_pos = 0;
658         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
661     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
662     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
663     OUT_BCS_BATCH(batch, slice_type);
665                  (num_ref_idx_l1 << 24) |
666                  (num_ref_idx_l0 << 16) |
667                  (slice_param->chroma_log2_weight_denom << 8) |
668                  (slice_param->luma_log2_weight_denom << 0));
670                  (slice_param->direct_spatial_mv_pred_flag << 29) |
671                  (slice_param->disable_deblocking_filter_idc << 27) |
672                  (slice_param->cabac_init_idc << 24) |
673                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
674                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
675                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
677                  (slice_ver_pos << 24) |
678                  (slice_hor_pos << 16) |
679                  (first_mb_in_slice << 0));
681                  (next_slice_ver_pos << 16) |
682                  (next_slice_hor_pos << 0));
684                  (next_slice_param == NULL) << 19); /* last slice flag */
685     OUT_BCS_BATCH(batch, 0);
686     OUT_BCS_BATCH(batch, 0);
687     OUT_BCS_BATCH(batch, 0);
688     OUT_BCS_BATCH(batch, 0);
689     ADVANCE_BCS_BATCH(batch);
/* Thin wrapper: emit the AVC reference index remapping state via the shared
 * Gen6 helper using this context's reference-surface table. */
693 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
694 VAPictureParameterBufferH264 *pic_param,
695 VASliceParameterBufferH264 *slice_param,
696 struct gen7_mfd_context *gen7_mfd_context)
698     gen6_send_avc_ref_idx_state(
699         gen7_mfd_context->base.batch,
701         gen7_mfd_context->reference_surface
/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE (98 dwords per list) when explicit weighted
 * prediction applies: one table (list 0) for P/SP slices with
 * weighted_pred_flag, two tables (lists 0 and 1) for B slices with
 * weighted_bipred_idc == 1. Each table packs, per reference index,
 * luma weight/offset and the two chroma weight/offset pairs as shorts.
 */
706 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
707 VAPictureParameterBufferH264 *pic_param,
708 VASliceParameterBufferH264 *slice_param,
709 struct gen7_mfd_context *gen7_mfd_context)
711     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
712     int i, j, num_weight_offset_table = 0;
713     short weightoffsets[32 * 6];
715     if ((slice_param->slice_type == SLICE_TYPE_P ||
716          slice_param->slice_type == SLICE_TYPE_SP) &&
717         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
718         num_weight_offset_table = 1;
721     if ((slice_param->slice_type == SLICE_TYPE_B) &&
722         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
723         num_weight_offset_table = 2;
726     for (i = 0; i < num_weight_offset_table; i++) {
727         BEGIN_BCS_BATCH(batch, 98);
728         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
729         OUT_BCS_BATCH(batch, i);
732             for (j = 0; j < 32; j++) {
733                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
734                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
735                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
736                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
737                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
738                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
741             for (j = 0; j < 32; j++) {
742                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
743                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
744                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
745                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
746                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
747                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
751         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
752         ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFD_AVC_BSD_OBJECT (6 dwords) kicking the decode of one slice:
 * slice data size/offset within slice_data_bo, the byte and bit offset of
 * the first macroblock (computed by avc_get_first_mb_bit_offset), and the
 * last-slice flag when there is no following slice.
 */
757 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
758 VAPictureParameterBufferH264 *pic_param,
759 VASliceParameterBufferH264 *slice_param,
760 dri_bo *slice_data_bo,
761 VASliceParameterBufferH264 *next_slice_param,
762 struct gen7_mfd_context *gen7_mfd_context)
764     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
765     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
767                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
769     /* the input bitsteam format on GEN7 differs from GEN6 */
770     BEGIN_BCS_BATCH(batch, 6);
771     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
773                  (slice_param->slice_data_size));
774     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
782                  ((slice_data_bit_offset >> 3) << 16) |
786                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
787                  (slice_data_bit_offset & 0x7));
788     OUT_BCS_BATCH(batch, 0);
789     ADVANCE_BCS_BATCH(batch);
/* One-time AVC context setup: seed the fallback IQ matrix with flat (default)
 * scaling lists, used when the app supplies no VAIQMatrixBufferH264. */
793 gen8_mfd_avc_context_init(
794 VADriverContextP ctx,
795 struct gen7_mfd_context *gen7_mfd_context
798     /* Initialize flat scaling lists */
799     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/*
 * Per-picture AVC decode setup:
 *  - scan all slices to learn whether in-loop deblocking (ILDB) is enabled
 *    (any slice with disable_deblocking_filter_idc != 1);
 *  - refresh the frame-store index table from the VA reference list;
 *  - mark the render target referenced/unreferenced, ensure its BO and
 *    per-surface AVC private data exist;
 *  - route output to post-deblocking when ILDB is on, pre-deblocking
 *    otherwise, and (re)allocate the intra / deblocking-filter / BSD-MPC /
 *    MPR row-store scratch buffers sized from the picture width in MBs.
 */
803 gen8_mfd_avc_decode_init(VADriverContextP ctx,
804 struct decode_state *decode_state,
805 struct gen7_mfd_context *gen7_mfd_context)
807     VAPictureParameterBufferH264 *pic_param;
808     VASliceParameterBufferH264 *slice_param;
809     struct i965_driver_data *i965 = i965_driver_data(ctx);
810     struct object_surface *obj_surface;
812     int i, j, enable_avc_ildb = 0;
813     unsigned int width_in_mbs, height_in_mbs;
815     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
816         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
817         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
819         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
820             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
821             assert((slice_param->slice_type == SLICE_TYPE_I) ||
822                    (slice_param->slice_type == SLICE_TYPE_SI) ||
823                    (slice_param->slice_type == SLICE_TYPE_P) ||
824                    (slice_param->slice_type == SLICE_TYPE_SP) ||
825                    (slice_param->slice_type == SLICE_TYPE_B));
827             if (slice_param->disable_deblocking_filter_idc != 1) {
836     assert(decode_state->pic_param && decode_state->pic_param->buffer);
837     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
838     gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
839                                        gen7_mfd_context->reference_surface);
840     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
841     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
842     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
843     assert(height_in_mbs > 0 && height_in_mbs <= 256);
845     /* Current decoded picture */
846     obj_surface = decode_state->render_object;
847     if (pic_param->pic_fields.bits.reference_pic_flag)
848         obj_surface->flags |= SURFACE_REFERENCED;
850         obj_surface->flags &= ~SURFACE_REFERENCED;
852     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
853     gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
855     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
856     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
857     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
858     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
860     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
861     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
862     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
863     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
865     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
866     bo = dri_bo_alloc(i965->intel.bufmgr,
871     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
872     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
874     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
875     bo = dri_bo_alloc(i965->intel.bufmgr,
876                       "deblocking filter row store",
877                       width_in_mbs * 64 * 4,
880     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
881     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
883     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
884     bo = dri_bo_alloc(i965->intel.bufmgr,
886                       width_in_mbs * 64 * 2,
889     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
890     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
892     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
893     bo = dri_bo_alloc(i965->intel.bufmgr,
895                       width_in_mbs * 64 * 2,
898     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
899     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
901     gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Top-level AVC decode entry point for one picture: run per-picture init,
 * then inside an atomic BCS batch emit the picture-level MFX state (pipe
 * mode/surface/buffer addresses/QM/picid/img), and for every slice the
 * indirect-object base, direct-mode, ref-idx, weight-offset, slice state
 * and BSD object. A phantom slice is emitted first if the stream's first
 * slice does not start at MB 0. Flushes the batch at the end.
 */
905 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
906 struct decode_state *decode_state,
907 struct gen7_mfd_context *gen7_mfd_context)
909     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
910     VAPictureParameterBufferH264 *pic_param;
911     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
912     dri_bo *slice_data_bo;
915     assert(decode_state->pic_param && decode_state->pic_param->buffer);
916     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
917     gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
919     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
920     intel_batchbuffer_emit_mi_flush(batch);
921     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
922     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
923     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
924     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
925     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
926     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
927     gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
929     for (j = 0; j < decode_state->num_slice_params; j++) {
930         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
931         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
932         slice_data_bo = decode_state->slice_datas[j]->bo;
933         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
935         if (j == decode_state->num_slice_params - 1)
936             next_slice_group_param = NULL;
938             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
940         if (j == 0 && slice_param->first_mb_in_slice)
941             gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
943         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
944             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
945             assert((slice_param->slice_type == SLICE_TYPE_I) ||
946                    (slice_param->slice_type == SLICE_TYPE_SI) ||
947                    (slice_param->slice_type == SLICE_TYPE_P) ||
948                    (slice_param->slice_type == SLICE_TYPE_SP) ||
949                    (slice_param->slice_type == SLICE_TYPE_B));
951             if (i < decode_state->slice_params[j]->num_elements - 1)
952                 next_slice_param = slice_param + 1;
954                 next_slice_param = next_slice_group_param;
956             gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
957             gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
958             gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
959             gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
960             gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
965     intel_batchbuffer_end_atomic(batch);
966     intel_batchbuffer_flush(batch);
/*
 * Per-picture decode setup for MPEG-2: bind reference surfaces, ensure the
 * render target has an NV12 BO, and (re)allocate the BSD/MPC row-store
 * scratch buffer.  Only pre_deblocking_output and the BSD/MPC row store are
 * marked valid; the other MFX scratch buffers are unused for MPEG-2.
 * NOTE(review): this extract has lines elided (braces, the dri_bo
 * declaration, parts of two calls); code is kept byte-identical.
 */
970 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
971 struct decode_state *decode_state,
972 struct gen7_mfd_context *gen7_mfd_context)
974 VAPictureParameterBufferMPEG2 *pic_param;
975 struct i965_driver_data *i965 = i965_driver_data(ctx);
976 struct object_surface *obj_surface;
978 unsigned int width_in_mbs;
/* A picture parameter buffer is mandatory for every decode call. */
980 assert(decode_state->pic_param && decode_state->pic_param->buffer);
981 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
982 width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
984 mpeg2_set_reference_surfaces(
986 gen7_mfd_context->reference_surface,
991 /* Current decoded picture */
992 obj_surface = decode_state->render_object;
993 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Decode writes to the pre-deblocking output surface; the post-deblocking
 * path is disabled below (valid = 0). */
995 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
996 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
997 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
998 gen7_mfd_context->pre_deblocking_output.valid = 1;
/* Re-allocate the row-store scratch buffer each picture; the size
 * expression (presumably based on width_in_mbs) is elided here. */
1000 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1001 bo = dri_bo_alloc(i965->intel.bufmgr,
1002 "bsd mpc row store",
1006 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1007 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
/* Remaining MFX buffers are not used by the MPEG-2 path. */
1009 gen7_mfd_context->post_deblocking_output.valid = 0;
1010 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1011 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1012 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1013 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Emit the 13-dword MFX_MPEG2_PIC_STATE command into the BCS batch:
 * f_code fields, picture coding extension bits, picture coding type, and
 * the frame dimensions in macroblocks.
 * NOTE(review): the condition guarding slice_concealment_disable_bit = 1
 * is elided from this extract — confirm against the full source.
 */
1017 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1018 struct decode_state *decode_state,
1019 struct gen7_mfd_context *gen7_mfd_context)
1021 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1022 VAPictureParameterBufferMPEG2 *pic_param;
1023 unsigned int slice_concealment_disable_bit = 0;
1025 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1026 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1028 slice_concealment_disable_bit = 1;
1030 BEGIN_BCS_BATCH(batch, 13);
1031 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
/* DW1: the four 4-bit f_code values packed high-to-low, followed by the
 * picture_coding_extension flags at their hardware bit positions. */
1032 OUT_BCS_BATCH(batch,
1033 (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1034 ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1035 ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1036 ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1037 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1038 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1039 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1040 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1041 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1042 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1043 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1044 pic_param->picture_coding_extension.bits.alternate_scan << 6);
1045 OUT_BCS_BATCH(batch,
1046 pic_param->picture_coding_type << 9);
/* DW3: concealment control plus frame size in MBs, minus one, as
 * (height - 1) << 16 | (width - 1). */
1047 OUT_BCS_BATCH(batch,
1048 (slice_concealment_disable_bit << 31) |
1049 ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1050 ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
/* DW4..DW12: unused, must be zero. */
1051 OUT_BCS_BATCH(batch, 0);
1052 OUT_BCS_BATCH(batch, 0);
1053 OUT_BCS_BATCH(batch, 0);
1054 OUT_BCS_BATCH(batch, 0);
1055 OUT_BCS_BATCH(batch, 0);
1056 OUT_BCS_BATCH(batch, 0);
1057 OUT_BCS_BATCH(batch, 0);
1058 OUT_BCS_BATCH(batch, 0);
1059 OUT_BCS_BATCH(batch, 0);
1060 ADVANCE_BCS_BATCH(batch);
/*
 * Maintain the driver-side MPEG-2 inverse-quantiser matrices and commit
 * them to hardware.  Incoming VA matrices are in zig-zag order; they are
 * stored de-zigzagged (via zigzag_direct[]) in the context cache so the
 * cached copy survives pictures that do not reload a matrix.  A cached
 * load flag of -1 means "never initialized", forcing the first update.
 * NOTE(review): loop-variable declarations and several closing braces
 * are elided from this extract; code is kept byte-identical.
 */
1064 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1065 struct decode_state *decode_state,
1066 struct gen7_mfd_context *gen7_mfd_context)
1068 VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1071 /* Update internal QM state */
1072 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1073 VAIQMatrixBufferMPEG2 * const iq_matrix =
1074 (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1076 if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1077 iq_matrix->load_intra_quantiser_matrix) {
1078 gen_iq_matrix->load_intra_quantiser_matrix =
1079 iq_matrix->load_intra_quantiser_matrix;
1080 if (iq_matrix->load_intra_quantiser_matrix) {
1081 for (j = 0; j < 64; j++)
1082 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1083 iq_matrix->intra_quantiser_matrix[j];
/* Same refresh logic for the non-intra matrix. */
1087 if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1088 iq_matrix->load_non_intra_quantiser_matrix) {
1089 gen_iq_matrix->load_non_intra_quantiser_matrix =
1090 iq_matrix->load_non_intra_quantiser_matrix;
1091 if (iq_matrix->load_non_intra_quantiser_matrix) {
1092 for (j = 0; j < 64; j++)
1093 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1094 iq_matrix->non_intra_quantiser_matrix[j];
1099 /* Commit QM state to HW */
/* One iteration per matrix type (intra, non-intra). */
1100 for (i = 0; i < 2; i++) {
1101 unsigned char *qm = NULL;
1105 if (gen_iq_matrix->load_intra_quantiser_matrix) {
1106 qm = gen_iq_matrix->intra_quantiser_matrix;
1107 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1110 if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1111 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1112 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1119 gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
/*
 * Emit one MFD_MPEG2_BSD_OBJECT command for a slice.  The slice extent
 * is derived from this slice's start MB position and the next slice's
 * start position (or the picture end when next_slice_param is NULL).
 * A workaround divisor halves slice_vertical_position for field pictures
 * when wa_mpeg2_slice_vertical_position is enabled in the context.
 * NOTE(review): the is_field_pic assignment and parts of DW3 are elided
 * from this extract; code is kept byte-identical.
 */
1124 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1125 VAPictureParameterBufferMPEG2 *pic_param,
1126 VASliceParameterBufferMPEG2 *slice_param,
1127 VASliceParameterBufferMPEG2 *next_slice_param,
1128 struct gen7_mfd_context *gen7_mfd_context)
1130 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1131 unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1132 int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1134 if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1135 pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
/* Apply the vertical-position workaround only for field pictures. */
1137 is_field_pic_wa = is_field_pic &&
1138 gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
/* (vpos0, hpos0): this slice's first macroblock. */
1140 vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1141 hpos0 = slice_param->slice_horizontal_position;
/* (vpos1, hpos1): first macroblock of the next slice, or picture end. */
1143 if (next_slice_param == NULL) {
1144 vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1147 vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1148 hpos1 = next_slice_param->slice_horizontal_position;
1151 mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1153 BEGIN_BCS_BATCH(batch, 5);
1154 OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
/* Byte length / byte offset of the slice data, skipping whole header
 * bytes; the sub-byte remainder goes in DW3 bits 2:0. */
1155 OUT_BCS_BATCH(batch,
1156 slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1157 OUT_BCS_BATCH(batch,
1158 slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1159 OUT_BCS_BATCH(batch,
/* Last-slice flags are set when there is no following slice. */
1163 (next_slice_param == NULL) << 5 |
1164 (next_slice_param == NULL) << 3 |
1165 (slice_param->macroblock_offset & 0x7));
1166 OUT_BCS_BATCH(batch,
1167 (slice_param->quantiser_scale_code << 24) |
1168 (vpos1 << 8 | hpos1));
1169 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level MPEG-2 picture decode: run per-picture init, then emit the
 * whole MFX command sequence (pipe mode, surface, buffer addresses, pic
 * state, QM state) followed by one BSD object per slice, inside a single
 * atomic BCS batch.
 */
1173 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1174 struct decode_state *decode_state,
1175 struct gen7_mfd_context *gen7_mfd_context)
1177 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1178 VAPictureParameterBufferMPEG2 *pic_param;
1179 VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1180 dri_bo *slice_data_bo;
1183 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1184 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1186 gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1187 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1188 intel_batchbuffer_emit_mi_flush(batch);
1189 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1190 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1191 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1192 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1193 gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1194 gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
/* Lazily probe the slice-vertical-position workaround once (< 0 means
 * not yet determined). */
1196 if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1197 gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1198 mpeg2_wa_slice_vertical_position(decode_state, pic_param);
/* Outer loop: slice parameter buffers; inner loop: slices within each
 * buffer.  next_slice_param lets the BSD object know where this slice
 * ends; it is NULL only for the very last slice of the picture. */
1200 for (j = 0; j < decode_state->num_slice_params; j++) {
1201 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1202 slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1203 slice_data_bo = decode_state->slice_datas[j]->bo;
1204 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1206 if (j == decode_state->num_slice_params - 1)
1207 next_slice_group_param = NULL;
1209 next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1211 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1212 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1214 if (i < decode_state->slice_params[j]->num_elements - 1)
1215 next_slice_param = slice_param + 1;
1217 next_slice_param = next_slice_group_param;
1219 gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1224 intel_batchbuffer_end_atomic(batch);
1225 intel_batchbuffer_flush(batch);
/* Map VA VC-1 picture_type values to GEN7_VC1_*_PICTURE hardware codes.
 * NOTE(review): most initializer entries are elided from this extract. */
1228 static const int va_to_gen7_vc1_pic_type[5] = {
1232 GEN7_VC1_BI_PICTURE,
/* Map VA VC-1 mv_mode values to the hardware's unified MV mode encoding. */
1236 static const int va_to_gen7_vc1_mv[4] = {
1238 2, /* 1-MV half-pel */
1239 3, /* 1-MV half-pel bilinear */
/* Scale factors indexed by b_picture_fraction (fixed-point, /256; the
 * scaled value is consumed as (scale_factor * brfd) >> 8). */
1243 static const int b_picture_scale_factor[21] = {
1244 128, 85, 170, 64, 192,
1245 51, 102, 153, 204, 43,
1246 215, 37, 74, 111, 148,
1247 185, 222, 32, 96, 160,
/* Map VA conditional_overlap_flag (0..2) to the hardware condover code.
 * NOTE(review): the initializer entries are elided from this extract. */
1251 static const int va_to_gen7_vc1_condover[3] = {
/* Map VA VC-1 sequence profile (0..3) to the GEN7 profile encoding. */
1257 static const int va_to_gen7_vc1_profile[4] = {
1258 GEN7_VC1_SIMPLE_PROFILE,
1259 GEN7_VC1_MAIN_PROFILE,
1260 GEN7_VC1_RESERVED_PROFILE,
1261 GEN7_VC1_ADVANCED_PROFILE
/*
 * Destructor for the per-surface VC-1 private data (installed as
 * obj_surface->free_private_data): drops the direct-MV BO reference and
 * frees the struct.  NULL *data is tolerated.
 */
1265 gen8_mfd_free_vc1_surface(void **data)
1267 struct gen7_vc1_surface *gen7_vc1_surface = *data;
1269 if (!gen7_vc1_surface)
1272 dri_bo_unreference(gen7_vc1_surface->dmv);
1273 free(gen7_vc1_surface);
/*
 * Attach (or refresh) per-surface VC-1 private data on the render target:
 * records the picture type, resets intensity-compensation state, and
 * lazily allocates the direct-MV read/write buffer (64 bytes per MB).
 * The free callback is installed so surface destruction releases it.
 */
1278 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1279 VAPictureParameterBufferVC1 *pic_param,
1280 struct object_surface *obj_surface)
1282 struct i965_driver_data *i965 = i965_driver_data(ctx);
1283 struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1284 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1285 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1287 obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
/* First use of this surface for VC-1: allocate zeroed private data. */
1289 if (!gen7_vc1_surface) {
1290 gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1292 if (!gen7_vc1_surface)
1295 assert((obj_surface->size & 0x3f) == 0);
1296 obj_surface->private_data = gen7_vc1_surface;
/* Reset per-picture state; intensity compensation is filled in later by
 * the decode-init path when the picture actually uses it. */
1299 gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1300 gen7_vc1_surface->intensity_compensation = 0;
1301 gen7_vc1_surface->luma_scale = 0;
1302 gen7_vc1_surface->luma_shift = 0;
/* Direct-MV buffer is allocated once and reused across pictures. */
1304 if (gen7_vc1_surface->dmv == NULL) {
1305 gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1306 "direct mv w/r buffer",
1307 width_in_mbs * height_in_mbs * 64,
/*
 * Per-picture decode setup for VC-1: propagate intensity-compensation
 * parameters to the forward reference surface, prepare the render target
 * and its private data, allocate the MFX scratch buffers, and unpack the
 * VA bit-plane buffer (two 4-bit MB entries per byte) into the layout
 * the hardware expects.
 * NOTE(review): several declarations, braces and else-branches are
 * elided from this extract; code is kept byte-identical.
 */
1313 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1314 struct decode_state *decode_state,
1315 struct gen7_mfd_context *gen7_mfd_context)
1317 VAPictureParameterBufferVC1 *pic_param;
1318 struct i965_driver_data *i965 = i965_driver_data(ctx);
1319 struct object_surface *obj_surface;
1323 int intensity_compensation;
1325 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1326 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1327 width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1328 picture_type = pic_param->picture_fields.bits.picture_type;
1329 intensity_compensation = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1331 intel_update_vc1_frame_store_index(ctx,
1334 gen7_mfd_context->reference_surface);
1336 /* Forward reference picture */
/* For a P picture using intensity compensation, stash the luma
 * scale/shift on the forward reference so later pictures can read it. */
1337 obj_surface = decode_state->reference_objects[0];
1338 if (pic_param->forward_reference_picture != VA_INVALID_ID &&
1340 obj_surface->private_data) {
1341 if (picture_type == 1 && intensity_compensation) { /* P picture */
1342 struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1344 gen7_vc1_surface->intensity_compensation = intensity_compensation;
1345 gen7_vc1_surface->luma_scale = pic_param->luma_scale;
1346 gen7_vc1_surface->luma_shift = pic_param->luma_shift;
1350 /* Current decoded picture */
1351 obj_surface = decode_state->render_object;
1352 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1353 gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
/* The same BO backs both outputs; exactly one is valid depending on
 * whether the in-loop filter is enabled for this picture. */
1355 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1356 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1357 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1358 gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1360 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1361 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1362 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1363 gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1365 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1366 bo = dri_bo_alloc(i965->intel.bufmgr,
1371 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1372 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1374 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1375 bo = dri_bo_alloc(i965->intel.bufmgr,
1376 "deblocking filter row store",
1377 width_in_mbs * 7 * 64,
1380 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1381 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1383 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1384 bo = dri_bo_alloc(i965->intel.bufmgr,
1385 "bsd mpc row store",
1389 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1390 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1392 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
/* Bit-plane buffer is only needed when the picture carries bit-planes. */
1394 gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1395 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1397 if (gen7_mfd_context->bitplane_read_buffer.valid) {
1398 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1399 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1400 int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1402 uint8_t *src = NULL, *dst = NULL;
1404 assert(decode_state->bit_plane->buffer);
1405 src = decode_state->bit_plane->buffer;
1407 bo = dri_bo_alloc(i965->intel.bufmgr,
1409 bitplane_width * height_in_mbs,
1412 gen7_mfd_context->bitplane_read_buffer.bo = bo;
1414 dri_bo_map(bo, True);
1415 assert(bo->virtual);
/* Repack: the VA buffer holds one 4-bit entry per MB (two per byte,
 * high nibble first); rearrange them row by row into the BO. */
1418 for (src_h = 0; src_h < height_in_mbs; src_h++) {
1419 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1420 int src_index, dst_index;
1424 src_index = (src_h * width_in_mbs + src_w) / 2;
1425 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1426 src_value = ((src[src_index] >> src_shift) & 0xf);
/* Skipped pictures: handling differs (branch body elided here). */
1428 if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1432 dst_index = src_w / 2;
1433 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
/* Odd row width: finish the trailing half-byte of the row. */
1437 dst[src_w / 2] >>= 4;
1439 dst += bitplane_width;
1444 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
/*
 * Build and emit the 6-dword MFD_VC1_LONG_PIC_STATE command.  Most of the
 * function derives hardware fields from the VA picture parameters:
 * alternative-pquant configuration from DQUANT/DQPROFILE, the unified MV
 * mode, B-picture scale factor and BRFD, overlap smoothing, condover, and
 * the sub-pel interpolation mode.
 * NOTE(review): numerous declarations (profile, picture_type, fcm, brfd,
 * overlap, trans_ac_y, ...), braces, else-branches and some switch cases
 * are elided from this extract; code is kept byte-identical.
 */
1448 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1449 struct decode_state *decode_state,
1450 struct gen7_mfd_context *gen7_mfd_context)
1452 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1453 VAPictureParameterBufferVC1 *pic_param;
1454 struct object_surface *obj_surface;
1455 int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1456 int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1457 int unified_mv_mode;
1458 int ref_field_pic_polarity = 0;
1459 int scale_factor = 0;
1461 int dmv_surface_valid = 0;
1467 int interpolation_mode = 0;
1469 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1470 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1472 profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1473 dquant = pic_param->pic_quantizer_fields.bits.dquant;
1474 dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1475 dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1476 dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1477 dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1478 dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1479 alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
/* Derive ALTPQUANT config and the per-edge mask from the DQUANT
 * syntax elements (VC-1 spec VOPDQUANT decoding). */
1482 alt_pquant_config = 0;
1483 alt_pquant_edge_mask = 0;
1484 } else if (dquant == 2) {
1485 alt_pquant_config = 1;
1486 alt_pquant_edge_mask = 0xf;
1488 assert(dquant == 1);
1489 if (dquantfrm == 0) {
1490 alt_pquant_config = 0;
1491 alt_pquant_edge_mask = 0;
1494 assert(dquantfrm == 1);
1495 alt_pquant_config = 1;
1497 switch (dqprofile) {
1499 if (dqbilevel == 0) {
1500 alt_pquant_config = 2;
1501 alt_pquant_edge_mask = 0;
1503 assert(dqbilevel == 1);
1504 alt_pquant_config = 3;
1505 alt_pquant_edge_mask = 0;
1510 alt_pquant_edge_mask = 0xf;
1515 alt_pquant_edge_mask = 0x9;
1517 alt_pquant_edge_mask = (0x3 << dqdbedge);
1522 alt_pquant_edge_mask = (0x1 << dqsbedge);
/* Unified MV mode: with intensity compensation, mv_mode2 holds the
 * effective mode. */
1531 if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1532 assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1533 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1535 assert(pic_param->mv_fields.bits.mv_mode < 4);
1536 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1539 if (pic_param->sequence_fields.bits.interlace == 1 &&
1540 pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1541 /* FIXME: calculate reference field picture polarity */
1543 ref_field_pic_polarity = 0;
1546 if (pic_param->b_picture_fraction < 21)
1547 scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1549 picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
/* Advanced profile has no I/BI distinction at this level; treat I as BI. */
1551 if (profile == GEN7_VC1_ADVANCED_PROFILE &&
1552 picture_type == GEN7_VC1_I_PICTURE)
1553 picture_type = GEN7_VC1_BI_PICTURE;
1555 if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1556 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1558 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1561 * 8.3.6.2.1 Transform Type Selection
1562 * If variable-sized transform coding is not enabled,
1563 * then the 8x8 transform shall be used for all blocks.
1564 * it is also MFX_VC1_PIC_STATE requirement.
1566 if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1567 pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
1568 pic_param->transform_fields.bits.frame_level_transform_type = 0;
/* B pictures need a valid direct-MV surface from the backward
 * reference; intra-only references carry no MVs to read. */
1572 if (picture_type == GEN7_VC1_B_PICTURE) {
1573 struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1575 obj_surface = decode_state->reference_objects[1];
1578 gen7_vc1_surface = obj_surface->private_data;
1580 if (!gen7_vc1_surface ||
1581 (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1582 va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1583 dmv_surface_valid = 0;
1585 dmv_surface_valid = 1;
1588 assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1590 if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1591 fcm = pic_param->picture_fields.bits.frame_coding_mode;
1593 if (pic_param->picture_fields.bits.top_field_first)
/* BRFD: scaled reference distance for B pictures, clamped below. */
1599 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1600 brfd = pic_param->reference_fields.bits.reference_distance;
1601 brfd = (scale_factor * brfd) >> 8;
1602 brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1608 overlap = pic_param->sequence_fields.bits.overlap;
/* Overlap-smoothing enablement rules differ between simple/main and
 * advanced profile (PQUANT threshold, picture type, condover). */
1612 if (profile != GEN7_VC1_ADVANCED_PROFILE) {
1613 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1614 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1618 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1619 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1622 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1623 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
1624 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1626 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1627 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1634 assert(pic_param->conditional_overlap_flag < 3);
1635 assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
/* Sub-pel interpolation mode from the (possibly IC-overridden) MV mode. */
1637 if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1638 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1639 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1640 interpolation_mode = 9; /* Half-pel bilinear */
1641 else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1642 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1643 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1644 interpolation_mode = 1; /* Half-pel bicubic */
1646 interpolation_mode = 0; /* Quarter-pel bicubic */
1648 BEGIN_BCS_BATCH(batch, 6);
1649 OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
/* DW1: coded size in MBs, minus one. */
1650 OUT_BCS_BATCH(batch,
1651 (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1652 ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1653 OUT_BCS_BATCH(batch,
1654 ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1655 dmv_surface_valid << 15 |
1656 (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1657 pic_param->rounding_control << 13 |
1658 pic_param->sequence_fields.bits.syncmarker << 12 |
1659 interpolation_mode << 8 |
1660 0 << 7 | /* FIXME: scale up or down ??? */
1661 pic_param->range_reduction_frame << 6 |
1662 pic_param->entrypoint_fields.bits.loopfilter << 5 |
1664 !pic_param->picture_fields.bits.is_first_field << 3 |
1665 (pic_param->sequence_fields.bits.profile == 3) << 0);
1666 OUT_BCS_BATCH(batch,
1667 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1668 picture_type << 26 |
1671 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1673 OUT_BCS_BATCH(batch,
1674 unified_mv_mode << 28 |
1675 pic_param->mv_fields.bits.four_mv_switch << 27 |
1676 pic_param->fast_uvmc_flag << 26 |
1677 ref_field_pic_polarity << 25 |
1678 pic_param->reference_fields.bits.num_reference_pictures << 24 |
1679 pic_param->reference_fields.bits.reference_distance << 20 |
1680 pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1681 pic_param->mv_fields.bits.extended_dmv_range << 10 |
1682 pic_param->mv_fields.bits.extended_mv_range << 8 |
1683 alt_pquant_edge_mask << 4 |
1684 alt_pquant_config << 2 |
1685 pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1686 pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
/* DW5: bit-plane presence (note the inverted per-plane "raw mode" bits)
 * plus VLC table selectors. */
1687 OUT_BCS_BATCH(batch,
1688 !!pic_param->bitplane_present.value << 31 |
1689 !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1690 !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1691 !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1692 !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1693 !pic_param->bitplane_present.flags.bp_overflags << 26 |
1694 !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1695 !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1696 pic_param->mv_fields.bits.mv_table << 20 |
1697 pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1698 pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1699 pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1700 pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1701 pic_param->mb_mode_table << 8 |
1703 pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1704 pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1705 pic_param->cbp_table << 0);
1706 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_VC1_PRED_PIPE_STATE: forwards the intensity-compensation
 * luma scale/shift previously stashed on the forward reference surface
 * (by gen8_mfd_vc1_decode_init) to the hardware for P/B pictures.
 */
1710 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1711 struct decode_state *decode_state,
1712 struct gen7_mfd_context *gen7_mfd_context)
1714 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1715 VAPictureParameterBufferVC1 *pic_param;
1717 int intensitycomp_single_fwd = 0;
1718 int luma_scale1 = 0;
1719 int luma_shift1 = 0;
1721 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1722 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1723 picture_type = pic_param->picture_fields.bits.picture_type;
/* Pull IC parameters off the forward reference, if one exists. */
1725 if (gen7_mfd_context->reference_surface[0].surface_id != VA_INVALID_ID) {
1726 if (picture_type == 1 || picture_type == 2) { /* P/B picture */
1727 struct gen7_vc1_surface *gen7_vc1_surface = gen7_mfd_context->reference_surface[0].obj_surface->private_data;
1728 if (gen7_vc1_surface) {
1729 intensitycomp_single_fwd = gen7_vc1_surface->intensity_compensation;
1730 luma_scale1 = gen7_vc1_surface->luma_scale;
1731 luma_shift1 = gen7_vc1_surface->luma_shift;
1736 BEGIN_BCS_BATCH(batch, 6);
1737 OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1738 OUT_BCS_BATCH(batch,
1739 0 << 14 | /* FIXME: double ??? */
1741 intensitycomp_single_fwd << 10 |
1743 0 << 4 | /* FIXME: interlace mode */
1745 OUT_BCS_BATCH(batch,
1748 OUT_BCS_BATCH(batch, 0);
1749 OUT_BCS_BATCH(batch, 0);
1750 OUT_BCS_BATCH(batch, 0);
1751 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_VC1_DIRECTMODE_STATE (7 dwords, 64-bit relocations): the
 * direct-MV write buffer comes from the current render target's private
 * data, the read buffer from the backward reference (reference_objects[1]).
 * Either address slot is zeroed when the corresponding BO is absent.
 */
1755 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1756 struct decode_state *decode_state,
1757 struct gen7_mfd_context *gen7_mfd_context)
1759 struct i965_driver_data *i965 = i965_driver_data(ctx);
1760 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1761 struct object_surface *obj_surface;
1762 dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1764 obj_surface = decode_state->render_object;
1766 if (obj_surface && obj_surface->private_data) {
1767 dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1770 obj_surface = decode_state->reference_objects[1];
1772 if (obj_surface && obj_surface->private_data) {
1773 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1776 BEGIN_BCS_BATCH(batch, 7);
1777 OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
/* Write address (2 dwords) + MOCS. */
1779 if (dmv_write_buffer)
1780 OUT_BCS_RELOC64(batch, dmv_write_buffer,
1781 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1784 OUT_BCS_BATCH(batch, 0);
1785 OUT_BCS_BATCH(batch, 0);
1788 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/* Read address (2 dwords, read-only domain) + MOCS. */
1790 if (dmv_read_buffer)
1791 OUT_BCS_RELOC64(batch, dmv_read_buffer,
1792 I915_GEM_DOMAIN_INSTRUCTION, 0,
1795 OUT_BCS_BATCH(batch, 0);
1796 OUT_BCS_BATCH(batch, 0);
1799 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
1801 ADVANCE_BCS_BATCH(batch);
/*
 * Adjust the slice's macroblock bit offset for emulation-prevention:
 * scan the slice-header bytes for a 00 00 03 0x pattern and, when found,
 * recompute the offset at the escape byte's position (the handling
 * inside the match, elided here, presumably skips past it — confirm
 * against the full source).  Returns the corrected bit offset.
 */
1805 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1807 int out_slice_data_bit_offset;
1808 int slice_header_size = in_slice_data_bit_offset / 8;
1812 out_slice_data_bit_offset = in_slice_data_bit_offset;
1814 for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1815 if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1820 out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1823 return out_slice_data_bit_offset;
/*
 * Emit one MFD_VC1_BSD_OBJECT command for a slice.  The slice data BO is
 * mapped on the CPU first so the macroblock bit offset can be corrected
 * for start-code emulation bytes in the slice header.
 */
1827 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1828 VAPictureParameterBufferVC1 *pic_param,
1829 VASliceParameterBufferVC1 *slice_param,
1830 VASliceParameterBufferVC1 *next_slice_param,
1831 dri_bo *slice_data_bo,
1832 struct gen7_mfd_context *gen7_mfd_context)
1834 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1835 int next_slice_start_vert_pos;
1836 int macroblock_offset;
1837 uint8_t *slice_data = NULL;
/* CPU-read the slice header to fix up the bitstream offset. */
1839 dri_bo_map(slice_data_bo, 0);
1840 slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1841 macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1842 slice_param->macroblock_offset,
1843 pic_param->sequence_fields.bits.profile);
1844 dri_bo_unmap(slice_data_bo);
/* The slice ends where the next one starts, or at the picture bottom. */
1846 if (next_slice_param)
1847 next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1849 next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1851 BEGIN_BCS_BATCH(batch, 5);
1852 OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
/* Byte size/offset skip whole header bytes; the sub-byte remainder is
 * passed separately in the last dword. */
1853 OUT_BCS_BATCH(batch,
1854 slice_param->slice_data_size - (macroblock_offset >> 3));
1855 OUT_BCS_BATCH(batch,
1856 slice_param->slice_data_offset + (macroblock_offset >> 3));
1857 OUT_BCS_BATCH(batch,
1858 slice_param->slice_vertical_position << 16 |
1859 next_slice_start_vert_pos << 0);
1860 OUT_BCS_BATCH(batch,
1861 (macroblock_offset & 0x7));
1862 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level VC-1 picture decode: per-picture init, then the MFX command
 * sequence (pipe mode, surface, buffer addresses, long pic state, pred
 * pipe, direct mode) followed by one BSD object per slice, inside a
 * single atomic BCS batch.
 */
1866 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1867 struct decode_state *decode_state,
1868 struct gen7_mfd_context *gen7_mfd_context)
1870 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1871 VAPictureParameterBufferVC1 *pic_param;
1872 VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1873 dri_bo *slice_data_bo;
1876 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1877 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1879 gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1880 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1881 intel_batchbuffer_emit_mi_flush(batch);
1882 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1883 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1884 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1885 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1886 gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1887 gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1888 gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
/* Outer loop: slice parameter buffers; inner loop: slices within each.
 * next_slice_param is NULL only for the picture's final slice. */
1890 for (j = 0; j < decode_state->num_slice_params; j++) {
1891 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1892 slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1893 slice_data_bo = decode_state->slice_datas[j]->bo;
1894 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1896 if (j == decode_state->num_slice_params - 1)
1897 next_slice_group_param = NULL;
1899 next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1901 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1902 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1904 if (i < decode_state->slice_params[j]->num_elements - 1)
1905 next_slice_param = slice_param + 1;
1907 next_slice_param = next_slice_group_param;
1909 gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1914 intel_batchbuffer_end_atomic(batch);
1915 intel_batchbuffer_flush(batch);
1919 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1920 struct decode_state *decode_state,
1921 struct gen7_mfd_context *gen7_mfd_context)
1923 struct object_surface *obj_surface;
1924 VAPictureParameterBufferJPEGBaseline *pic_param;
1925 int subsampling = SUBSAMPLE_YUV420;
1926 int fourcc = VA_FOURCC_IMC3;
1928 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1930 if (pic_param->num_components == 1) {
1931 subsampling = SUBSAMPLE_YUV400;
1932 fourcc = VA_FOURCC_Y800;
1933 } else if (pic_param->num_components == 3) {
1934 int h1 = pic_param->components[0].h_sampling_factor;
1935 int h2 = pic_param->components[1].h_sampling_factor;
1936 int h3 = pic_param->components[2].h_sampling_factor;
1937 int v1 = pic_param->components[0].v_sampling_factor;
1938 int v2 = pic_param->components[1].v_sampling_factor;
1939 int v3 = pic_param->components[2].v_sampling_factor;
1941 if (h1 == 2 * h2 && h2 == h3 &&
1942 v1 == 2 * v2 && v2 == v3) {
1943 subsampling = SUBSAMPLE_YUV420;
1944 fourcc = VA_FOURCC_IMC3;
1945 } else if (h1 == 2 * h2 && h2 == h3 &&
1946 v1 == v2 && v2 == v3) {
1947 subsampling = SUBSAMPLE_YUV422H;
1948 fourcc = VA_FOURCC_422H;
1949 } else if (h1 == h2 && h2 == h3 &&
1950 v1 == v2 && v2 == v3) {
1951 subsampling = SUBSAMPLE_YUV444;
1952 fourcc = VA_FOURCC_444P;
1953 } else if (h1 == 4 * h2 && h2 == h3 &&
1954 v1 == v2 && v2 == v3) {
1955 subsampling = SUBSAMPLE_YUV411;
1956 fourcc = VA_FOURCC_411P;
1957 } else if (h1 == h2 && h2 == h3 &&
1958 v1 == 2 * v2 && v2 == v3) {
1959 subsampling = SUBSAMPLE_YUV422V;
1960 fourcc = VA_FOURCC_422V;
1967 /* Current decoded picture */
1968 obj_surface = decode_state->render_object;
1969 i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1971 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1972 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1973 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1974 gen7_mfd_context->pre_deblocking_output.valid = 1;
1976 gen7_mfd_context->post_deblocking_output.bo = NULL;
1977 gen7_mfd_context->post_deblocking_output.valid = 0;
1979 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1980 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1982 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1983 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1985 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1986 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1988 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1989 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1991 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1992 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Map a VA rotation index (0..3) to the GEN7 MFX JPEG rotation encoding.
 * Only entry 0 (no rotation) is referenced by gen8_mfd_jpeg_pic_state. */
1995 static const int va_to_gen7_jpeg_rotation[4] = {
1996 GEN7_JPEG_ROTATION_0,
1997 GEN7_JPEG_ROTATION_90,
1998 GEN7_JPEG_ROTATION_180,
1999 GEN7_JPEG_ROTATION_270
2003 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
2004 struct decode_state *decode_state,
2005 struct gen7_mfd_context *gen7_mfd_context)
2007 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2008 VAPictureParameterBufferJPEGBaseline *pic_param;
2009 int chroma_type = GEN7_YUV420;
2010 int frame_width_in_blks;
2011 int frame_height_in_blks;
2013 assert(decode_state->pic_param && decode_state->pic_param->buffer);
2014 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2016 if (pic_param->num_components == 1)
2017 chroma_type = GEN7_YUV400;
2018 else if (pic_param->num_components == 3) {
2019 int h1 = pic_param->components[0].h_sampling_factor;
2020 int h2 = pic_param->components[1].h_sampling_factor;
2021 int h3 = pic_param->components[2].h_sampling_factor;
2022 int v1 = pic_param->components[0].v_sampling_factor;
2023 int v2 = pic_param->components[1].v_sampling_factor;
2024 int v3 = pic_param->components[2].v_sampling_factor;
2026 if (h1 == 2 * h2 && h2 == h3 &&
2027 v1 == 2 * v2 && v2 == v3)
2028 chroma_type = GEN7_YUV420;
2029 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2030 v1 == 1 && v2 == 1 && v3 == 1)
2031 chroma_type = GEN7_YUV422H_2Y;
2032 else if (h1 == h2 && h2 == h3 &&
2033 v1 == v2 && v2 == v3)
2034 chroma_type = GEN7_YUV444;
2035 else if (h1 == 4 * h2 && h2 == h3 &&
2036 v1 == v2 && v2 == v3)
2037 chroma_type = GEN7_YUV411;
2038 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2039 v1 == 2 && v2 == 1 && v3 == 1)
2040 chroma_type = GEN7_YUV422V_2Y;
2041 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2042 v1 == 2 && v2 == 2 && v3 == 2)
2043 chroma_type = GEN7_YUV422H_4Y;
2044 else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2045 v1 == 2 && v2 == 1 && v3 == 1)
2046 chroma_type = GEN7_YUV422V_4Y;
2051 if (chroma_type == GEN7_YUV400 ||
2052 chroma_type == GEN7_YUV444 ||
2053 chroma_type == GEN7_YUV422V_2Y) {
2054 frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2055 frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2056 } else if (chroma_type == GEN7_YUV411) {
2057 frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2058 frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2060 frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2061 frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2064 BEGIN_BCS_BATCH(batch, 3);
2065 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2066 OUT_BCS_BATCH(batch,
2067 (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
2068 (chroma_type << 0));
2069 OUT_BCS_BATCH(batch,
2070 ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
2071 ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
2072 ADVANCE_BCS_BATCH(batch);
/* Hardware huffman-table ids indexed by VA table index.
 * NOTE(review): the two initializer entries are not visible in this chunk
 * (presumably the Y and UV table ids) -- confirm against the full file. */
2075 static const int va_to_gen7_jpeg_hufftable[2] = {
/*
 * Emit one MFX_JPEG_HUFF_TABLE_STATE command (53 dwords) for each of the
 * first `num_tables` huffman tables the application flagged for loading.
 * Returns early (not visible here) when no huffman table buffer exists.
 */
2081 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2082 struct decode_state *decode_state,
2083 struct gen7_mfd_context *gen7_mfd_context,
2086 VAHuffmanTableBufferJPEGBaseline *huffman_table;
2087 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2090 if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2093 huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2095 for (index = 0; index < num_tables; index++) {
2096 int id = va_to_gen7_jpeg_hufftable[index];
/* Skip tables the app did not ask to (re)load. */
2097 if (!huffman_table->load_huffman_table[index])
2099 BEGIN_BCS_BATCH(batch, 53);
2100 OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2101 OUT_BCS_BATCH(batch, id);
/* Payload byte counts are fixed by the command layout: 12 + 12 + 16 + 164. */
2102 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2103 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2104 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2105 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2106 ADVANCE_BCS_BATCH(batch);
/* MFX quantizer-matrix target, indexed by 1-based component id.
 * NOTE(review): entry 0 is not visible in this chunk (likely a placeholder
 * since valid ids are 1..4) -- confirm against the full file. */
2110 static const int va_to_gen7_jpeg_qm[5] = {
2112 MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2113 MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2114 MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2115 MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
/*
 * Upload the JPEG quantiser tables: for each picture component, convert
 * the app-supplied zig-zag ordered table to raster order and program the
 * matching hardware QM slot.  No-op when no IQ matrix buffer was given.
 */
2119 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2120 struct decode_state *decode_state,
2121 struct gen7_mfd_context *gen7_mfd_context)
2123 VAPictureParameterBufferJPEGBaseline *pic_param;
2124 VAIQMatrixBufferJPEGBaseline *iq_matrix;
2127 if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2130 iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2131 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2133 assert(pic_param->num_components <= 3);
2135 for (index = 0; index < pic_param->num_components; index++) {
/* Normalize component_id so the first component becomes id 1. */
2136 int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2138 unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2139 unsigned char raster_qm[64];
/* Ignore components whose id falls outside the 1..4 table range. */
2142 if (id > 4 || id < 1)
2145 if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2148 qm_type = va_to_gen7_jpeg_qm[id];
/* De-zig-zag: qm[] is in scan order, hardware wants raster order. */
2150 for (j = 0; j < 64; j++)
2151 raster_qm[zigzag_direct[j]] = qm[j];
2153 gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
/*
 * Emit one MFD_JPEG_BSD_OBJECT (6 dwords) for a JPEG scan: compute the
 * scan-component bitmask from the scan's component selectors and program
 * the slice data size/offset, scan position, MCU count and restart
 * interval.  (The switch case labels are not visible in this chunk;
 * cases 1..3 presumably set bits 0..2 respectively.)
 */
2158 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2159 VAPictureParameterBufferJPEGBaseline *pic_param,
2160 VASliceParameterBufferJPEGBaseline *slice_param,
2161 VASliceParameterBufferJPEGBaseline *next_slice_param,
2162 dri_bo *slice_data_bo,
2163 struct gen7_mfd_context *gen7_mfd_context)
2165 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2166 int scan_component_mask = 0;
2169 assert(slice_param->num_components > 0);
2170 assert(slice_param->num_components < 4);
2171 assert(slice_param->num_components <= pic_param->num_components);
/* Translate each scan component selector to a 1-based component index. */
2173 for (i = 0; i < slice_param->num_components; i++) {
2174 switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2176 scan_component_mask |= (1 << 0);
2179 scan_component_mask |= (1 << 1);
2182 scan_component_mask |= (1 << 2);
2190 BEGIN_BCS_BATCH(batch, 6);
2191 OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2192 OUT_BCS_BATCH(batch,
2193 slice_param->slice_data_size);
2194 OUT_BCS_BATCH(batch,
2195 slice_param->slice_data_offset);
2196 OUT_BCS_BATCH(batch,
2197 slice_param->slice_horizontal_position << 16 |
2198 slice_param->slice_vertical_position << 0);
2199 OUT_BCS_BATCH(batch,
2200 ((slice_param->num_components != 1) << 30) | /* interleaved */
2201 (scan_component_mask << 27) | /* scan components */
2202 (0 << 26) | /* disable interrupt allowed */
2203 (slice_param->num_mcus << 0)); /* MCU count */
2204 OUT_BCS_BATCH(batch,
2205 (slice_param->restart_interval << 0)); /* RestartInterval */
2206 ADVANCE_BCS_BATCH(batch);
2209 /* Workaround for JPEG decoding on Ivybridge */
/* Tiny canned AVC bitstream decoded before each real JPEG picture (see
 * gen8_mfd_jpeg_wa); only part of the struct and its initializer is
 * visible in this chunk (width/height/qp/data_size fields are elided).
 * NOTE(review): data[] holds the clip's slice bytes -- do not edit. */
2215 unsigned char data[32];
2217 int data_bit_offset;
2219 } gen7_jpeg_wa_clip = {
2223 0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2224 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
/*
 * Set up the resources for the JPEG workaround pass: a small NV12
 * scratch surface sized to gen7_jpeg_wa_clip, plus a BO holding the
 * canned AVC slice data (uploaded only on first use).
 */
2232 gen8_jpeg_wa_init(VADriverContextP ctx,
2233 struct gen7_mfd_context *gen7_mfd_context)
2235 struct i965_driver_data *i965 = i965_driver_data(ctx);
2237 struct object_surface *obj_surface;
/* Drop any previously created workaround surface before re-creating. */
2239 if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2240 i965_DestroySurfaces(ctx,
2241 &gen7_mfd_context->jpeg_wa_surface_id,
2244 status = i965_CreateSurfaces(ctx,
2245 gen7_jpeg_wa_clip.width,
2246 gen7_jpeg_wa_clip.height,
2247 VA_RT_FORMAT_YUV420,
2249 &gen7_mfd_context->jpeg_wa_surface_id);
2250 assert(status == VA_STATUS_SUCCESS);
2252 obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2253 assert(obj_surface);
2254 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2255 gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
/* Upload the canned clip bytes once; the BO is reused afterwards. */
2257 if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2258 gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2262 dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2264 gen7_jpeg_wa_clip.data_size,
2265 gen7_jpeg_wa_clip.data);
/*
 * MFX_PIPE_MODE_SELECT for the workaround pass: long-format VLD decode
 * of an AVC stream with the pre-deblocking output enabled.
 */
2270 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2271 struct gen7_mfd_context *gen7_mfd_context)
2273 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2275 BEGIN_BCS_BATCH(batch, 5);
2276 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2277 OUT_BCS_BATCH(batch,
2278 (MFX_LONG_MODE << 17) | /* Currently only support long format */
2279 (MFD_MODE_VLD << 15) | /* VLD mode */
2280 (0 << 10) | /* disable Stream-Out */
2281 (0 << 9) | /* Post Deblocking Output */
2282 (1 << 8) | /* Pre Deblocking Output */
2283 (0 << 5) | /* not in stitch mode */
2284 (MFX_CODEC_DECODE << 4) | /* decoding mode */
2285 (MFX_FORMAT_AVC << 0));
2286 OUT_BCS_BATCH(batch,
2287 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
2288 (0 << 3) | /* terminate if AVC mbdata error occurs */
2289 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
2292 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2293 OUT_BCS_BATCH(batch, 0); /* reserved */
2294 ADVANCE_BCS_BATCH(batch);
/*
 * MFX_SURFACE_STATE for the workaround pass, describing the small NV12
 * scratch surface created by gen8_jpeg_wa_init.
 */
2298 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2299 struct gen7_mfd_context *gen7_mfd_context)
2301 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2302 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2304 BEGIN_BCS_BATCH(batch, 6);
2305 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2306 OUT_BCS_BATCH(batch, 0);
2307 OUT_BCS_BATCH(batch,
2308 ((obj_surface->orig_width - 1) << 18) |
2309 ((obj_surface->orig_height - 1) << 4));
2310 OUT_BCS_BATCH(batch,
2311 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2312 (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2313 (0 << 22) | /* surface object control state, ignored */
2314 ((obj_surface->width - 1) << 3) | /* pitch */
2315 (0 << 2) | /* must be 0 */
2316 (1 << 1) | /* must be tiled */
2317 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
2318 OUT_BCS_BATCH(batch,
2319 (0 << 16) | /* X offset for U(Cb), must be 0 */
2320 (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2321 OUT_BCS_BATCH(batch,
2322 (0 << 16) | /* X offset for V(Cr), must be 0 */
2323 (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2324 ADVANCE_BCS_BATCH(batch);
/*
 * MFX_PIPE_BUF_ADDR_STATE (61 dwords) for the workaround pass: only the
 * pre-deblocking output (the scratch surface BO) and a temporary intra
 * row-store BO are programmed; everything else is zeroed.  The intra BO
 * is released immediately -- the kernel keeps it alive until the batch
 * completes.
 */
2328 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2329 struct gen7_mfd_context *gen7_mfd_context)
2331 struct i965_driver_data *i965 = i965_driver_data(ctx);
2332 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2333 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2337 intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2342 BEGIN_BCS_BATCH(batch, 61);
2343 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2344 OUT_BCS_RELOC64(batch,
2346 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2348 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2351 OUT_BCS_BATCH(batch, 0); /* post deblocking */
2352 OUT_BCS_BATCH(batch, 0);
2353 OUT_BCS_BATCH(batch, 0);
2355 /* uncompressed-video & stream out 7-12 */
2356 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2357 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2358 OUT_BCS_BATCH(batch, 0);
2359 OUT_BCS_BATCH(batch, 0);
2360 OUT_BCS_BATCH(batch, 0);
2361 OUT_BCS_BATCH(batch, 0);
2363 /* the DW 13-15 is for intra row store scratch */
2364 OUT_BCS_RELOC64(batch,
2366 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2369 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2371 /* the DW 16-18 is for deblocking filter */
2372 OUT_BCS_BATCH(batch, 0);
2373 OUT_BCS_BATCH(batch, 0);
2374 OUT_BCS_BATCH(batch, 0);
/* Reference pictures: none are needed for the canned intra clip. */
2377 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2378 OUT_BCS_BATCH(batch, 0);
2379 OUT_BCS_BATCH(batch, 0);
2381 OUT_BCS_BATCH(batch, 0);
2383 /* the DW52-54 is for mb status address */
2384 OUT_BCS_BATCH(batch, 0);
2385 OUT_BCS_BATCH(batch, 0);
2386 OUT_BCS_BATCH(batch, 0);
2387 /* the DW56-60 is for ILDB & second ILDB address */
2388 OUT_BCS_BATCH(batch, 0);
2389 OUT_BCS_BATCH(batch, 0);
2390 OUT_BCS_BATCH(batch, 0);
2391 OUT_BCS_BATCH(batch, 0);
2392 OUT_BCS_BATCH(batch, 0);
2393 OUT_BCS_BATCH(batch, 0);
2395 ADVANCE_BCS_BATCH(batch);
2397 dri_bo_unreference(intra_bo);
/*
 * MFX_BSP_BUF_BASE_ADDR_STATE for the workaround pass: allocate small
 * temporary BSD/MPC and MPR row-store BOs, program their addresses, and
 * drop our references right away (the kernel keeps the BOs alive until
 * the batch retires).
 */
2401 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2402 struct gen7_mfd_context *gen7_mfd_context)
2404 struct i965_driver_data *i965 = i965_driver_data(ctx);
2405 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2406 dri_bo *bsd_mpc_bo, *mpr_bo;
2408 bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2409 "bsd mpc row store",
2410 11520, /* 1.5 * 120 * 64 */
2413 mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2415 7680, /* 1.0 * 120 * 64 */
2418 BEGIN_BCS_BATCH(batch, 10);
2419 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2421 OUT_BCS_RELOC64(batch,
2423 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2426 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2428 OUT_BCS_RELOC64(batch,
2430 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2432 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2434 OUT_BCS_BATCH(batch, 0);
2435 OUT_BCS_BATCH(batch, 0);
2436 OUT_BCS_BATCH(batch, 0);
2438 ADVANCE_BCS_BATCH(batch);
2440 dri_bo_unreference(bsd_mpc_bo);
2441 dri_bo_unreference(mpr_bo);
/* AVC QM programming for the workaround pass.
 * NOTE(review): the body is not visible in this chunk (presumably empty
 * or a no-op since the canned clip needs no custom matrices) -- confirm
 * against the full file before relying on this. */
2445 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2446 struct gen7_mfd_context *gen7_mfd_context)
/*
 * MFX_AVC_IMG_STATE (16 dwords) for the workaround pass: a minimal 1x1
 * macroblock, non-MBAFF image description for the canned clip.
 */
2452 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2453 struct gen7_mfd_context *gen7_mfd_context)
2455 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2457 int mbaff_frame_flag = 0;
2458 unsigned int width_in_mbs = 1, height_in_mbs = 1;
2460 BEGIN_BCS_BATCH(batch, 16);
2461 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2462 OUT_BCS_BATCH(batch,
2463 width_in_mbs * height_in_mbs);
2464 OUT_BCS_BATCH(batch,
2465 ((height_in_mbs - 1) << 16) |
2466 ((width_in_mbs - 1) << 0));
2467 OUT_BCS_BATCH(batch,
2472 (0 << 12) | /* differ from GEN6 */
2475 OUT_BCS_BATCH(batch,
2476 (1 << 10) | /* 4:2:0 */
2477 (1 << 7) | /* CABAC */
2483 (mbaff_frame_flag << 1) |
2485 OUT_BCS_BATCH(batch, 0);
2486 OUT_BCS_BATCH(batch, 0);
2487 OUT_BCS_BATCH(batch, 0);
2488 OUT_BCS_BATCH(batch, 0);
2489 OUT_BCS_BATCH(batch, 0);
2490 OUT_BCS_BATCH(batch, 0);
2491 OUT_BCS_BATCH(batch, 0);
2492 OUT_BCS_BATCH(batch, 0);
2493 OUT_BCS_BATCH(batch, 0);
2494 OUT_BCS_BATCH(batch, 0);
2495 OUT_BCS_BATCH(batch, 0);
2496 ADVANCE_BCS_BATCH(batch);
/*
 * MFX_AVC_DIRECTMODE_STATE (71 dwords) for the workaround pass: all
 * reference/current surface addresses and POC entries are zero, since
 * the canned clip is a single intra picture with no references.
 */
2500 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2501 struct gen7_mfd_context *gen7_mfd_context)
2503 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2506 BEGIN_BCS_BATCH(batch, 71);
2507 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2509 /* reference surfaces 0..15 */
2510 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2511 OUT_BCS_BATCH(batch, 0); /* top */
2512 OUT_BCS_BATCH(batch, 0); /* bottom */
2515 OUT_BCS_BATCH(batch, 0);
2517 /* the current decoding frame/field */
2518 OUT_BCS_BATCH(batch, 0); /* top */
2519 OUT_BCS_BATCH(batch, 0);
2520 OUT_BCS_BATCH(batch, 0);
/* POC list: zeros for every reference slot. */
2523 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2524 OUT_BCS_BATCH(batch, 0);
2525 OUT_BCS_BATCH(batch, 0);
2528 OUT_BCS_BATCH(batch, 0);
2529 OUT_BCS_BATCH(batch, 0);
2531 ADVANCE_BCS_BATCH(batch);
/*
 * MFX_IND_OBJ_BASE_ADDR_STATE for the workaround pass: point the
 * indirect bitstream base at the canned-clip slice data BO.  (The `i965`
 * local used below is declared on a line not visible in this chunk.)
 */
2535 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2536 struct gen7_mfd_context *gen7_mfd_context)
2538 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2540 BEGIN_BCS_BATCH(batch, 11);
2541 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2542 OUT_BCS_RELOC64(batch,
2543 gen7_mfd_context->jpeg_wa_slice_data_bo,
2544 I915_GEM_DOMAIN_INSTRUCTION, 0,
2546 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2547 OUT_BCS_BATCH(batch, 0);
2548 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2549 OUT_BCS_BATCH(batch, 0);
2550 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2551 OUT_BCS_BATCH(batch, 0);
2552 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2553 OUT_BCS_BATCH(batch, 0);
2554 ADVANCE_BCS_BATCH(batch);
/*
 * MFD_AVC_BSD_OBJECT for the workaround pass: decode the whole canned
 * clip as a single last slice, splitting its bit offset into the byte
 * part (bits 16..) and the remaining 0..7 bit part.
 */
2558 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2559 struct gen7_mfd_context *gen7_mfd_context)
2561 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2563 /* the input bitstream format on GEN7 differs from GEN6 */
2564 BEGIN_BCS_BATCH(batch, 6);
2565 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2566 OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2567 OUT_BCS_BATCH(batch, 0);
2568 OUT_BCS_BATCH(batch,
2574 OUT_BCS_BATCH(batch,
2575 ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2578 (1 << 3) | /* LastSlice Flag */
2579 (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2580 OUT_BCS_BATCH(batch, 0);
2581 ADVANCE_BCS_BATCH(batch);
/*
 * MFX_AVC_SLICE_STATE for the workaround pass: a single I-slice covering
 * the 1x1-MB canned clip, with deblocking disabled and no reference
 * lists; marked as the last slice of the picture.
 */
2585 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2586 struct gen7_mfd_context *gen7_mfd_context)
2588 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2589 int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2590 int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2591 int first_mb_in_slice = 0;
2592 int slice_type = SLICE_TYPE_I;
2594 BEGIN_BCS_BATCH(batch, 11);
2595 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2596 OUT_BCS_BATCH(batch, slice_type);
2597 OUT_BCS_BATCH(batch,
2598 (num_ref_idx_l1 << 24) |
2599 (num_ref_idx_l0 << 16) |
2602 OUT_BCS_BATCH(batch,
2604 (1 << 27) | /* disable Deblocking */
2606 (gen7_jpeg_wa_clip.qp << 16) |
2609 OUT_BCS_BATCH(batch,
2610 (slice_ver_pos << 24) |
2611 (slice_hor_pos << 16) |
2612 (first_mb_in_slice << 0));
2613 OUT_BCS_BATCH(batch,
2614 (next_slice_ver_pos << 16) |
2615 (next_slice_hor_pos << 0));
2616 OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2617 OUT_BCS_BATCH(batch, 0);
2618 OUT_BCS_BATCH(batch, 0);
2619 OUT_BCS_BATCH(batch, 0);
2620 OUT_BCS_BATCH(batch, 0);
2621 ADVANCE_BCS_BATCH(batch);
/*
 * Run the full JPEG workaround pass: decode the tiny canned AVC clip
 * before the real JPEG picture.  The gen8_jpeg_wa_* commands below must
 * stay in this exact order -- they form one MFX command sequence.
 */
2625 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2626 struct gen7_mfd_context *gen7_mfd_context)
2628 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2629 gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2630 intel_batchbuffer_emit_mi_flush(batch);
2631 gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2632 gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2633 gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2634 gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2635 gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2636 gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2637 gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2639 gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2640 gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2641 gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
/*
 * Top-level JPEG baseline decode: run the workaround clip, program the
 * picture-level state, then walk the slices twice -- first to find the
 * highest huffman table selector actually used (so only the needed
 * tables are uploaded), then to emit one BSD object per scan.
 */
2647 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2648 struct decode_state *decode_state,
2649 struct gen7_mfd_context *gen7_mfd_context)
2651 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2652 VAPictureParameterBufferJPEGBaseline *pic_param;
2653 VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2654 dri_bo *slice_data_bo;
2655 int i, j, max_selector = 0;
2657 assert(decode_state->pic_param && decode_state->pic_param->buffer);
2658 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2660 /* Currently only support Baseline DCT */
2661 gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2662 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2664 gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2666 intel_batchbuffer_emit_mi_flush(batch);
2667 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2668 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2669 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2670 gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2671 gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
/* Pass 1: scan all slices to find the largest DC/AC table selector. */
2673 for (j = 0; j < decode_state->num_slice_params; j++) {
2674 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2675 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2676 slice_data_bo = decode_state->slice_datas[j]->bo;
2677 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2679 if (j == decode_state->num_slice_params - 1)
2680 next_slice_group_param = NULL;
2682 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2684 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2687 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2689 if (i < decode_state->slice_params[j]->num_elements - 1)
2690 next_slice_param = slice_param + 1;
2692 next_slice_param = next_slice_group_param;
2694 for (component = 0; component < slice_param->num_components; component++) {
2695 if (max_selector < slice_param->components[component].dc_table_selector)
2696 max_selector = slice_param->components[component].dc_table_selector;
2698 if (max_selector < slice_param->components[component].ac_table_selector)
2699 max_selector = slice_param->components[component].ac_table_selector;
2706 assert(max_selector < 2);
2707 gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
/* Pass 2: emit one MFD_JPEG_BSD_OBJECT per scan. */
2709 for (j = 0; j < decode_state->num_slice_params; j++) {
2710 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2711 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2712 slice_data_bo = decode_state->slice_datas[j]->bo;
2713 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2715 if (j == decode_state->num_slice_params - 1)
2716 next_slice_group_param = NULL;
2718 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2720 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2721 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2723 if (i < decode_state->slice_params[j]->num_elements - 1)
2724 next_slice_param = slice_param + 1;
2726 next_slice_param = next_slice_group_param;
2728 gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2733 intel_batchbuffer_end_atomic(batch);
2734 intel_batchbuffer_flush(batch);
/* VP8 DC quantizer lookup: maps a clipped quantization index (0..127)
 * to the DC dequantization factor (cf. RFC 6386, section 14.1). */
2737 static const int vp8_dc_qlookup[128] = {
2738 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
2739 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
2740 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
2741 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
2742 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
2743 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
2744 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2745 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
/* VP8 AC quantizer lookup: maps a clipped quantization index (0..127)
 * to the AC dequantization factor (cf. RFC 6386, section 14.1). */
2748 static const int vp8_ac_qlookup[128] = {
2749 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
2750 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
2751 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
2752 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
2753 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
2754 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2755 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2756 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
/* NOTE(review): the body is not visible in this chunk; given its use as
 * an index into the 128-entry qlookup tables above, it presumably clamps
 * `index` into [0, 127] -- confirm against the full file. */
2759 static inline unsigned int vp8_clip_quantization_index(int index)
/*
 * Prepare the MFD pipeline for one VP8 frame: update the reference frame
 * store, bind the render surface as decode output, and (re)allocate the
 * per-frame row-store scratch buffers sized from the frame width in MBs.
 */
2770 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2771 struct decode_state *decode_state,
2772 struct gen7_mfd_context *gen7_mfd_context)
2774 struct object_surface *obj_surface;
2775 struct i965_driver_data *i965 = i965_driver_data(ctx);
2777 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2778 int width_in_mbs = (pic_param->frame_width + 15) / 16;
2779 int height_in_mbs = (pic_param->frame_height + 15) / 16;
2781 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2782 assert(height_in_mbs > 0 && height_in_mbs <= 256);
2784 intel_update_vp8_frame_store_index(ctx,
2787 gen7_mfd_context->reference_surface);
2789 /* Current decoded picture */
2790 obj_surface = decode_state->render_object;
2791 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Exactly one of the two output slots is valid, selected by the
 * loop-filter flag: post-deblocking when filtering, else pre-deblocking. */
2793 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2794 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2795 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2796 gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2798 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2799 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2800 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2801 gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2803 intel_ensure_vp8_segmentation_buffer(ctx,
2804 &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2806 /* The same as AVC */
2807 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2808 bo = dri_bo_alloc(i965->intel.bufmgr,
2813 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2814 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2816 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2817 bo = dri_bo_alloc(i965->intel.bufmgr,
2818 "deblocking filter row store",
2819 width_in_mbs * 64 * 4,
2822 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2823 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2825 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2826 bo = dri_bo_alloc(i965->intel.bufmgr,
2827 "bsd mpc row store",
2828 width_in_mbs * 64 * 2,
2831 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2832 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2834 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2835 bo = dri_bo_alloc(i965->intel.bufmgr,
2837 width_in_mbs * 64 * 2,
2840 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2841 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* VP8 has no bitplane buffer (VC-1 only). */
2843 gen7_mfd_context->bitplane_read_buffer.valid = 0;
2847 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2848 struct decode_state *decode_state,
2849 struct gen7_mfd_context *gen7_mfd_context)
2851 struct i965_driver_data *i965 = i965_driver_data(ctx);
2852 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2853 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2854 VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2855 VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2856 dri_bo *probs_bo = decode_state->probability_data->bo;
2858 unsigned int quantization_value[4][6];
2860 /* There is no safe way to error out if the segmentation buffer
2861 could not be allocated. So, instead of aborting, simply decode
2862 something even if the result may look totally inacurate */
2863 const unsigned int enable_segmentation =
2864 pic_param->pic_fields.bits.segmentation_enabled &&
2865 gen7_mfd_context->segmentation_buffer.valid;
2867 log2num = (int)log2(slice_param->num_of_partitions - 1);
2869 BEGIN_BCS_BATCH(batch, 38);
2870 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2871 OUT_BCS_BATCH(batch,
2872 (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2873 (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2874 OUT_BCS_BATCH(batch,
2876 pic_param->pic_fields.bits.sharpness_level << 16 |
2877 pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2878 pic_param->pic_fields.bits.sign_bias_golden << 12 |
2879 pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2880 pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2881 (enable_segmentation &&
2882 pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
2883 pic_param->pic_fields.bits.segmentation_enabled << 8 |
2884 (enable_segmentation &&
2885 !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2886 (enable_segmentation &&
2887 pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2888 (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2889 pic_param->pic_fields.bits.filter_type << 4 |
2890 (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2891 !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2893 OUT_BCS_BATCH(batch,
2894 pic_param->loop_filter_level[3] << 24 |
2895 pic_param->loop_filter_level[2] << 16 |
2896 pic_param->loop_filter_level[1] << 8 |
2897 pic_param->loop_filter_level[0] << 0);
2899 /* Quantizer Value for 4 segmetns, DW4-DW15 */
2900 for (i = 0; i < 4; i++) {
2901 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2902 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2903 quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /*y2dc*/
2904 /* 101581>>16 is equivalent to 155/100 */
2905 quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /*y2ac*/
2906 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2907 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2909 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2910 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2912 OUT_BCS_BATCH(batch,
2913 quantization_value[i][0] << 16 | /* Y1AC */
2914 quantization_value[i][1] << 0); /* Y1DC */
2915 OUT_BCS_BATCH(batch,
2916 quantization_value[i][5] << 16 | /* UVAC */
2917 quantization_value[i][4] << 0); /* UVDC */
2918 OUT_BCS_BATCH(batch,
2919 quantization_value[i][3] << 16 | /* Y2AC */
2920 quantization_value[i][2] << 0); /* Y2DC */
2923 /* CoeffProbability table for non-key frame, DW16-DW18 */
2925 OUT_BCS_RELOC64(batch, probs_bo,
2926 0, I915_GEM_DOMAIN_INSTRUCTION,
2928 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2930 OUT_BCS_BATCH(batch, 0);
2931 OUT_BCS_BATCH(batch, 0);
2932 OUT_BCS_BATCH(batch, 0);
2935 OUT_BCS_BATCH(batch,
2936 pic_param->mb_segment_tree_probs[2] << 16 |
2937 pic_param->mb_segment_tree_probs[1] << 8 |
2938 pic_param->mb_segment_tree_probs[0] << 0);
2940 OUT_BCS_BATCH(batch,
2941 pic_param->prob_skip_false << 24 |
2942 pic_param->prob_intra << 16 |
2943 pic_param->prob_last << 8 |
2944 pic_param->prob_gf << 0);
2946 OUT_BCS_BATCH(batch,
2947 pic_param->y_mode_probs[3] << 24 |
2948 pic_param->y_mode_probs[2] << 16 |
2949 pic_param->y_mode_probs[1] << 8 |
2950 pic_param->y_mode_probs[0] << 0);
2952 OUT_BCS_BATCH(batch,
2953 pic_param->uv_mode_probs[2] << 16 |
2954 pic_param->uv_mode_probs[1] << 8 |
2955 pic_param->uv_mode_probs[0] << 0);
2957 /* MV update value, DW23-DW32 */
2958 for (i = 0; i < 2; i++) {
2959 for (j = 0; j < 20; j += 4) {
2960 OUT_BCS_BATCH(batch,
2961 (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2962 pic_param->mv_probs[i][j + 2] << 16 |
2963 pic_param->mv_probs[i][j + 1] << 8 |
2964 pic_param->mv_probs[i][j + 0] << 0);
2968 OUT_BCS_BATCH(batch,
2969 (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2970 (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2971 (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) << 8 |
2972 (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) << 0);
2974 OUT_BCS_BATCH(batch,
2975 (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2976 (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2977 (pic_param->loop_filter_deltas_mode[1] & 0x7f) << 8 |
2978 (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0);
2980 /* segmentation id stream base address, DW35-DW37 */
2981 if (enable_segmentation) {
2982 OUT_BCS_RELOC64(batch, gen7_mfd_context->segmentation_buffer.bo,
2983 0, I915_GEM_DOMAIN_INSTRUCTION,
2985 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2987 OUT_BCS_BATCH(batch, 0);
2988 OUT_BCS_BATCH(batch, 0);
2989 OUT_BCS_BATCH(batch, 0);
2991 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the MFD_VP8_BSD_OBJECT command describing the VP8 frame's
 * bitstream layout for the bitstream decoder: boolean-coder entry
 * state, partition 0 (control partition) size/offset, and the size and
 * offset of each of the up-to-8 DCT token partitions.
 */
gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVP8 *pic_param,
                        VASliceParameterBufferVP8 *slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* macroblock_offset is in bits; round up to whole bytes to find
       where partition-0 data resumes inside the slice buffer. */
    unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
    /* Bits of the current partition-0 byte already consumed by the
       boolean decoder (bool_coder_ctx.count holds the bits remaining). */
    unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
    unsigned int partition_size_0 = slice_param->partition_size[0];

    assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
    if (used_bits == 8) {
        partition_size_0 -= 1;

    /* A VP8 frame is the control partition plus 1..8 token partitions
       (num_of_partitions counts both), see RFC 6386 §9.5. */
    assert(slice_param->num_of_partitions >= 2);
    assert(slice_param->num_of_partitions <= 9);
    /* log2 of the token-partition count (-1 excludes the control partition) */
    log2num = (int)log2(slice_param->num_of_partitions - 1);

    BEGIN_BCS_BATCH(batch, 22);
    OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
    OUT_BCS_BATCH(batch,
                  used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
                  pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
    OUT_BCS_BATCH(batch, partition_size_0 + 1);
    OUT_BCS_BATCH(batch, offset);
    /* partition sizes in bytes are present after the above first partition
       when there is more than one token partition (3 bytes per size) */
    offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
    for (i = 1; i < 9; i++) {
        /* Unused partition slots are programmed as size/offset 0 */
        if (i < slice_param->num_of_partitions) {
            OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
            OUT_BCS_BATCH(batch, offset);
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        offset += slice_param->partition_size[i];

    OUT_BCS_BATCH(batch, 0); /* concealment method */

    ADVANCE_BCS_BATCH(batch);
/*
 * Decode one complete VP8 frame. Validates that decode_state carries
 * exactly one slice parameter/data pair plus the probability data, then
 * emits the full MFX command sequence (pipe mode select through BSD
 * object) as a single atomic BCS batch and flushes it.
 */
gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVP8 *pic_param;
    VASliceParameterBufferVP8 *slice_param;
    dri_bo *slice_data_bo;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;

    /* one slice per frame */
    if (decode_state->num_slice_params != 1 ||
        (!decode_state->slice_params ||
         !decode_state->slice_params[0] ||
         (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
        (!decode_state->slice_datas ||
         !decode_state->slice_datas[0] ||
         !decode_state->slice_datas[0]->bo) ||
        !decode_state->probability_data) {
        WARN_ONCE("Wrong parameters for VP8 decoding\n");

    slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
    slice_data_bo = decode_state->slice_datas[0]->bo;

    /* Allocate/refresh per-frame surfaces and scratch buffers first */
    gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
    /* The whole frame is emitted as one atomic batch so the MFX state
       commands and the BSD object cannot be interleaved with other work. */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
/*
 * hw_context "run" entry point. Sanity-checks the submitted buffers,
 * then dispatches to the codec-specific picture decode routine based on
 * the profile.
 */
gen8_mfd_decode_picture(VADriverContextP ctx,
                        union codec_state *codec_state,
                        struct hw_context *hw_context)
    struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
    struct decode_state *decode_state = &codec_state->decode;

    assert(gen7_mfd_context);

    vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);

    /* Propagate validation failure without touching the hardware */
    if (vaStatus != VA_STATUS_SUCCESS)

    /* Reset the per-picture MPEG-2 slice-position workaround state */
    gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;

    case VAProfileMPEG2Simple:
    case VAProfileMPEG2Main:
        gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);

    case VAProfileH264ConstrainedBaseline:
    case VAProfileH264Main:
    case VAProfileH264High:
    case VAProfileH264StereoHigh:
    case VAProfileH264MultiviewHigh:
        gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);

    case VAProfileVC1Simple:
    case VAProfileVC1Main:
    case VAProfileVC1Advanced:
        gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);

    case VAProfileJPEGBaseline:
        gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);

    case VAProfileVP8Version0_3:
        gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);

    vaStatus = VA_STATUS_SUCCESS;
/*
 * hw_context "destroy" hook. Releases every buffer object owned by the
 * decoder context, tears down the JPEG-workaround surface, frees the
 * batchbuffer and finally the context struct itself.
 */
gen8_mfd_context_destroy(void *hw_context)
    VADriverContextP ctx;
    struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;

    ctx = (VADriverContextP)(gen7_mfd_context->driver_context);

    /* dri_bo_unreference() tolerates NULL; pointers are cleared after
       each unref so a stale handle can never be released twice. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
    gen7_mfd_context->bitplane_read_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
    gen7_mfd_context->segmentation_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);

    /* The JPEG workaround surface is created lazily; only destroy it
       when it was actually allocated. */
    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
        gen7_mfd_context->jpeg_wa_surface_object = NULL;

    intel_batchbuffer_free(gen7_mfd_context->base.batch);
    free(gen7_mfd_context);
/* Initialize MPEG-2 IQ-matrix bookkeeping: -1 in each load_* flag
   presumably means "no quantiser matrix received yet" — confirm against
   the MPEG-2 qm-state emitter that reads these fields. */
static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
                                        struct gen7_mfd_context *gen7_mfd_context)
    gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3209 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3211 struct intel_driver_data *intel = intel_driver_data(ctx);
3212 struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3215 if (!gen7_mfd_context)
3218 gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3219 gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3220 gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3222 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3223 gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3224 gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3227 gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3228 gen7_mfd_context->segmentation_buffer.valid = 0;
3230 switch (obj_config->profile) {
3231 case VAProfileMPEG2Simple:
3232 case VAProfileMPEG2Main:
3233 gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3236 case VAProfileH264ConstrainedBaseline:
3237 case VAProfileH264Main:
3238 case VAProfileH264High:
3239 case VAProfileH264StereoHigh:
3240 case VAProfileH264MultiviewHigh:
3241 gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3247 gen7_mfd_context->driver_context = ctx;
3248 return (struct hw_context *)gen7_mfd_context;