2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Zhao Yakui <yakui.zhao@intel.com>
26 * Xiang Haihao <haihao.xiang@intel.com>
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
44 #include "intel_media.h"
45 #include <va/va_enc_jpeg.h>
46 #include "vp8_probs.h"
/* Surface-state sizing / binding-table layout for the media pipeline (Gen8 variant). */
48 #define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8
49 #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
50 #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
/* 0 disables the software per-MB PAK-object path (see the "#if MFC_SOFTWARE_BATCH" guard below). */
52 #define MFC_SOFTWARE_BATCH 0
/* True when the device stepping is B0 or newer. */
55 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
57 //Zigzag scan order of the Luma and Chroma components
58 //Note: Jpeg Spec ISO/IEC 10918-1, Figure A.6 shows the zigzag order differently.
59 //The Spec is trying to show the zigzag pattern with number positions. The below
60 //table will use the pattern shown by A.6 and map the position of the elements in the array
//i.e. entry k holds the raster-order index of the k-th coefficient in zigzag order.
61 static const uint32_t zigzag_direct[64] = {
62 0, 1, 8, 16, 9, 2, 3, 10,
63 17, 24, 32, 25, 18, 11, 4, 5,
64 12, 19, 26, 33, 40, 48, 41, 34,
65 27, 20, 13, 6, 7, 14, 21, 28,
66 35, 42, 49, 56, 57, 50, 43, 36,
67 29, 22, 15, 23, 30, 37, 44, 51,
68 58, 59, 52, 45, 38, 31, 39, 46,
69 53, 60, 61, 54, 47, 55, 62, 63
72 //Default Luminance quantization table
73 //Source: Jpeg Spec ISO/IEC 10918-1, Annex K, Table K.1
//Stored in natural (raster) order; presumably reordered via zigzag_direct when emitted — confirm at use site.
74 static const uint8_t jpeg_luma_quant[64] = {
75 16, 11, 10, 16, 24, 40, 51, 61,
76 12, 12, 14, 19, 26, 58, 60, 55,
77 14, 13, 16, 24, 40, 57, 69, 56,
78 14, 17, 22, 29, 51, 87, 80, 62,
79 18, 22, 37, 56, 68, 109, 103, 77,
80 24, 35, 55, 64, 81, 104, 113, 92,
81 49, 64, 78, 87, 103, 121, 120, 101,
82 72, 92, 95, 98, 112, 100, 103, 99
85 //Default Chroma quantization table
86 //Source: Jpeg Spec ISO/IEC 10918-1, Annex K, Table K.2
//Stored in natural (raster) order, same convention as jpeg_luma_quant above.
87 static const uint8_t jpeg_chroma_quant[64] = {
88 17, 18, 24, 47, 99, 99, 99, 99,
89 18, 21, 26, 66, 99, 99, 99, 99,
90 24, 26, 56, 99, 99, 99, 99, 99,
91 47, 66, 99, 99, 99, 99, 99, 99,
92 99, 99, 99, 99, 99, 99, 99, 99,
93 99, 99, 99, 99, 99, 99, 99, 99,
94 99, 99, 99, 99, 99, 99, 99, 99,
95 99, 99, 99, 99, 99, 99, 99, 99
/* Maps the two VA-API JPEG Huffman-table slots to their Gen7+ MFX identifiers.
 * NOTE(review): the initializer body is not visible in this chunk — verify against the full file. */
99 static const int va_to_gen7_jpeg_hufftable[2] = {
/* Gen8 media kernel binary (128-bit instruction words) that builds AVC MFC batch buffers on the GPU. */
104 static const uint32_t gen8_mfc_batchbuffer_avc[][4] = {
105 #include "shaders/utils/mfc_batchbuffer_hsw.g8b"
/* Gen9 build of the same MFC batch-buffer kernel (compiled from the identical source). */
108 static const uint32_t gen9_mfc_batchbuffer_avc[][4] = {
109 #include "shaders/utils/mfc_batchbuffer_hsw.g9b"
/* Kernel descriptor table for Gen8: a single entry wrapping gen8_mfc_batchbuffer_avc. */
112 static struct i965_kernel gen8_mfc_kernels[] = {
114 "MFC AVC INTRA BATCHBUFFER ",
115 MFC_BATCHBUFFER_AVC_INTRA,
116 gen8_mfc_batchbuffer_avc,
117 sizeof(gen8_mfc_batchbuffer_avc),
/* Kernel descriptor table for Gen9 — mirrors gen8_mfc_kernels with the g9b binary. */
122 static struct i965_kernel gen9_mfc_kernels[] = {
124 "MFC AVC INTRA BATCHBUFFER ",
125 MFC_BATCHBUFFER_AVC_INTRA,
126 gen9_mfc_batchbuffer_avc,
127 sizeof(gen9_mfc_batchbuffer_avc),
/* Bit-field decoding of the VME output message word (msg[0]):
 * low bits select the inter partition mode, SUBMB_SHAPE_MASK covers the
 * per-8x8 sub-partition shapes. INTER_MV8/INTER_MV32 encode the packed
 * motion-vector count field at bit 20 of the PAK object command. */
133 #define INTER_MODE_MASK 0x03
134 #define INTER_8X8 0x03
135 #define INTER_16X8 0x01
136 #define INTER_8X16 0x02
137 #define SUBMB_SHAPE_MASK 0x00FF00
138 #define INTER_16X16 0x00
140 #define INTER_MV8 (4 << 20)
141 #define INTER_MV32 (6 << 20)
/* Emit MFX_PIPE_MODE_SELECT (5 DWs): switch the MFX engine into encode mode
 * for the selected standard (MPEG2/AVC/JPEG/VP8, asserted below) and enable
 * pre-/post-deblocking output writes based on which bo is allocated. */
145 gen8_mfc_pipe_mode_select(VADriverContextP ctx,
147 struct intel_encoder_context *encoder_context)
149 struct intel_batchbuffer *batch = encoder_context->base.batch;
150 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
152 assert(standard_select == MFX_FORMAT_MPEG2 ||
153 standard_select == MFX_FORMAT_AVC ||
154 standard_select == MFX_FORMAT_JPEG ||
155 standard_select == MFX_FORMAT_VP8);
157 BEGIN_BCS_BATCH(batch, 5);
159 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
/* DW1: mode flags — exactly one of post/pre deblocking output is expected to be set. */
161 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
162 (MFD_MODE_VLD << 15) | /* VLD mode */
163 (0 << 10) | /* Stream-Out Enable */
164 ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
165 ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
166 (0 << 6) | /* frame statistics stream-out enable*/
167 (0 << 5) | /* not in stitch mode */
168 (1 << 4) | /* encoding mode */
169 (standard_select << 0)); /* standard select: avc or mpeg2 or jpeg*/
/* DW2: debug/error-handling controls, all left at their defaults. */
171 (0 << 7) | /* expand NOA bus flag */
172 (0 << 6) | /* disable slice-level clock gating */
173 (0 << 5) | /* disable clock gating for NOA */
174 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
175 (0 << 3) | /* terminate if AVC mbdata error occurs */
176 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
179 OUT_BCS_BATCH(batch, 0);
180 OUT_BCS_BATCH(batch, 0);
182 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_SURFACE_STATE (6 DWs) describing the tiled, interleaved-UV 4:2:0
 * (NV12-style) surface using the dimensions/pitch cached in
 * mfc_context->surface_state by the caller. */
186 gen8_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
188 struct intel_batchbuffer *batch = encoder_context->base.batch;
189 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
191 BEGIN_BCS_BATCH(batch, 6);
193 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
194 OUT_BCS_BATCH(batch, 0);
/* DW2: height/width are programmed minus one, per MFX convention. */
196 ((mfc_context->surface_state.height - 1) << 18) |
197 ((mfc_context->surface_state.width - 1) << 4));
199 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
200 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
201 (0 << 22) | /* surface object control state, FIXME??? */
202 ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
203 (0 << 2) | /* must be 0 for interleave U/V */
204 (1 << 1) | /* must be tiled */
205 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
/* DW4: Y offset (in rows) to the start of the interleaved chroma plane. */
207 (0 << 16) | /* must be 0 for interleave U/V */
208 (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
209 OUT_BCS_BATCH(batch, 0);
211 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 DWs): program the indirect-object
 * base addresses the MFX engine reads/writes during encode —
 *   DW1-5   bitstream base / upper bound (bound only used for VP8 here),
 *   DW6-10  VME motion-vector output (skipped for JPEG, which has no VME),
 *   DW11-20 IT-COEFF and DBLK objects (unused on the encode path),
 *   DW21-25 PAK-BSE output (the coded bitstream buffer). */
215 gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
216 struct intel_encoder_context *encoder_context)
218 struct intel_batchbuffer *batch = encoder_context->base.batch;
219 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
220 struct gen6_vme_context *vme_context = encoder_context->vme_context;
222 unsigned int bse_offset;
224 BEGIN_BCS_BATCH(batch, 26);
226 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
227 /* the DW1-3 is for the MFX indirect bistream offset */
228 OUT_BCS_BATCH(batch, 0);
229 OUT_BCS_BATCH(batch, 0);
230 OUT_BCS_BATCH(batch, 0);
232 /* the DW4-5 is the MFX upper bound */
233 if (encoder_context->codec == CODEC_VP8) {
/* VP8: bound the bitstream object at end_offset of the PAK-BSE buffer. */
235 mfc_context->mfc_indirect_pak_bse_object.bo,
236 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
237 mfc_context->mfc_indirect_pak_bse_object.end_offset);
238 OUT_BCS_BATCH(batch, 0);
240 OUT_BCS_BATCH(batch, 0);
241 OUT_BCS_BATCH(batch, 0);
244 if(encoder_context->codec != CODEC_JPEG) {
245 vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;
246 /* the DW6-10 is for MFX Indirect MV Object Base Address */
247 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
248 OUT_BCS_BATCH(batch, 0);
249 OUT_BCS_BATCH(batch, 0);
250 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size);
251 OUT_BCS_BATCH(batch, 0);
253 /* No VME for JPEG */
254 OUT_BCS_BATCH(batch, 0);
255 OUT_BCS_BATCH(batch, 0);
256 OUT_BCS_BATCH(batch, 0);
257 OUT_BCS_BATCH(batch, 0);
258 OUT_BCS_BATCH(batch, 0);
261 /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
262 OUT_BCS_BATCH(batch, 0);
263 OUT_BCS_BATCH(batch, 0);
264 OUT_BCS_BATCH(batch, 0);
265 OUT_BCS_BATCH(batch, 0);
266 OUT_BCS_BATCH(batch, 0);
268 /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
269 OUT_BCS_BATCH(batch, 0);
270 OUT_BCS_BATCH(batch, 0);
271 OUT_BCS_BATCH(batch, 0);
272 OUT_BCS_BATCH(batch, 0);
273 OUT_BCS_BATCH(batch, 0);
275 /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
/* JPEG starts PAK-BSE output at the configured offset; other codecs at 0. */
276 bse_offset = (encoder_context->codec == CODEC_JPEG) ? (mfc_context->mfc_indirect_pak_bse_object.offset) : 0;
278 mfc_context->mfc_indirect_pak_bse_object.bo,
279 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
281 OUT_BCS_BATCH(batch, 0);
282 OUT_BCS_BATCH(batch, 0);
/* Upper bound of the PAK-BSE object. */
285 mfc_context->mfc_indirect_pak_bse_object.bo,
286 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
287 mfc_context->mfc_indirect_pak_bse_object.end_offset);
288 OUT_BCS_BATCH(batch, 0);
290 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_IMG_STATE (16 DWs): per-picture AVC encode parameters derived
 * from the VA picture parameter buffer (weighted prediction, entropy mode,
 * 8x8 transform) and the cached surface dimensions (frame MB counts). */
294 gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
295 struct intel_encoder_context *encoder_context)
297 struct intel_batchbuffer *batch = encoder_context->base.batch;
298 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
299 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
301 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
302 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
304 BEGIN_BCS_BATCH(batch, 16);
306 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
307 /*DW1. MB setting of frame */
309 ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
/* DW2: frame dimensions in macroblocks, minus one. */
311 ((height_in_mbs - 1) << 16) |
312 ((width_in_mbs - 1) << 0));
/* DW3: QP offsets and prediction-weight flags taken from the picture params. */
315 (0 << 24) | /* Second Chroma QP Offset */
316 (0 << 16) | /* Chroma QP Offset */
317 (0 << 14) | /* Max-bit conformance Intra flag */
318 (0 << 13) | /* Max Macroblock size conformance Inter flag */
319 (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) | /*Weighted_Pred_Flag */
320 (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) | /* Weighted_BiPred_Idc */
321 (0 << 8) | /* FIXME: Image Structure */
322 (0 << 0) ); /* Current Decoed Image Frame Store ID, reserved in Encode mode */
/* DW4: stream/coding-mode controls; progressive frame MBs only (no MBAFF/fields). */
324 (0 << 16) | /* Mininum Frame size */
325 (0 << 15) | /* Disable reading of Macroblock Status Buffer */
326 (0 << 14) | /* Load BitStream Pointer only once, 1 slic 1 frame */
327 (0 << 13) | /* CABAC 0 word insertion test enable */
328 (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */
329 (1 << 10) | /* Chroma Format IDC, 4:2:0 */
330 (0 << 8) | /* FIXME: MbMvFormatFlag */
331 (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/
332 (0 << 6) | /* Only valid for VLD decoding mode */
333 (0 << 5) | /* Constrained Intra Predition Flag, from PPS */
334 (0 << 4) | /* Direct 8x8 inference flag */
335 (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/
336 (1 << 2) | /* Frame MB only flag */
337 (0 << 1) | /* MBAFF mode is in active */
338 (0 << 0)); /* Field picture flag */
339 /* DW5 Trellis quantization */
340 OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */
341 OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */
342 (0xBB8 << 16) | /* InterMbMaxSz */
343 (0xEE8) ); /* IntraMbMaxSz */
344 OUT_BCS_BATCH(batch, 0); /* Reserved */
346 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
347 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
348 /* DW10. Bit setting for MB */
349 OUT_BCS_BATCH(batch, 0x8C000000);
350 OUT_BCS_BATCH(batch, 0x00010000);
/* DW12-13: fixed command control values (magic constants kept from the reference driver). */
352 OUT_BCS_BATCH(batch, 0);
353 OUT_BCS_BATCH(batch, 0x02010100);
354 /* DW14. For short format */
355 OUT_BCS_BATCH(batch, 0);
356 OUT_BCS_BATCH(batch, 0);
358 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_QM_STATE (18 DWs): upload one quantization matrix of up to 16 DWs.
 * NOTE(review): when qm_length < 16 the tail of qm_buffer is uninitialized
 * stack data yet all 16 DWs are written to the batch — consider zero-filling
 * qm_buffer first (memset) so the padding DWs are deterministic. */
362 gen8_mfc_qm_state(VADriverContextP ctx,
366 struct intel_encoder_context *encoder_context)
368 struct intel_batchbuffer *batch = encoder_context->base.batch;
369 unsigned int qm_buffer[16];
371 assert(qm_length <= 16);
372 assert(sizeof(*qm) == 4);
373 memcpy(qm_buffer, qm, qm_length * 4);
375 BEGIN_BCS_BATCH(batch, 18);
376 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
377 OUT_BCS_BATCH(batch, qm_type << 0);
378 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
379 ADVANCE_BCS_BATCH(batch);
/* Program flat (all-16) AVC quantization matrices: 12 DWs (3 x 4x4 matrices
 * of 16 bytes) for the 4x4 intra/inter sets, 16 DWs for each 8x8 set. */
383 gen8_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
385 unsigned int qm[16] = {
386 0x10101010, 0x10101010, 0x10101010, 0x10101010,
387 0x10101010, 0x10101010, 0x10101010, 0x10101010,
388 0x10101010, 0x10101010, 0x10101010, 0x10101010,
389 0x10101010, 0x10101010, 0x10101010, 0x10101010
392 gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
393 gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
394 gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
395 gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
/* Emit MFX_FQM_STATE (34 DWs): upload one forward-quantization matrix of up
 * to 32 DWs (16-bit entries). NOTE(review): same issue as gen8_mfc_qm_state —
 * when fqm_length < 32 the unwritten tail of fqm_buffer is uninitialized but
 * still emitted; zero-fill fqm_buffer for deterministic padding. */
399 gen8_mfc_fqm_state(VADriverContextP ctx,
403 struct intel_encoder_context *encoder_context)
405 struct intel_batchbuffer *batch = encoder_context->base.batch;
406 unsigned int fqm_buffer[32];
408 assert(fqm_length <= 32);
409 assert(sizeof(*fqm) == 4);
410 memcpy(fqm_buffer, fqm, fqm_length * 4);
412 BEGIN_BCS_BATCH(batch, 34);
413 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
414 OUT_BCS_BATCH(batch, fqm_type << 0);
415 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
416 ADVANCE_BCS_BATCH(batch);
/* Program flat forward-quantization matrices (16-bit 0x1000 entries):
 * 24 DWs for the 4x4 intra/inter sets, 32 DWs for each 8x8 set. */
420 gen8_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
422 unsigned int qm[32] = {
423 0x10001000, 0x10001000, 0x10001000, 0x10001000,
424 0x10001000, 0x10001000, 0x10001000, 0x10001000,
425 0x10001000, 0x10001000, 0x10001000, 0x10001000,
426 0x10001000, 0x10001000, 0x10001000, 0x10001000,
427 0x10001000, 0x10001000, 0x10001000, 0x10001000,
428 0x10001000, 0x10001000, 0x10001000, 0x10001000,
429 0x10001000, 0x10001000, 0x10001000, 0x10001000,
430 0x10001000, 0x10001000, 0x10001000, 0x10001000
433 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
434 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
435 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
436 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
/* Emit MFX_INSERT_OBJECT: insert raw header/bitstream DWs (e.g. SPS/PPS/SEI,
 * slice headers) directly into the output stream, with optional emulation-
 * prevention byte insertion. data_bits_in_last_dw == 0 means the last DW is
 * fully used (normalized to 32 below).
 * NOTE(review): "lenght_in_dws" is a typo for "length_in_dws"; kept as-is
 * since it is an internal parameter name used consistently in this file. */
440 gen8_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
441 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
442 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
443 struct intel_batchbuffer *batch)
446 batch = encoder_context->base.batch;
448 if (data_bits_in_last_dw == 0)
449 data_bits_in_last_dw = 32;
451 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
453 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
455 (0 << 16) | /* always start at offset 0 */
456 (data_bits_in_last_dw << 8) |
457 (skip_emul_byte_count << 4) |
458 (!!emulation_flag << 3) |
459 ((!!is_last_header) << 2) |
460 ((!!is_end_of_slice) << 1) |
461 (0 << 0)); /* FIXME: ??? */
462 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
464 ADVANCE_BCS_BATCH(batch);
/* Per-frame MFC (re)initialization: derive the frame size in macroblocks from
 * the codec-specific sequence/picture parameters, release all buffers held
 * from the previous frame, and allocate the scratch/row-store buffers plus
 * the auxiliary slice batchbuffer sized by slice_batchbuffer_size. */
468 static void gen8_mfc_init(VADriverContextP ctx,
469 struct encode_state *encode_state,
470 struct intel_encoder_context *encoder_context)
472 struct i965_driver_data *i965 = i965_driver_data(ctx);
473 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
476 int width_in_mbs = 0;
477 int height_in_mbs = 0;
478 int slice_batchbuffer_size;
480 if (encoder_context->codec == CODEC_H264 ||
481 encoder_context->codec == CODEC_H264_MVC) {
482 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
483 width_in_mbs = pSequenceParameter->picture_width_in_mbs;
484 height_in_mbs = pSequenceParameter->picture_height_in_mbs;
485 } else if (encoder_context->codec == CODEC_MPEG2) {
486 VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
488 assert(encoder_context->codec == CODEC_MPEG2);
490 width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
491 height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
/* Fall-through branch: only JPEG remains (asserted). */
493 assert(encoder_context->codec == CODEC_JPEG);
494 VAEncPictureParameterBufferJPEG *pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
496 width_in_mbs = ALIGN(pic_param->picture_width, 16) / 16;
497 height_in_mbs = ALIGN(pic_param->picture_height, 16) / 16;
/* 64 bytes per MB plus per-slice header/tail overhead and slack. */
500 slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
501 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
503 /*Encode common setup for MFC*/
504 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
505 mfc_context->post_deblocking_output.bo = NULL;
507 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
508 mfc_context->pre_deblocking_output.bo = NULL;
510 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
511 mfc_context->uncompressed_picture_source.bo = NULL;
513 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
514 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
516 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
517 if (mfc_context->direct_mv_buffers[i].bo != NULL)
518 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
519 mfc_context->direct_mv_buffers[i].bo = NULL;
522 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
523 if (mfc_context->reference_surfaces[i].bo != NULL)
524 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
525 mfc_context->reference_surfaces[i].bo = NULL;
/* Reallocate per-frame scratch buffers sized by the MB dimensions. */
528 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
529 bo = dri_bo_alloc(i965->intel.bufmgr,
534 mfc_context->intra_row_store_scratch_buffer.bo = bo;
536 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
537 bo = dri_bo_alloc(i965->intel.bufmgr,
539 width_in_mbs * height_in_mbs * 16,
542 mfc_context->macroblock_status_buffer.bo = bo;
544 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
545 bo = dri_bo_alloc(i965->intel.bufmgr,
547 4 * width_in_mbs * 64, /* 4 * width_in_mbs * 64 */
550 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
552 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
553 bo = dri_bo_alloc(i965->intel.bufmgr,
555 2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
558 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
560 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
561 mfc_context->mfc_batchbuffer_surface.bo = NULL;
563 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
564 mfc_context->aux_batchbuffer_surface.bo = NULL;
566 if (mfc_context->aux_batchbuffer)
567 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
/* The aux batchbuffer doubles as a surface (16-byte blocks) for the MFC kernel. */
569 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
570 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
571 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
572 mfc_context->aux_batchbuffer_surface.pitch = 16;
573 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
574 mfc_context->aux_batchbuffer_surface.size_block = 16;
576 gen8_gpe_context_init(ctx, &mfc_context->gpe_context);
/* Emit MFX_PIPE_BUF_ADDR_STATE (61 DWs): program every frame-level buffer
 * address the MFX pipe uses — pre/post deblocking outputs, the uncompressed
 * source, MB status/stream-out, row-store scratch buffers, and the 16
 * reference picture slots (3 address DWs each on Gen8's 48-bit addressing). */
580 gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx,
581 struct intel_encoder_context *encoder_context)
583 struct intel_batchbuffer *batch = encoder_context->base.batch;
584 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
587 BEGIN_BCS_BATCH(batch, 61);
589 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
591 /* the DW1-3 is for pre_deblocking */
592 if (mfc_context->pre_deblocking_output.bo)
593 OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
594 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
597 OUT_BCS_BATCH(batch, 0); /* pre output addr */
599 OUT_BCS_BATCH(batch, 0);
600 OUT_BCS_BATCH(batch, 0);
601 /* the DW4-6 is for the post_deblocking */
603 if (mfc_context->post_deblocking_output.bo)
604 OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
605 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
606 0); /* post output addr */
608 OUT_BCS_BATCH(batch, 0);
610 OUT_BCS_BATCH(batch, 0);
611 OUT_BCS_BATCH(batch, 0);
613 /* the DW7-9 is for the uncompressed_picture */
614 OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
615 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
616 0); /* uncompressed data */
618 OUT_BCS_BATCH(batch, 0);
619 OUT_BCS_BATCH(batch, 0);
621 /* the DW10-12 is for the mb status */
622 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
623 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
624 0); /* StreamOut data*/
626 OUT_BCS_BATCH(batch, 0);
627 OUT_BCS_BATCH(batch, 0);
629 /* the DW13-15 is for the intra_row_store_scratch */
630 OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
631 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
634 OUT_BCS_BATCH(batch, 0);
635 OUT_BCS_BATCH(batch, 0);
637 /* the DW16-18 is for the deblocking filter */
638 OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
639 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
642 OUT_BCS_BATCH(batch, 0);
643 OUT_BCS_BATCH(batch, 0);
645 /* the DW 19-50 is for Reference pictures*/
646 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
647 if ( mfc_context->reference_surfaces[i].bo != NULL) {
648 OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
649 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
652 OUT_BCS_BATCH(batch, 0);
655 OUT_BCS_BATCH(batch, 0);
658 OUT_BCS_BATCH(batch, 0);
660 /* The DW 52-54 is for the MB status buffer */
661 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
662 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
663 0); /* Macroblock status buffer*/
665 OUT_BCS_BATCH(batch, 0);
666 OUT_BCS_BATCH(batch, 0);
668 /* the DW 55-57 is the ILDB buffer */
669 OUT_BCS_BATCH(batch, 0);
670 OUT_BCS_BATCH(batch, 0);
671 OUT_BCS_BATCH(batch, 0);
673 /* the DW 58-60 is the second ILDB buffer */
674 OUT_BCS_BATCH(batch, 0);
675 OUT_BCS_BATCH(batch, 0);
676 OUT_BCS_BATCH(batch, 0);
678 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_DIRECTMODE_STATE (71 DWs): direct-MV buffer addresses for the
 * reference frames and the current frame, followed by the POC list
 * (the trailing 32-entry loop writes i/2 as a placeholder POC per field). */
682 gen8_mfc_avc_directmode_state(VADriverContextP ctx,
683 struct intel_encoder_context *encoder_context)
685 struct intel_batchbuffer *batch = encoder_context->base.batch;
686 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
690 BEGIN_BCS_BATCH(batch, 71);
692 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
694 /* Reference frames and Current frames */
695 /* the DW1-32 is for the direct MV for reference */
696 for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
697 if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
698 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
699 I915_GEM_DOMAIN_INSTRUCTION, 0,
701 OUT_BCS_BATCH(batch, 0);
703 OUT_BCS_BATCH(batch, 0);
704 OUT_BCS_BATCH(batch, 0);
708 OUT_BCS_BATCH(batch, 0);
710 /* the DW34-36 is the MV for the current reference */
/* The last DMV slot pair is reserved for the frame being encoded. */
711 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
712 I915_GEM_DOMAIN_INSTRUCTION, 0,
715 OUT_BCS_BATCH(batch, 0);
716 OUT_BCS_BATCH(batch, 0);
719 for(i = 0; i < 32; i++) {
720 OUT_BCS_BATCH(batch, i/2);
722 OUT_BCS_BATCH(batch, 0);
723 OUT_BCS_BATCH(batch, 0);
725 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 DWs): program the BSD/MPC row-store
 * scratch buffer; the MPR row-store and bitplane-read buffers are unused
 * on the encode path and left zero. */
730 gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
731 struct intel_encoder_context *encoder_context)
733 struct intel_batchbuffer *batch = encoder_context->base.batch;
734 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
736 BEGIN_BCS_BATCH(batch, 10);
738 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
739 OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
740 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
742 OUT_BCS_BATCH(batch, 0);
743 OUT_BCS_BATCH(batch, 0);
745 /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
746 OUT_BCS_BATCH(batch, 0);
747 OUT_BCS_BATCH(batch, 0);
748 OUT_BCS_BATCH(batch, 0);
750 /* the DW7-9 is for Bitplane Read Buffer Base Address */
751 OUT_BCS_BATCH(batch, 0);
752 OUT_BCS_BATCH(batch, 0);
753 OUT_BCS_BATCH(batch, 0);
755 ADVANCE_BCS_BATCH(batch);
/* Program all per-picture AVC state in the required order: pipe mode,
 * surface, indirect objects, buffer addresses, image/QM/FQM state, direct
 * mode, and finally the reference index lists. Dispatch goes through the
 * mfc_context function pointers where per-codec overrides exist. */
759 static void gen8_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
760 struct encode_state *encode_state,
761 struct intel_encoder_context *encoder_context)
763 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
765 mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
766 mfc_context->set_surface_state(ctx, encoder_context);
767 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
768 gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
769 gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
770 mfc_context->avc_img_state(ctx, encode_state, encoder_context);
771 mfc_context->avc_qm_state(ctx, encoder_context);
772 mfc_context->avc_fqm_state(ctx, encoder_context);
773 gen8_mfc_avc_directmode_state(ctx, encoder_context);
774 intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
/* Submit the accumulated BCS batch to the hardware and return success.
 * encode_state is unused here; completion is synchronized later in _stop. */
778 static VAStatus gen8_mfc_run(VADriverContextP ctx,
779 struct encode_state *encode_state,
780 struct intel_encoder_context *encoder_context)
782 struct intel_batchbuffer *batch = encoder_context->base.batch;
784 intel_batchbuffer_flush(batch); //run the pipeline
786 return VA_STATUS_SUCCESS;
/* Finish the encode: map the coded buffer (which implicitly waits for the
 * GPU), read back the produced size in bits, and unmap.
 * NOTE(review): the i965_MapBuffer status is only checked via assert; in
 * release builds a failure would dereference an invalid segment pointer. */
791 gen8_mfc_stop(VADriverContextP ctx,
792 struct encode_state *encode_state,
793 struct intel_encoder_context *encoder_context,
794 int *encoded_bits_size)
796 VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
797 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
798 VACodedBufferSegment *coded_buffer_segment;
800 vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
801 assert(vaStatus == VA_STATUS_SUCCESS);
802 *encoded_bits_size = coded_buffer_segment->size * 8;
803 i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
805 return VA_STATUS_SUCCESS;
/* Emit MFX_AVC_SLICE_STATE (11 DWs) for one slice: derive MB start/end
 * coordinates from the VA slice parameters, resolve the effective reference
 * list sizes and weighted-prediction log2 denominators per slice type, and
 * program the slice-level rate-control fields from the per-type
 * bit_rate_control_context (max QP deltas, correction/grow/shrink factors). */
810 gen8_mfc_avc_slice_state(VADriverContextP ctx,
811 VAEncPictureParameterBufferH264 *pic_param,
812 VAEncSliceParameterBufferH264 *slice_param,
813 struct encode_state *encode_state,
814 struct intel_encoder_context *encoder_context,
815 int rate_control_enable,
817 struct intel_batchbuffer *batch)
819 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
820 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
821 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
822 int beginmb = slice_param->macroblock_address;
823 int endmb = beginmb + slice_param->num_macroblocks;
824 int beginx = beginmb % width_in_mbs;
825 int beginy = beginmb / width_in_mbs;
826 int nextx = endmb % width_in_mbs;
827 int nexty = endmb / width_in_mbs;
828 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
829 int last_slice = (endmb == (width_in_mbs * height_in_mbs));
831 unsigned char correct[6], grow, shrink;
833 int weighted_pred_idc = 0;
834 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
835 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
836 int num_ref_l0 = 0, num_ref_l1 = 0;
839 batch = encoder_context->base.batch;
/* Slice-type-specific fixups: I slices use no weights; P/B take the PPS
 * reference counts unless the slice header overrides them. */
841 if (slice_type == SLICE_TYPE_I) {
842 luma_log2_weight_denom = 0;
843 chroma_log2_weight_denom = 0;
844 } else if (slice_type == SLICE_TYPE_P) {
845 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
846 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
848 if (slice_param->num_ref_idx_active_override_flag)
849 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
850 } else if (slice_type == SLICE_TYPE_B) {
851 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
852 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
853 num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
855 if (slice_param->num_ref_idx_active_override_flag) {
856 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
857 num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
860 if (weighted_pred_idc == 2) {
861 /* 8.4.3 - Derivation process for prediction weights (8-279) */
862 luma_log2_weight_denom = 5;
863 chroma_log2_weight_denom = 5;
/* Pull the per-slice-type rate-control tuning values. */
867 maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
868 maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
870 for (i = 0; i < 6; i++)
871 correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
873 grow = mfc_context->bit_rate_control_context[slice_type].GrowInit +
874 (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
875 shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit +
876 (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
/* NOTE(review): stray second ';' below is harmless but should be removed. */
878 BEGIN_BCS_BATCH(batch, 11);;
880 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
881 OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/
/* DW2: weight denominators (and, in hidden lines, the ref counts). */
886 (chroma_log2_weight_denom << 8) |
887 (luma_log2_weight_denom << 0));
/* DW3: per-slice coding controls and QP. */
890 (weighted_pred_idc << 30) |
891 (slice_param->direct_spatial_mv_pred_flag<<29) | /*Direct Prediction Type*/
892 (slice_param->disable_deblocking_filter_idc << 27) |
893 (slice_param->cabac_init_idc << 24) |
894 (qp<<16) | /*Slice Quantization Parameter*/
895 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
896 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
898 (beginy << 24) | /*First MB X&Y , the begin postion of current slice*/
900 slice_param->macroblock_address );
901 OUT_BCS_BATCH(batch, (nexty << 16) | nextx); /*Next slice first MB X&Y*/
/* DW6: rate-control behavior flags; HW RC counters are left disabled here. */
903 (0/*rate_control_enable*/ << 31) | /*in CBR mode RateControlCounterEnable = enable*/
904 (1 << 30) | /*ResetRateControlCounter*/
905 (0 << 28) | /*RC Triggle Mode = Always Rate Control*/
906 (4 << 24) | /*RC Stable Tolerance, middle level*/
907 (0/*rate_control_enable*/ << 23) | /*RC Panic Enable*/
908 (0 << 22) | /*QP mode, don't modfiy CBP*/
909 (0 << 21) | /*MB Type Direct Conversion Enabled*/
910 (0 << 20) | /*MB Type Skip Conversion Enabled*/
911 (last_slice << 19) | /*IsLastSlice*/
912 (0 << 18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
913 (1 << 17) | /*HeaderPresentFlag*/
914 (1 << 16) | /*SliceData PresentFlag*/
915 (1 << 15) | /*TailPresentFlag*/
916 (1 << 13) | /*RBSP NAL TYPE*/
917 (0 << 12) ); /*CabacZeroWordInsertionEnable*/
918 OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
/* DW8: QP clamp range for rate control. */
920 (maxQpN << 24) | /*Target QP - 24 is lowest QP*/
921 (maxQpP << 16) | /*Target QP + 20 is highest QP*/
931 OUT_BCS_BATCH(batch, 0);
933 ADVANCE_BCS_BATCH(batch);
/* DW offsets into the VME output message where the RDO costs, inter message
 * header, and motion vectors live; AVC_RDO_MASK extracts the 16-bit cost. */
936 #define AVC_INTRA_RDO_OFFSET 4
937 #define AVC_INTER_RDO_OFFSET 10
938 #define AVC_INTER_MSG_OFFSET 8
939 #define AVC_INTER_MV_OFFSET 48
940 #define AVC_RDO_MASK 0xFFFF
/* Software (CPU-built) per-MB PAK-object path; compiled out when
 * MFC_SOFTWARE_BATCH is 0 (the GPU kernel builds the batch instead). */
942 #if MFC_SOFTWARE_BATCH
/* Emit one MFC_AVC_PAK_OBJECT (12 DWs) for an intra macroblock at (x, y).
 * intra_msg is rebuilt from the VME output word msg[0]: keep the low flag
 * bits (0xC0FF), force the intra flag (bit 13), and move the MB type field
 * (INTRA_MBTYPE_MASK) down by 8. Returns the command length in DWs so the
 * caller can track batch usage. */
945 gen8_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
946 int qp,unsigned int *msg,
947 struct intel_encoder_context *encoder_context,
948 unsigned char target_mb_size, unsigned char max_mb_size,
949 struct intel_batchbuffer *batch)
951 int len_in_dwords = 12;
952 unsigned int intra_msg;
953 #define INTRA_MSG_FLAG (1 << 13)
954 #define INTRA_MBTYPE_MASK (0x1F0000)
956 batch = encoder_context->base.batch;
958 BEGIN_BCS_BATCH(batch, len_in_dwords);
960 intra_msg = msg[0] & 0xC0FF;
961 intra_msg |= INTRA_MSG_FLAG;
962 intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
963 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
964 OUT_BCS_BATCH(batch, 0);
965 OUT_BCS_BATCH(batch, 0);
/* DW3: MB flags — intra MBs carry no motion vectors, all DC CBP bits set. */
967 (0 << 24) | /* PackedMvNum, Debug*/
968 (0 << 20) | /* No motion vector */
969 (1 << 19) | /* CbpDcY */
970 (1 << 18) | /* CbpDcU */
971 (1 << 17) | /* CbpDcV */
974 OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x); /* Code Block Pattern for Y*/
975 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
976 OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
978 /*Stuff for Intra MB*/
979 OUT_BCS_BATCH(batch, msg[1]); /* We using Intra16x16 no 4x4 predmode*/
980 OUT_BCS_BATCH(batch, msg[2]);
981 OUT_BCS_BATCH(batch, msg[3]&0xFF);
983 /*MaxSizeInWord and TargetSzieInWord*/
984 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
985 (target_mb_size << 16) );
987 OUT_BCS_BATCH(batch, 0);
989 ADVANCE_BCS_BATCH(batch);
991 return len_in_dwords;
/*
 * Emit one software MFC_AVC_PAK_OBJECT command (12 dwords) for an INTER
 * macroblock into @batch.  Only compiled when MFC_SOFTWARE_BATCH is enabled.
 *
 * @offset     : byte offset into the VME output BO of this MB's motion
 *               vectors (indirect MV data pointer for the PAK command)
 * @slice_type : SLICE_TYPE_P / SLICE_TYPE_B, selects the "Last MB" dword form
 *
 * The MV array produced by VME is laid out per 4x4 sub-block; the blocks
 * below compact/replicate entries so they match the layout AVC_PAK expects
 * for 8x16, 16x8 and non-subdivided 8x8 partitions.
 *
 * Returns the emitted command length in dwords.
 *
 * NOTE(review): interior lines (braces, some dwords, the INTER_MV8/32 setup
 * around L696) are elided from this extraction.
 */
995 gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
996 unsigned int *msg, unsigned int offset,
997 struct intel_encoder_context *encoder_context,
998 unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
999 struct intel_batchbuffer *batch)
1001 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1002 int len_in_dwords = 12;
1003 unsigned int inter_msg = 0;
1005 batch = encoder_context->base.batch;
1007 #define MSG_MV_OFFSET 4
1008 unsigned int *mv_ptr;
1009 mv_ptr = msg + MSG_MV_OFFSET;
1010 /* MV of VME output is based on 16 sub-blocks. So it is necessary
1011 * to convert them to be compatible with the format of AVC_PAK
1014 if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1015 /* MV[0] and MV[2] are replicated */
1016 mv_ptr[4] = mv_ptr[0];
1017 mv_ptr[5] = mv_ptr[1];
1018 mv_ptr[2] = mv_ptr[8];
1019 mv_ptr[3] = mv_ptr[9];
1020 mv_ptr[6] = mv_ptr[8];
1021 mv_ptr[7] = mv_ptr[9];
1022 } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1023 /* MV[0] and MV[1] are replicated */
1024 mv_ptr[2] = mv_ptr[0];
1025 mv_ptr[3] = mv_ptr[1];
1026 mv_ptr[4] = mv_ptr[16];
1027 mv_ptr[5] = mv_ptr[17];
1028 mv_ptr[6] = mv_ptr[24];
1029 mv_ptr[7] = mv_ptr[25];
1030 } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1031 !(msg[1] & SUBMB_SHAPE_MASK)) {
1032 /* Don't touch MV[0] or MV[1] */
1033 mv_ptr[2] = mv_ptr[8];
1034 mv_ptr[3] = mv_ptr[9];
1035 mv_ptr[4] = mv_ptr[16];
1036 mv_ptr[5] = mv_ptr[17];
1037 mv_ptr[6] = mv_ptr[24];
1038 mv_ptr[7] = mv_ptr[25];
1042 BEGIN_BCS_BATCH(batch, len_in_dwords);
1044 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
/* 8x8 with sub-partitions needs the full 32-MV indirect payload;
 * otherwise 8 MVs suffice (setup lines partially elided here). */
1048 if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1049 if (msg[1] & SUBMB_SHAPE_MASK)
1052 OUT_BCS_BATCH(batch, inter_msg); /* 32 MV*/
1053 OUT_BCS_BATCH(batch, offset);
1054 inter_msg = msg[0] & (0x1F00FFFF);
1055 inter_msg |= INTER_MV8;
1056 inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17)); /* CbpDcY/U/V */
1057 if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1058 (msg[1] & SUBMB_SHAPE_MASK)) {
1059 inter_msg |= INTER_MV32;
1062 OUT_BCS_BATCH(batch, inter_msg);
1064 OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
1065 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
/* B slices carry an extra field (0xF << 28) in the "Last MB" dword. */
1067 if ( slice_type == SLICE_TYPE_B) {
1068 OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp); /* Last MB */
1070 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
1073 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
1076 inter_msg = msg[1] >> 8;
1077 /*Stuff for Inter MB*/
1078 OUT_BCS_BATCH(batch, inter_msg);
1079 OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
1080 OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
1082 /* MaxSizeInWord and TargetSizeInWord */
1083 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1084 (target_mb_size << 16) );
1086 OUT_BCS_BATCH(batch, 0x0);
1088 ADVANCE_BCS_BATCH(batch);
1090 return len_in_dwords;
/*
 * Program one H.264 slice into @slice_batch using the software (CPU-built)
 * PAK object path: emit slice state, headers/packed data for the first
 * slice, then walk every MB of the slice, choosing intra vs. inter PAK
 * objects from the VME RDO costs, and finally append the slice tail data.
 *
 * Under CBR the per-slice QP comes from the BRC context and
 * slice_qp_delta is rewritten so the packed slice header matches.
 *
 * NOTE(review): this extraction elides several lines, including the
 * slice_index parameter declaration (between L725 and L726), the qp_slice
 * setup used at L753, and the intra/inter branch structure inside the MB
 * loop — treat the control flow shown here as partial.
 */
1094 gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1095 struct encode_state *encode_state,
1096 struct intel_encoder_context *encoder_context,
1098 struct intel_batchbuffer *slice_batch)
1100 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1101 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1102 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1103 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1104 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1105 unsigned int *msg = NULL, offset = 0;
1106 unsigned char *msg_ptr = NULL;
1107 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1108 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
/* Last slice iff it ends exactly at the bottom-right MB of the frame. */
1109 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1111 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1112 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1113 unsigned int tail_data[] = { 0x0, 0x0 };
1114 int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1115 int is_intra = slice_type == SLICE_TYPE_I;
/* CBR: BRC owns the QP; keep the packed slice header consistent. */
1119 if (rate_control_mode == VA_RC_CBR) {
1120 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1121 if (encode_state->slice_header_index[slice_index] == 0) {
1122 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1127 /* only support for 8-bit pixel bit-depth */
1128 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1129 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1130 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1131 assert(qp >= 0 && qp < 52);
1133 gen8_mfc_avc_slice_state(ctx,
1136 encode_state, encoder_context,
1137 (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
1139 if ( slice_index == 0)
1140 intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1142 intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
/* Map the VME output BO writable and walk this slice's MB records. */
1144 dri_bo_map(vme_context->vme_output.bo , 1);
1145 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1148 msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1150 msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1153 for (i = pSliceParameter->macroblock_address;
1154 i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1155 int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1156 x = i % width_in_mbs;
1157 y = i / width_in_mbs;
1158 msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1162 gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1164 int inter_rdo, intra_rdo;
/* Pick intra when its RDO cost beats the inter candidate for this MB. */
1165 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1166 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1167 offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1168 if (intra_rdo < inter_rdo) {
1169 gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1171 msg += AVC_INTER_MSG_OFFSET;
1172 gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1177 dri_bo_unmap(vme_context->vme_output.bo);
/* Insert slice tail data; argument lists partially elided — presumably
 * the last_slice flag selects the 2-dword vs 1-dword tail (TODO confirm
 * against upstream). */
1180 mfc_context->insert_object(ctx, encoder_context,
1182 2, 1, 1, 0, slice_batch);
1184 mfc_context->insert_object(ctx, encoder_context,
1186 1, 1, 1, 0, slice_batch);
/*
 * Build the whole per-slice PAK command stream on the CPU (software path,
 * MFC_SOFTWARE_BATCH): program every slice into the auxiliary batchbuffer,
 * terminate it with MI_BATCH_BUFFER_END, and hand back its BO.
 *
 * Ownership: a reference is taken on the underlying BO before the
 * batchbuffer wrapper is freed; the caller is responsible for unreferencing
 * the returned BO (see gen8_mfc_avc_pipeline_programing).
 *
 * NOTE(review): the return statement and some declarations (batch_bo, i)
 * are elided from this extraction.
 */
1191 gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1192 struct encode_state *encode_state,
1193 struct intel_encoder_context *encoder_context)
1195 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1196 struct intel_batchbuffer *batch;
1200 batch = mfc_context->aux_batchbuffer;
1201 batch_bo = batch->buffer;
1202 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1203 gen8_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1206 intel_batchbuffer_align(batch, 8);
/* Terminate the second-level batch: a NOOP dword then BUFFER_END. */
1208 BEGIN_BCS_BATCH(batch, 2);
1209 OUT_BCS_BATCH(batch, 0);
1210 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1211 ADVANCE_BCS_BATCH(batch);
/* Keep the BO alive past intel_batchbuffer_free() for the caller. */
1213 dri_bo_reference(batch_bo);
1214 intel_batchbuffer_free(batch);
1215 mfc_context->aux_batchbuffer = NULL;
/*
 * Bind the VME output buffer as the input surface of the MFC batchbuffer
 * kernel (the GPU kernel that expands VME records into PAK commands).
 */
1223 gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1224 struct encode_state *encode_state,
1225 struct intel_encoder_context *encoder_context)
1227 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1228 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1230 assert(vme_context->vme_output.bo);
/* NOTE: "buffer_suface_setup" is the (misspelled) project hook name. */
1231 mfc_context->buffer_suface_setup(ctx,
1232 &mfc_context->gpe_context,
1233 &vme_context->vme_output,
1234 BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1235 SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
/*
 * Bind the auxiliary batchbuffer surface as the output surface of the MFC
 * batchbuffer kernel — the kernel writes PAK commands into this BO.
 */
1239 gen8_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1240 struct encode_state *encode_state,
1241 struct intel_encoder_context *encoder_context)
1243 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1244 assert(mfc_context->aux_batchbuffer_surface.bo);
1245 mfc_context->buffer_suface_setup(ctx,
1246 &mfc_context->gpe_context,
1247 &mfc_context->aux_batchbuffer_surface,
1248 BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1249 SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
/* Set up both surfaces (VME input, batchbuffer output) for the MFC kernel. */
1253 gen8_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
1254 struct encode_state *encode_state,
1255 struct intel_encoder_context *encoder_context)
1257 gen8_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1258 gen8_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
/*
 * Fill the Interface Descriptor Remap Table (IDRT) in the GPE dynamic-state
 * BO: one gen8 interface descriptor per MFC kernel, pointing at the kernel
 * entry and the shared binding table.
 *
 * NOTE(review): the dri_bo_map call (before the bo->virtual assert), the
 * desc++ advance, loop close and unmap are elided from this extraction.
 */
1262 gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
1263 struct encode_state *encode_state,
1264 struct intel_encoder_context *encoder_context)
1266 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1267 struct gen8_interface_descriptor_data *desc;
1270 unsigned char *desc_ptr;
1272 bo = mfc_context->gpe_context.dynamic_state.bo;
1274 assert(bo->virtual);
1275 desc_ptr = (unsigned char *)bo->virtual + mfc_context->gpe_context.idrt_offset;
1277 desc = (struct gen8_interface_descriptor_data *)desc_ptr;
1279 for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1280 struct i965_kernel *kernel;
1281 kernel = &mfc_context->gpe_context.kernels[i];
1282 assert(sizeof(*desc) == 32);
1283 /* Set up the descriptor table entry for this kernel. */
1284 memset(desc, 0, sizeof(*desc));
/* Kernel start pointer is in 64-byte (cacheline) units. */
1285 desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
1286 desc->desc3.sampler_count = 0;
1287 desc->desc3.sampler_state_pointer = 0;
1288 desc->desc4.binding_table_entry_count = 1;
/* Binding table pointer is in 32-byte units. */
1289 desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1290 desc->desc5.constant_urb_entry_read_offset = 0;
1291 desc->desc5.constant_urb_entry_read_length = 4;
/*
 * Set up the constant (CURBE) data for the MFC batchbuffer kernel.
 * NOTE(review): the body after the context lookup is elided from this
 * extraction — only the declaration is visible here.
 */
1303 gen8_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
1304 struct encode_state *encode_state,
1305 struct intel_encoder_context *encoder_context)
1307 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1312 #define AVC_PAK_LEN_IN_BYTE 48
1313 #define AVC_PAK_LEN_IN_OWORD 3
/*
 * Emit one CMD_MEDIA_OBJECT (14 dwords) that launches the MFC batchbuffer
 * kernel for a run of macroblocks: the kernel reads VME records and writes
 * PAK commands at @head_offset (in OWords, hence / 16) in the output BO.
 *
 * NOTE(review): most parameters (head_offset, number_mb_cmds, qp, mb_x/mb_y,
 * fwd_ref/bwd_ref, slice_end_x/y, ...) are declared on lines elided from
 * this extraction; their meaning is inferred from the uses below and should
 * be confirmed against upstream.
 */
1316 gen8_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1317 uint32_t intra_flag,
1329 uint32_t temp_value;
1330 BEGIN_BATCH(batch, 14);
1332 OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2));
1333 OUT_BATCH(batch, 0);
1334 OUT_BATCH(batch, 0);
1335 OUT_BATCH(batch, 0);
1336 OUT_BATCH(batch, 0);
1337 OUT_BATCH(batch, 0);
/* Inline data consumed by the kernel starts here. */
1340 OUT_BATCH(batch, head_offset / 16);
1341 OUT_BATCH(batch, (intra_flag) | (qp << 16));
1342 temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16));
1343 OUT_BATCH(batch, temp_value);
1345 OUT_BATCH(batch, number_mb_cmds);
1348 ((slice_end_y << 8) | (slice_end_x)));
1349 OUT_BATCH(batch, fwd_ref);
1350 OUT_BATCH(batch, bwd_ref);
1352 OUT_BATCH(batch, MI_NOOP);
1354 ADVANCE_BATCH(batch);
/*
 * Split one slice into runs of macroblocks and emit a MEDIA_OBJECT per run
 * so the GPU kernel fills in the PAK commands (hardware batchbuffer path).
 *
 * The per-launch MB count is tuned to the frame width (wider frames get
 * proportionally fewer rows per launch) and clamped to what remains in the
 * slice on the final iteration.
 *
 * NOTE(review): parameters after slice_param (head_offset, qp, last_slice?)
 * and the I-slice branch body around L890 are elided from this extraction;
 * for SLICE_TYPE_I the fwd/bwd ref values presumably differ — confirm
 * against upstream.
 */
1358 gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1359 struct intel_encoder_context *encoder_context,
1360 VAEncSliceParameterBufferH264 *slice_param,
1365 struct intel_batchbuffer *batch = encoder_context->base.batch;
1366 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1367 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1368 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1369 int total_mbs = slice_param->num_macroblocks;
1370 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1371 int number_mb_cmds = 128;
1372 int starting_offset = 0;
1374 int last_mb, slice_end_x, slice_end_y;
1375 int remaining_mb = total_mbs;
1376 uint32_t fwd_ref , bwd_ref, mb_flag;
1378 last_mb = slice_param->macroblock_address + total_mbs - 1;
1379 slice_end_x = last_mb % width_in_mbs;
1380 slice_end_y = last_mb / width_in_mbs;
1382 if (slice_type == SLICE_TYPE_I) {
1387 fwd_ref = vme_context->ref_index_in_mb[0];
1388 bwd_ref = vme_context->ref_index_in_mb[1];
/* Scale MBs-per-launch with frame width so each launch covers roughly a
 * constant amount of work. */
1392 if (width_in_mbs >= 100) {
1393 number_mb_cmds = width_in_mbs / 5;
1394 } else if (width_in_mbs >= 80) {
1395 number_mb_cmds = width_in_mbs / 4;
1396 } else if (width_in_mbs >= 60) {
1397 number_mb_cmds = width_in_mbs / 3;
1398 } else if (width_in_mbs >= 40) {
1399 number_mb_cmds = width_in_mbs / 2;
1401 number_mb_cmds = width_in_mbs;
/* do { ... } while loop: one MEDIA_OBJECT per chunk of MBs. */
1405 if (number_mb_cmds >= remaining_mb) {
1406 number_mb_cmds = remaining_mb;
1408 mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs;
1409 mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs;
1411 gen8_mfc_batchbuffer_emit_object_command(batch,
/* (argument list elided in this extraction) */
1424 head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE);
1425 remaining_mb -= number_mb_cmds;
1426 starting_offset += number_mb_cmds;
1427 } while (remaining_mb > 0);
/*
 * Hardware-batchbuffer counterpart of gen8_mfc_avc_pipeline_slice_programing:
 * emit slice state, headers and packed data into the aux batchbuffer, then
 * RESERVE space for the per-MB PAK commands (slice_batch->ptr is advanced
 * without writing — the GPU kernel launched by
 * gen8_mfc_avc_batchbuffer_slice_command fills that hole), and finally
 * append the aligned tail data.
 *
 * NOTE(review): slice_index parameter, qp_slice setup, the
 * gen8_mfc_avc_slice_state argument list and the insert_object argument
 * lists are elided from this extraction.
 */
1431 gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1432 struct encode_state *encode_state,
1433 struct intel_encoder_context *encoder_context,
1436 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1437 struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1438 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1439 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1440 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1441 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1442 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1443 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1444 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1445 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1446 unsigned int tail_data[] = { 0x0, 0x0 };
1448 int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
/* CBR: BRC owns the QP; keep the packed slice header consistent. */
1452 if (rate_control_mode == VA_RC_CBR) {
1453 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1454 if (encode_state->slice_header_index[slice_index] == 0) {
1455 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1460 /* only support for 8-bit pixel bit-depth */
1461 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1462 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1463 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1464 assert(qp >= 0 && qp < 52);
1466 gen8_mfc_avc_slice_state(ctx,
1471 (rate_control_mode == VA_RC_CBR),
1475 if (slice_index == 0)
1476 intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1478 intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1480 intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1481 head_offset = intel_batchbuffer_used_size(slice_batch);
/* Reserve a hole for the GPU-generated PAK commands of this slice. */
1483 slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE;
1485 gen8_mfc_avc_batchbuffer_slice_command(ctx,
1493 /* Aligned for tail */
1494 intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1496 mfc_context->insert_object(ctx,
1507 mfc_context->insert_object(ctx,
/*
 * Run the media pipeline that builds the PAK batchbuffer on the GPU:
 * GPE pipeline setup (gen8 or gen9 variant), one batchbuffer-slice pass per
 * slice, termination of the aux batch with MI_BATCH_BUFFER_END, a media
 * state flush, then flush and (on gen9) end the pipeline.
 *
 * NOTE(review): loop-variable declaration and some closing braces are
 * elided from this extraction.
 */
1523 gen8_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1524 struct encode_state *encode_state,
1525 struct intel_encoder_context *encoder_context)
1527 struct i965_driver_data *i965 = i965_driver_data(ctx);
1528 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1529 struct intel_batchbuffer *batch = encoder_context->base.batch;
1532 intel_batchbuffer_start_atomic(batch, 0x4000);
1534 if (IS_GEN9(i965->intel.device_info))
1535 gen9_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1537 gen8_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1539 for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1540 gen8_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i);
1543 struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
/* Terminate the second-level (aux) batch. */
1545 intel_batchbuffer_align(slice_batch, 8);
1546 BEGIN_BCS_BATCH(slice_batch, 2);
1547 OUT_BCS_BATCH(slice_batch, 0);
1548 OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
1549 ADVANCE_BCS_BATCH(slice_batch);
1551 BEGIN_BATCH(batch, 2);
1552 OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH);
1553 OUT_BATCH(batch, 0);
1554 ADVANCE_BATCH(batch);
1557 intel_batchbuffer_end_atomic(batch);
1558 intel_batchbuffer_flush(batch);
1560 if (IS_GEN9(i965->intel.device_info))
1561 gen9_gpe_pipeline_end(ctx, &mfc_context->gpe_context, batch);
/*
 * Orchestrate the GPU-built PAK batchbuffer: surfaces, interface
 * descriptors, constants, then the media pipeline itself.
 */
1565 gen8_mfc_build_avc_batchbuffer(VADriverContextP ctx,
1566 struct encode_state *encode_state,
1567 struct intel_encoder_context *encoder_context)
1569 gen8_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1570 gen8_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1571 gen8_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1572 gen8_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
/*
 * Hardware path: build the PAK commands on the GPU and return the BO
 * holding them.  The extra reference taken here is released by the caller
 * (gen8_mfc_avc_pipeline_programing) after the BO has been chained in.
 */
1576 gen8_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1577 struct encode_state *encode_state,
1578 struct intel_encoder_context *encoder_context)
1580 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1582 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
1583 gen8_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1585 return mfc_context->aux_batchbuffer_surface.bo;
/*
 * Top-level BCS programming for one H.264 frame: reject interlaced input,
 * build the per-slice PAK command BO (software or hardware path depending
 * on MFC_SOFTWARE_BATCH), then emit the picture-level state followed by an
 * MI_BATCH_BUFFER_START chaining to the slice BO.
 *
 * NOTE(review): the early-return after the interlace error message and the
 * OUT_BCS_RELOC target/offset arguments are elided from this extraction.
 */
1591 gen8_mfc_avc_pipeline_programing(VADriverContextP ctx,
1592 struct encode_state *encode_state,
1593 struct intel_encoder_context *encoder_context)
1595 struct intel_batchbuffer *batch = encoder_context->base.batch;
1596 dri_bo *slice_batch_bo;
1598 if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1599 fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1604 #if MFC_SOFTWARE_BATCH
1605 slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1607 slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1611 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1612 intel_batchbuffer_emit_mi_flush(batch);
1614 // picture level programing
1615 gen8_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* Chain to the second-level batch containing the slice/MB commands. */
1617 BEGIN_BCS_BATCH(batch, 3);
1618 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1619 OUT_BCS_RELOC(batch,
1621 I915_GEM_DOMAIN_COMMAND, 0,
1623 OUT_BCS_BATCH(batch, 0);
1624 ADVANCE_BCS_BATCH(batch);
1627 intel_batchbuffer_end_atomic(batch);
/* Drop the reference taken by the batchbuffer builder. */
1629 dri_bo_unreference(slice_batch_bo);
/*
 * Encode one H.264 picture: init the MFC context, prepare reference/coded
 * buffers, program and kick the BCS pipeline, then (CBR only) read back the
 * coded size and iterate with BRC until the HRD model is satisfied or the
 * QP range is exhausted.
 *
 * Fix: L1022 previously read "¤t_frame_bits_size" — an HTML-entity
 * mangling ("&curren;" rendered as "¤") of "&current_frame_bits_size".
 * Restored the address-of expression so gen8_mfc_stop receives the output
 * pointer; this matches the pointer use of current_frame_bits_size on the
 * following line.
 *
 * NOTE(review): the sts declaration, the CBR retry loop structure and some
 * closing braces are elided from this extraction.
 */
1634 gen8_mfc_avc_encode_picture(VADriverContextP ctx,
1635 struct encode_state *encode_state,
1636 struct intel_encoder_context *encoder_context)
1638 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1639 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1640 int current_frame_bits_size;
1644 gen8_mfc_init(ctx, encode_state, encoder_context);
1645 intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1646 /*Programing bcs pipeline*/
1647 gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
1648 gen8_mfc_run(ctx, encode_state, encoder_context);
1649 if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1650 gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1651 sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1652 if (sts == BRC_NO_HRD_VIOLATION) {
1653 intel_mfc_hrd_context_update(encode_state, mfc_context);
1656 else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
/* QP range exhausted: log once per context, accept the frame anyway. */
1657 if (!mfc_context->hrd.violation_noted) {
1658 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1659 mfc_context->hrd.violation_noted = 1;
1661 return VA_STATUS_SUCCESS;
1668 return VA_STATUS_SUCCESS;
1676 va_to_gen8_mpeg2_picture_type[3] = {
/*
 * Emit MFX_MPEG2_PIC_STATE (13 dwords) from the MPEG-2 picture parameter
 * buffer: f_codes, picture coding extension bits, picture type, frame
 * dimensions in MBs, and MB size-control limits.
 *
 * NOTE(review): several dword tails (e.g. the end of DW3 after the picture
 * type, DW8 after the MB max sizes) are elided from this extraction.
 */
1683 gen8_mfc_mpeg2_pic_state(VADriverContextP ctx,
1684 struct intel_encoder_context *encoder_context,
1685 struct encode_state *encode_state)
1687 struct intel_batchbuffer *batch = encoder_context->base.batch;
1688 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1689 VAEncPictureParameterBufferMPEG2 *pic_param;
1690 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1691 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1692 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1694 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1695 pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1696 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1698 BEGIN_BCS_BATCH(batch, 13);
1699 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1700 OUT_BCS_BATCH(batch,
1701 (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1702 (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1703 (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1704 (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1705 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1706 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1707 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1708 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1709 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1710 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1711 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1712 pic_param->picture_coding_extension.bits.alternate_scan << 6);
1713 OUT_BCS_BATCH(batch,
1714 0 << 14 | /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1715 va_to_gen8_mpeg2_picture_type[pic_param->picture_type] << 9 |
1717 OUT_BCS_BATCH(batch,
1718 1 << 31 | /* slice concealment */
1719 (height_in_mbs - 1) << 16 |
1720 (width_in_mbs - 1));
/* Intra-DC-precision-related fields depend on the first slice's
 * quantiser_scale_code (threshold 14); zero otherwise. */
1722 if (slice_param && slice_param->quantiser_scale_code >= 14)
1723 OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1725 OUT_BCS_BATCH(batch, 0);
1727 OUT_BCS_BATCH(batch, 0);
1728 OUT_BCS_BATCH(batch,
1729 0xFFF << 16 | /* InterMBMaxSize */
1730 0xFFF << 0 | /* IntraMBMaxSize */
1732 OUT_BCS_BATCH(batch, 0);
1733 OUT_BCS_BATCH(batch, 0);
1734 OUT_BCS_BATCH(batch, 0);
1735 OUT_BCS_BATCH(batch, 0);
1736 OUT_BCS_BATCH(batch, 0);
1737 OUT_BCS_BATCH(batch, 0);
1738 ADVANCE_BCS_BATCH(batch);
/*
 * Program the MPEG-2 quantizer matrices: the spec's default intra matrix
 * and a flat (all-16) non-intra matrix, each uploaded as 16 dwords.
 */
1742 gen8_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
/* Default intra quantizer matrix (ISO/IEC 13818-2, 6.3.11). */
1744 unsigned char intra_qm[64] = {
1745 8, 16, 19, 22, 26, 27, 29, 34,
1746 16, 16, 22, 24, 27, 29, 34, 37,
1747 19, 22, 26, 27, 29, 34, 34, 38,
1748 22, 22, 26, 27, 29, 34, 37, 40,
1749 22, 26, 27, 29, 32, 35, 40, 48,
1750 26, 27, 29, 32, 35, 40, 48, 58,
1751 26, 27, 29, 34, 38, 46, 56, 69,
1752 27, 29, 35, 38, 46, 56, 69, 83
/* Default non-intra matrix: flat 16s. */
1755 unsigned char non_intra_qm[64] = {
1756 16, 16, 16, 16, 16, 16, 16, 16,
1757 16, 16, 16, 16, 16, 16, 16, 16,
1758 16, 16, 16, 16, 16, 16, 16, 16,
1759 16, 16, 16, 16, 16, 16, 16, 16,
1760 16, 16, 16, 16, 16, 16, 16, 16,
1761 16, 16, 16, 16, 16, 16, 16, 16,
1762 16, 16, 16, 16, 16, 16, 16, 16,
1763 16, 16, 16, 16, 16, 16, 16, 16
1766 gen8_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1767 gen8_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
/*
 * Program the forward (encoder-side) quantizer matrices: reciprocals of the
 * quantizer steps in 16.16 fixed point (65536/step), uploaded as 32 dwords
 * of 16-bit entries.
 *
 * NOTE(review): a few intra_fqm entries (e.g. 65536/0x13 and 65536/0x18 in
 * rows 4-5) do not mirror the intra_qm table above; this matches what is
 * visible here but looks suspicious — confirm against upstream before
 * "fixing".
 */
1771 gen8_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1773 unsigned short intra_fqm[64] = {
1774 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1775 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1776 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1777 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1778 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1779 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1780 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1781 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
/* Flat non-intra matrix: 65536/16 = 0x1000 everywhere. */
1784 unsigned short non_intra_fqm[64] = {
1785 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1786 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1787 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1788 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1789 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1790 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1791 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1792 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1795 gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1796 gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
/*
 * Emit MFC_MPEG2_SLICEGROUP_STATE (8 dwords) for one MPEG-2 slice group:
 * flags (last/intra group), group boundaries, QP, and the indirect PAK-BSE
 * output offset.  The bitstream pointer is loaded only once per frame
 * because LoadSlicePointerFlag is 0 in the picture state.
 *
 * NOTE(review): several parameters (x/y, qp, intra_slice) and the DW2
 * boundary fields are on lines elided from this extraction.
 */
1800 gen8_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1801 struct intel_encoder_context *encoder_context,
1803 int next_x, int next_y,
1804 int is_fisrt_slice_group,
1805 int is_last_slice_group,
1808 struct intel_batchbuffer *batch)
1810 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1813 batch = encoder_context->base.batch;
1815 BEGIN_BCS_BATCH(batch, 8);
1817 OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1818 OUT_BCS_BATCH(batch,
1819 0 << 31 | /* MbRateCtrlFlag */
1820 !!is_last_slice_group << 19 | /* IsLastSliceGrp */
1821 1 << 17 | /* Insert Header before the first slice group data */
1822 1 << 16 | /* SliceData PresentFlag: always 1 */
1823 1 << 15 | /* TailPresentFlag: always 1 */
1824 0 << 14 | /* FirstSliceHdrDisabled: slice header for each slice */
1825 !!intra_slice << 13 | /* IntraSlice */
1826 !!intra_slice << 12 | /* IntraSliceFlag */
1828 OUT_BCS_BATCH(batch,
1834 OUT_BCS_BATCH(batch, qp); /* FIXME: SliceGroupQp */
1835 /* bitstream pointer is only loaded once for the first slice of a frame when
1836 * LoadSlicePointerFlag is 0
1838 OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1839 OUT_BCS_BATCH(batch, 0); /* FIXME: */
1840 OUT_BCS_BATCH(batch, 0); /* FIXME: CorrectPoints */
1841 OUT_BCS_BATCH(batch, 0); /* FIXME: CVxxx */
1843 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one MFC_MPEG2_PAK_OBJECT (9 dwords) for an INTRA macroblock: no
 * motion vectors, CBP and slice/group boundary flags packed per the MFX
 * command layout.  Returns the command length in dwords.
 *
 * NOTE(review): the x/y, mb_type and qp_scale_code parameters are on lines
 * elided from this extraction, as are the tails of DW1/DW3/DW4.
 */
1847 gen8_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1848 struct intel_encoder_context *encoder_context,
1850 int first_mb_in_slice,
1851 int last_mb_in_slice,
1852 int first_mb_in_slice_group,
1853 int last_mb_in_slice_group,
1856 int coded_block_pattern,
1857 unsigned char target_size_in_word,
1858 unsigned char max_size_in_word,
1859 struct intel_batchbuffer *batch)
1861 int len_in_dwords = 9;
1864 batch = encoder_context->base.batch;
1866 BEGIN_BCS_BATCH(batch, len_in_dwords);
1868 OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1869 OUT_BCS_BATCH(batch,
1870 0 << 24 | /* PackedMvNum */
1871 0 << 20 | /* MvFormat */
1872 7 << 17 | /* CbpDcY/CbpDcU/CbpDcV */
1873 0 << 15 | /* TransformFlag: frame DCT */
1874 0 << 14 | /* FieldMbFlag */
1875 1 << 13 | /* IntraMbFlag */
1876 mb_type << 8 | /* MbType: Intra */
1877 0 << 2 | /* SkipMbFlag */
1878 0 << 0 | /* InterMbMode */
1880 OUT_BCS_BATCH(batch, y << 16 | x);
1881 OUT_BCS_BATCH(batch,
1882 max_size_in_word << 24 |
1883 target_size_in_word << 16 |
1884 coded_block_pattern << 6 | /* CBP */
1886 OUT_BCS_BATCH(batch,
1887 last_mb_in_slice << 31 |
1888 first_mb_in_slice << 30 |
1889 0 << 27 | /* EnableCoeffClamp */
1890 last_mb_in_slice_group << 26 |
1891 0 << 25 | /* MbSkipConvDisable */
1892 first_mb_in_slice_group << 24 |
1893 0 << 16 | /* MvFieldSelect */
1894 qp_scale_code << 0 |
/* Intra MB: all four MV slots are zero. */
1896 OUT_BCS_BATCH(batch, 0); /* MV[0][0] */
1897 OUT_BCS_BATCH(batch, 0); /* MV[1][0] */
1898 OUT_BCS_BATCH(batch, 0); /* MV[0][1] */
1899 OUT_BCS_BATCH(batch, 0); /* MV[1][1] */
1901 ADVANCE_BCS_BATCH(batch);
1903 return len_in_dwords;
1907 #define MPEG2_INTER_MV_OFFSET 48
1909 static struct _mv_ranges
1911 int low; /* in the unit of 1/2 pixel */
1912 int high; /* in the unit of 1/2 pixel */
/*
 * Sanitize one MPEG-2 motion-vector component (half-pel units): if the
 * vector would reference pixels outside the display area for the MB at
 * @pos, it is discarded (body of that branch elided from this extraction),
 * otherwise it is clamped to the legal range for @f_code via the mv_ranges
 * table.  f_code values outside 1..9 skip the clamp.
 */
1927 mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
1929 if (mv + pos * 16 * 2 < 0 ||
1930 mv + (pos + 1) * 16 * 2 > display_max * 2)
1933 if (f_code > 0 && f_code < 10) {
1934 if (mv < mv_ranges[f_code].low)
1935 mv = mv_ranges[f_code].low;
1937 if (mv > mv_ranges[f_code].high)
1938 mv = mv_ranges[f_code].high;
/*
 * Emit one MFC_MPEG2_PAK_OBJECT (9 dwords) for an INTER (frame-predicted)
 * macroblock: two packed forward/backward MVs taken from the VME output
 * record, range-checked and clamped through mpeg2_motion_vector() with the
 * picture's f_codes.  Returns the command length in dwords.
 *
 * Fix: removed the stray duplicate semicolon at the end of the mvptr
 * assignment (was ");;").  Harmless to the compiler but a lint-level defect.
 *
 * NOTE(review): the x/y and msg parameters and the qp_scale_code parameter
 * are on lines elided from this extraction, as are the tails of DW1/DW3/DW4.
 */
1945 gen8_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
1946 struct encode_state *encode_state,
1947 struct intel_encoder_context *encoder_context,
1949 int width_in_mbs, int height_in_mbs,
1951 int first_mb_in_slice,
1952 int last_mb_in_slice,
1953 int first_mb_in_slice_group,
1954 int last_mb_in_slice_group,
1956 unsigned char target_size_in_word,
1957 unsigned char max_size_in_word,
1958 struct intel_batchbuffer *batch)
1960 VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1961 int len_in_dwords = 9;
1962 short *mvptr, mvx0, mvy0, mvx1, mvy1;
1965 batch = encoder_context->base.batch;
/* VME MVs are quarter-pel; /2 converts to the half-pel units MPEG-2 uses. */
1967 mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);
1968 mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
1969 mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
1970 mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
1971 mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
1973 BEGIN_BCS_BATCH(batch, len_in_dwords);
1975 OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1976 OUT_BCS_BATCH(batch,
1977 2 << 24 | /* PackedMvNum */
1978 7 << 20 | /* MvFormat */
1979 7 << 17 | /* CbpDcY/CbpDcU/CbpDcV */
1980 0 << 15 | /* TransformFlag: frame DCT */
1981 0 << 14 | /* FieldMbFlag */
1982 0 << 13 | /* IntraMbFlag */
1983 1 << 8 | /* MbType: Frame-based */
1984 0 << 2 | /* SkipMbFlag */
1985 0 << 0 | /* InterMbMode */
1987 OUT_BCS_BATCH(batch, y << 16 | x);
1988 OUT_BCS_BATCH(batch,
1989 max_size_in_word << 24 |
1990 target_size_in_word << 16 |
1991 0x3f << 6 | /* CBP */
1993 OUT_BCS_BATCH(batch,
1994 last_mb_in_slice << 31 |
1995 first_mb_in_slice << 30 |
1996 0 << 27 | /* EnableCoeffClamp */
1997 last_mb_in_slice_group << 26 |
1998 0 << 25 | /* MbSkipConvDisable */
1999 first_mb_in_slice_group << 24 |
2000 0 << 16 | /* MvFieldSelect */
2001 qp_scale_code << 0 |
2004 OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16); /* MV[0][0] */
2005 OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16); /* MV[1][0] */
2006 OUT_BCS_BATCH(batch, 0); /* MV[0][1] */
2007 OUT_BCS_BATCH(batch, 0); /* MV[1][1] */
2009 ADVANCE_BCS_BATCH(batch);
2011 return len_in_dwords;
/*
 * Insert the application-provided packed MPEG-2 SPS (sequence header) and
 * PPS (picture header) bitstreams, if present, into the slice batch via
 * the context's insert_object() hook. Lengths come from the packed header
 * parameter buffer in bits; the insert takes DWords plus trailing bits.
 */
2015 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2016 struct encode_state *encode_state,
2017 struct intel_encoder_context *encoder_context,
2018 struct intel_batchbuffer *slice_batch)
2020 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2021 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
     /* Sequence header (SPS), only when the app actually packed one. */
2023 if (encode_state->packed_header_data[idx]) {
2024 VAEncPackedHeaderParameterBuffer *param = NULL;
2025 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2026 unsigned int length_in_bits;
2028 assert(encode_state->packed_header_param[idx]);
2029 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2030 length_in_bits = param->bit_length;
2032 mfc_context->insert_object(ctx,
     /* Full DWords, then the number of valid bits in the last DWord. */
2035 ALIGN(length_in_bits, 32) >> 5,
2036 length_in_bits & 0x1f,
2037 5, /* FIXME: check it */
2040 0, /* Needn't insert emulation bytes for MPEG-2 */
     /* Picture header (PPS), same handling as the sequence header above. */
2044 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2046 if (encode_state->packed_header_data[idx]) {
2047 VAEncPackedHeaderParameterBuffer *param = NULL;
2048 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2049 unsigned int length_in_bits;
2051 assert(encode_state->packed_header_param[idx]);
2052 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2053 length_in_bits = param->bit_length;
2055 mfc_context->insert_object(ctx,
2058 ALIGN(length_in_bits, 32) >> 5,
2059 length_in_bits & 0x1f,
2060 5, /* FIXME: check it */
2063 0, /* Needn't insert emulation bytes for MPEG-2 */
/*
 * Program one MPEG-2 slice group: emit slice-group state, (for the first
 * group) the packed sequence/picture headers, a leading '00' delimiter,
 * then one PAK object per macroblock, choosing intra vs inter per MB from
 * the VME RDO costs. Ends the group with a section delimiter, or with the
 * sequence tail delimiter when this is the last group of the picture.
 * Reads per-MB VME messages by mapping vme_output.bo for the duration.
 */
2069 gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2070 struct encode_state *encode_state,
2071 struct intel_encoder_context *encoder_context,
2073 VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2074 struct intel_batchbuffer *slice_batch)
2076 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2077 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2078 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2079 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2080 unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2081 unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2082 int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2083 int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2085 int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2086 unsigned int *msg = NULL;
2087 unsigned char *msg_ptr = NULL;
2089 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
     /* MB address -> (x, y) start position; a slice must not wrap rows. */
2090 h_start_pos = slice_param->macroblock_address % width_in_mbs;
2091 v_start_pos = slice_param->macroblock_address / width_in_mbs;
2092 assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
     /* Map VME output so per-MB messages (MVs, RDO costs) can be read. */
2094 dri_bo_map(vme_context->vme_output.bo , 0);
2095 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
     /* Start position of the next group; past-the-end when this is last. */
2097 if (next_slice_group_param) {
2098 h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2099 v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2101 h_next_start_pos = 0;
2102 v_next_start_pos = height_in_mbs;
2105 gen8_mfc_mpeg2_slicegroup_state(ctx,
2112 next_slice_group_param == NULL,
2113 slice_param->is_intra_slice,
2114 slice_param->quantiser_scale_code,
     /* Packed SPS/PPS go in front of the very first slice group only. */
2117 if (slice_index == 0)
2118 intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2120 /* Insert '00' to make sure the header is valid */
2121 mfc_context->insert_object(ctx,
2123 (unsigned int*)section_delimiter,
2125 8, /* 8bits in the last DWORD */
2132 for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2133 /* PAK for each macroblocks */
2134 for (j = 0; j < slice_param->num_macroblocks; j++) {
2135 int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2136 int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2137 int first_mb_in_slice = (j == 0);
2138 int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2139 int first_mb_in_slice_group = (i == 0 && j == 0);
2140 int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2141 j == slice_param->num_macroblocks - 1);
     /* Per-MB VME message, indexed by absolute MB address. */
2143 msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2145 if (slice_param->is_intra_slice) {
2146 gen8_mfc_mpeg2_pak_object_intra(ctx,
2151 first_mb_in_slice_group,
2152 last_mb_in_slice_group,
2154 slice_param->quantiser_scale_code,
     /* Inter slice: pick the cheaper of intra/inter per the VME RDO costs. */
2160 int inter_rdo, intra_rdo;
2161 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2162 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2164 if (intra_rdo < inter_rdo)
2165 gen8_mfc_mpeg2_pak_object_intra(ctx,
2170 first_mb_in_slice_group,
2171 last_mb_in_slice_group,
2173 slice_param->quantiser_scale_code,
2179 gen8_mfc_mpeg2_pak_object_inter(ctx,
2183 width_in_mbs, height_in_mbs,
2187 first_mb_in_slice_group,
2188 last_mb_in_slice_group,
2189 slice_param->quantiser_scale_code,
2199 dri_bo_unmap(vme_context->vme_output.bo);
2202 if (next_slice_group_param == NULL) { /* end of a picture */
2203 mfc_context->insert_object(ctx,
2205 (unsigned int *)tail_delimiter,
2207 8, /* 8bits in the last DWORD */
2213 } else { /* end of a slice group */
2214 mfc_context->insert_object(ctx,
2216 (unsigned int *)section_delimiter,
2218 8, /* 8bits in the last DWORD */
/*
2228 * A batch buffer for all slices, including slice state,
2229 * slice insert object and slice pak object commands
 *
 * Builds the whole per-slice command stream in the context's auxiliary
 * batch buffer, terminates it with MI_BATCH_BUFFER_END, and hands the
 * underlying dri_bo (with an extra reference) to the caller, who is
 * responsible for unreferencing it. The aux batchbuffer itself is freed
 * and cleared so a new one is created for the next frame.
 */
2233 gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2234 struct encode_state *encode_state,
2235 struct intel_encoder_context *encoder_context)
2237 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2238 struct intel_batchbuffer *batch;
2239 VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2243 batch = mfc_context->aux_batchbuffer;
2244 batch_bo = batch->buffer;
     /* Emit each slice group; pass the following group's params (or NULL
      * for the last one) so boundary handling can differ. */
2246 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2247 if (i == encode_state->num_slice_params_ext - 1)
2248 next_slice_group_param = NULL;
2250 next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2252 gen8_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
     /* Pad to a QWord boundary, then terminate the second-level batch. */
2255 intel_batchbuffer_align(batch, 8);
2257 BEGIN_BCS_BATCH(batch, 2);
2258 OUT_BCS_BATCH(batch, 0);
2259 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2260 ADVANCE_BCS_BATCH(batch);
     /* Keep the BO alive past the batchbuffer free; ownership (one ref)
      * transfers to the caller. */
2262 dri_bo_reference(batch_bo);
2263 intel_batchbuffer_free(batch);
2264 mfc_context->aux_batchbuffer = NULL;
/*
 * Picture-level MFX programming for MPEG-2 encode: select the pipe mode,
 * then emit surface/buffer-address states followed by the MPEG-2 picture
 * state and the (inverse) quantizer matrix states, in the order the
 * hardware expects.
 */
2270 gen8_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2271 struct encode_state *encode_state,
2272 struct intel_encoder_context *encoder_context)
2274 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2276 mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2277 mfc_context->set_surface_state(ctx, encoder_context);
2278 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2279 gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
2280 gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2281 gen8_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2282 gen8_mfc_mpeg2_qm_state(ctx, encoder_context);
2283 gen8_mfc_mpeg2_fqm_state(ctx, encoder_context);
/*
 * Top-level BCS programming for one MPEG-2 frame: first build the
 * second-level slice batch buffer, then atomically emit the picture-level
 * state plus an MI_BATCH_BUFFER_START that chains into the slice batch.
 * The local reference on the slice batch BO is dropped at the end (the
 * relocation keeps it alive for execution).
 */
2287 gen8_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2288 struct encode_state *encode_state,
2289 struct intel_encoder_context *encoder_context)
2291 struct intel_batchbuffer *batch = encoder_context->base.batch;
2292 dri_bo *slice_batch_bo;
2294 slice_batch_bo = gen8_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
     /* begin programing, 0x4000 DWords reserved for this atomic section */
2297 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
2298 intel_batchbuffer_emit_mi_flush(batch);
2300 // picture level programing
2301 gen8_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
     /* Chain to the second-level slice batch via a relocation. */
2303 BEGIN_BCS_BATCH(batch, 4);
2304 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
2305 OUT_BCS_RELOC(batch,
2307 I915_GEM_DOMAIN_COMMAND, 0,
2309 OUT_BCS_BATCH(batch, 0);
2310 OUT_BCS_BATCH(batch, 0);
2311 ADVANCE_BCS_BATCH(batch);
2314 intel_batchbuffer_end_atomic(batch);
2316 dri_bo_unreference(slice_batch_bo);
/*
 * Bind all surfaces/buffers the MPEG-2 encode needs into the MFC context:
 * reconstructed output, forward/backward reference surfaces, the input
 * YUV surface, and the coded (output bitstream) buffer. Takes a dri_bo
 * reference on everything it stores. Returns VA_STATUS_SUCCESS.
 */
2320 intel_mfc_mpeg2_prepare(VADriverContextP ctx,
2321 struct encode_state *encode_state,
2322 struct intel_encoder_context *encoder_context)
2324 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2325 struct object_surface *obj_surface;
2326 struct object_buffer *obj_buffer;
2327 struct i965_coded_buffer_segment *coded_buffer_segment;
2328 VAStatus vaStatus = VA_STATUS_SUCCESS;
2332 /* reconstructed surface */
2333 obj_surface = encode_state->reconstructed_object;
2334 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2335 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2336 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
     /* orig_* are the logical picture size; width/height are the padded
      * allocation pitch. */
2337 mfc_context->surface_state.width = obj_surface->orig_width;
2338 mfc_context->surface_state.height = obj_surface->orig_height;
2339 mfc_context->surface_state.w_pitch = obj_surface->width;
2340 mfc_context->surface_state.h_pitch = obj_surface->height;
2342 /* forward reference */
2343 obj_surface = encode_state->reference_objects[0];
2345 if (obj_surface && obj_surface->bo) {
2346 mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2347 dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2349 mfc_context->reference_surfaces[0].bo = NULL;
2351 /* backward reference */
2352 obj_surface = encode_state->reference_objects[1];
2354 if (obj_surface && obj_surface->bo) {
2355 mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2356 dri_bo_reference(mfc_context->reference_surfaces[1].bo);
     /* No backward reference given: reuse the forward one (if any). */
2358 mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2360 if (mfc_context->reference_surfaces[1].bo)
2361 dri_bo_reference(mfc_context->reference_surfaces[1].bo);
     /* Fill the remaining reference slots by alternating fwd/bwd. */
2364 for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2365 mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2367 if (mfc_context->reference_surfaces[i].bo)
2368 dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2371 /* input YUV surface */
2372 obj_surface = encode_state->input_yuv_object;
2373 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2374 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
     /* coded buffer: bitstream starts after the driver's header segment,
      * and stops one page short of the end (rounded down to a page). */
2377 obj_buffer = encode_state->coded_buf_object;
2378 bo = obj_buffer->buffer_store->bo;
2379 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2380 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2381 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2382 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2384 /* set the internal flag to 0 to indicate the coded size is unknown */
2386 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2387 coded_buffer_segment->mapped = 0;
2388 coded_buffer_segment->codec = encoder_context->codec;
/*
 * Encode one MPEG-2 picture: (re)initialize the MFC context, bind all
 * surfaces/buffers, program the BCS pipeline, then submit it for
 * execution. Always reports success; per-stage errors surface elsewhere.
 */
2395 gen8_mfc_mpeg2_encode_picture(VADriverContextP ctx,
2396 struct encode_state *encode_state,
2397 struct intel_encoder_context *encoder_context)
2399 gen8_mfc_init(ctx, encode_state, encoder_context);
2400 intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2401 /*Programing bcs pipeline*/
2402 gen8_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2403 gen8_mfc_run(ctx, encode_state, encoder_context);
2405 return VA_STATUS_SUCCESS;
2408 /* JPEG encode methods */
/*
 * Bind the input YUV surface and the coded (output) buffer for JPEG
 * encode. JPEG needs no reference surfaces, so this is a trimmed-down
 * counterpart of intel_mfc_mpeg2_prepare(). Takes a dri_bo reference on
 * each stored BO and returns VA_STATUS_SUCCESS.
 */
2411 intel_mfc_jpeg_prepare(VADriverContextP ctx,
2412 struct encode_state *encode_state,
2413 struct intel_encoder_context *encoder_context)
2415 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2416 struct object_surface *obj_surface;
2417 struct object_buffer *obj_buffer;
2418 struct i965_coded_buffer_segment *coded_buffer_segment;
2419 VAStatus vaStatus = VA_STATUS_SUCCESS;
2422 /* input YUV surface */
2423 obj_surface = encode_state->input_yuv_object;
2424 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2425 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
     /* coded buffer: bitstream starts after the driver's header segment,
      * and stops one page short of the end (rounded down to a page). */
2428 obj_buffer = encode_state->coded_buf_object;
2429 bo = obj_buffer->buffer_store->bo;
2430 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2431 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2432 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2433 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2435 /* set the internal flag to 0 to indicate the coded size is unknown */
2437 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2438 coded_buffer_segment->mapped = 0;
2439 coded_buffer_segment->codec = encoder_context->codec;
/*
 * Emit MFX_SURFACE_STATE (6 DWords) describing the JPEG input surface:
 * dimensions, pitch, tiling, the MFX surface format derived from the
 * surface FourCC, and the Cb/Cr plane Y offsets.
 */
2447 gen8_mfc_jpeg_set_surface_state(VADriverContextP ctx,
2448 struct intel_encoder_context *encoder_context,
2449 struct encode_state *encode_state)
2451 struct intel_batchbuffer *batch = encoder_context->base.batch;
2452 struct object_surface *obj_surface = encode_state->input_yuv_object;
2453 unsigned int input_fourcc;
2454 unsigned int y_cb_offset;
2455 unsigned int y_cr_offset;
2456 unsigned int surface_format;
2458 assert(obj_surface);
2460 y_cb_offset = obj_surface->y_cb_offset;
2461 y_cr_offset = obj_surface->y_cr_offset;
2462 input_fourcc = obj_surface->fourcc;
     /* Provisional default; refined by the switch below. */
2464 surface_format = (obj_surface->fourcc == VA_FOURCC_Y800) ?
2465 MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
     /* Map the input FourCC to the MFX surface format field. */
2468 switch (input_fourcc) {
2469 case VA_FOURCC_Y800: {
2470 surface_format = MFX_SURFACE_MONOCHROME;
2473 case VA_FOURCC_NV12: {
2474 surface_format = MFX_SURFACE_PLANAR_420_8;
2477 case VA_FOURCC_UYVY: {
2478 surface_format = MFX_SURFACE_YCRCB_SWAPY;
2481 case VA_FOURCC_YUY2: {
2482 surface_format = MFX_SURFACE_YCRCB_NORMAL;
2485 case VA_FOURCC_RGBA:
2486 case VA_FOURCC_444P: {
2487 surface_format = MFX_SURFACE_R8G8B8A8_UNORM;
2492 BEGIN_BCS_BATCH(batch, 6);
2494 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2495 OUT_BCS_BATCH(batch, 0);
     /* DW2: height-1 / width-1 of the logical picture. */
2496 OUT_BCS_BATCH(batch,
2497 ((obj_surface->orig_height - 1) << 18) |
2498 ((obj_surface->orig_width - 1) << 4));
2499 OUT_BCS_BATCH(batch,
2500 (surface_format << 28) | /* Surface Format */
2501 (0 << 27) | /* must be 1 for interleave U/V, hardware requirement for AVC/VC1/MPEG and 0 for JPEG */
2502 (0 << 22) | /* surface object control state, FIXME??? */
2503 ((obj_surface->width - 1) << 3) | /* pitch */
2504 (0 << 2) | /* must be 0 for interleave U/V */
2505 (1 << 1) | /* must be tiled */
2506 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
2507 OUT_BCS_BATCH(batch,
2508 (0 << 16) | /* X offset for U(Cb), must be 0 */
2509 (y_cb_offset << 0)); /* Y offset for U(Cb) */
2510 OUT_BCS_BATCH(batch,
2511 (0 << 16) | /* X offset for V(Cr), must be 0 */
2512 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2515 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_JPEG_PIC_STATE (3 DWords): derive the input surface format and
 * output MCU structure from the surface FourCC, then compute the frame
 * size in 8x8 blocks and the number of valid pixels in the last MCU of a
 * row/column, per the MCU's luma sampling factors (H1, V1).
 */
2519 gen8_mfc_jpeg_pic_state(VADriverContextP ctx,
2520 struct intel_encoder_context *encoder_context,
2521 struct encode_state *encode_state)
2523 struct intel_batchbuffer *batch = encoder_context->base.batch;
2524 struct object_surface *obj_surface = encode_state->input_yuv_object;
2525 VAEncPictureParameterBufferJPEG *pic_param;
2526 unsigned int surface_format;
2527 unsigned int frame_width_in_blks;
2528 unsigned int frame_height_in_blks;
2529 unsigned int pixels_in_horizontal_lastMCU;
2530 unsigned int pixels_in_vertical_lastMCU;
2531 unsigned int input_surface_format;
2532 unsigned int output_mcu_format;
2533 unsigned int picture_width;
2534 unsigned int picture_height;
2536 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
2537 assert(obj_surface);
2538 pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
2539 surface_format = obj_surface->fourcc;
2540 picture_width = pic_param->picture_width;
2541 picture_height = pic_param->picture_height;
     /* FourCC -> (input surface format, output MCU structure). */
2543 switch (surface_format) {
2544 case VA_FOURCC_Y800: {
2545 input_surface_format = JPEG_ENC_SURFACE_Y8;
2546 output_mcu_format = JPEG_ENC_MCU_YUV400;
2549 case VA_FOURCC_NV12: {
2550 input_surface_format = JPEG_ENC_SURFACE_NV12;
2551 output_mcu_format = JPEG_ENC_MCU_YUV420;
2554 case VA_FOURCC_UYVY: {
2555 input_surface_format = JPEG_ENC_SURFACE_UYVY;
2556 output_mcu_format = JPEG_ENC_MCU_YUV422H_2Y;
2559 case VA_FOURCC_YUY2: {
2560 input_surface_format = JPEG_ENC_SURFACE_YUY2;
2561 output_mcu_format = JPEG_ENC_MCU_YUV422H_2Y;
2565 case VA_FOURCC_RGBA:
2566 case VA_FOURCC_444P: {
2567 input_surface_format = JPEG_ENC_SURFACE_RGB;
2568 output_mcu_format = JPEG_ENC_MCU_RGB;
     /* Unknown FourCC: fall back to NV12 / YUV420. */
2572 input_surface_format = JPEG_ENC_SURFACE_NV12;
2573 output_mcu_format = JPEG_ENC_MCU_YUV420;
     /* Block counts and last-MCU pixel counts depend on the MCU size:
      * 8x8 for YUV400/RGB, 16x16 for YUV420, 16x8 for YUV422H_2Y. */
2579 switch (output_mcu_format) {
2581 case JPEG_ENC_MCU_YUV400:
2582 case JPEG_ENC_MCU_RGB: {
2583 pixels_in_horizontal_lastMCU = (picture_width % 8);
2584 pixels_in_vertical_lastMCU = (picture_height % 8);
2586 //H1=1,V1=1 for YUV400 and YUV444. So, compute these values accordingly
2587 frame_width_in_blks = ((picture_width + 7) / 8);
2588 frame_height_in_blks = ((picture_height + 7) / 8);
2592 case JPEG_ENC_MCU_YUV420: {
     /* Odd dimensions are rounded up to the next even pixel count
      * before taking the remainder (chroma is subsampled 2x). */
2593 if((picture_width % 2) == 0)
2594 pixels_in_horizontal_lastMCU = picture_width % 16;
2596 pixels_in_horizontal_lastMCU = ((picture_width % 16) + 1) % 16;
2598 if((picture_height % 2) == 0)
2599 pixels_in_vertical_lastMCU = picture_height % 16;
2601 pixels_in_vertical_lastMCU = ((picture_height % 16) + 1) % 16;
2603 //H1=2,V1=2 for YUV420. So, compute these values accordingly
2604 frame_width_in_blks = ((picture_width + 15) / 16) * 2;
2605 frame_height_in_blks = ((picture_height + 15) / 16) * 2;
2609 case JPEG_ENC_MCU_YUV422H_2Y: {
2610 if(picture_width % 2 == 0)
2611 pixels_in_horizontal_lastMCU = picture_width % 16;
2613 pixels_in_horizontal_lastMCU = ((picture_width % 16) + 1) % 16;
2615 pixels_in_vertical_lastMCU = picture_height % 8;
2617 //H1=2,V1=1 for YUV422H_2Y. So, compute these values accordingly
2618 frame_width_in_blks = ((picture_width + 15) / 16) * 2;
2619 frame_height_in_blks = ((picture_height + 7) / 8);
2624 BEGIN_BCS_BATCH(batch, 3);
2626 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2628 OUT_BCS_BATCH(batch,
2629 ( pixels_in_horizontal_lastMCU << 26) | /* Pixels In Horizontal Last MCU */
2630 ( pixels_in_vertical_lastMCU << 21) | /* Pixels In Vertical Last MCU */
2631 ( input_surface_format << 8) | /* Input Surface format */
2632 ( output_mcu_format << 0)); /* Output MCU Structure */
2634 OUT_BCS_BATCH(batch,
2635 ((frame_height_in_blks - 1) << 16) | /* Frame Height In Blks Minus 1 */
2636 (JPEG_ENC_ROUND_QUANT_DEFAULT << 13) | /* Rounding Quant set to default value 0 */
2637 ((frame_width_in_blks - 1) << 0)); /* Frame Width In Blks Minus 1 */
2638 ADVANCE_BCS_BATCH(batch);
/*
 * Pack the reciprocals of a 64-entry quantization matrix into 32 DWords.
 *
 * The MFX hardware wants 1/Q[i] in 16-bit fixed point (65535 / Q[i]) and
 * two of those values per DWord: the even-indexed entry in the low
 * half-word, the odd-indexed entry in the high half-word.
 *
 * raster_qm: 64 quantizer values; callers clamp them to [1, 255], so the
 *            division below cannot divide by zero for valid input.
 * dword_qm:  receives the 32 packed DWords.
 *
 * Fix: reciprocals are held in uint16_t. With a signed 16-bit type a
 * quantizer of 1 produces 65535 == (short)-1, and the sign extension in
 * the '|' would smear 0xFFFF over the other half of the packed DWord.
 */
static void
get_reciprocal_dword_qm(unsigned char *raster_qm, uint32_t *dword_qm)
{
    int i, j = 0;
    uint16_t reciprocal_qm[64];

    for (i = 0; i < 64; i++)
        reciprocal_qm[i] = 65535 / (raster_qm[i]);

    /* Two 16-bit reciprocals per DWord: odd index high, even index low. */
    for (i = 0; i < 64; i += 2) {
        dword_qm[j] = (((uint32_t)reciprocal_qm[i + 1] << 16) | (reciprocal_qm[i]));
        j++;
    }
}
/*
 * Emit the JPEG forward quantization matrix (FQM) states: take the app's
 * VAQMatrixBufferJPEG (or the buffered/default one), scale it by the
 * normalized quality factor, convert zigzag -> raster -> column order,
 * pack the 16-bit reciprocals two-per-DWord, and send luma plus (when
 * chroma is present) Cb/Cr matrices via gen8_mfc_fqm_state().
 */
2661 gen8_mfc_jpeg_fqm_state(VADriverContextP ctx,
2662 struct intel_encoder_context *encoder_context,
2663 struct encode_state *encode_state)
2665 unsigned int quality = 0;
2666 uint32_t temp, i = 0, j = 0, dword_qm[32];
2667 VAEncPictureParameterBufferJPEG *pic_param;
2668 VAQMatrixBufferJPEG *qmatrix;
2669 unsigned char raster_qm[64], column_raster_qm[64];
2670 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2672 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
2673 pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
2674 quality = pic_param->quality;
2676 //If the app sends the qmatrix, use it, buffer it for using it with the next frames
2677 //The app can send qmatrix for the first frame and not send for the subsequent frames
2678 if(encode_state->q_matrix && encode_state->q_matrix->buffer) {
2679 qmatrix = (VAQMatrixBufferJPEG *)encode_state->q_matrix->buffer;
2681 mfc_context->buffered_qmatrix.load_lum_quantiser_matrix = 1;
2682 memcpy(mfc_context->buffered_qmatrix.lum_quantiser_matrix, qmatrix->lum_quantiser_matrix, 64 * (sizeof(unsigned char)));
2684 if(pic_param->num_components > 1) {
2685 mfc_context->buffered_qmatrix.load_chroma_quantiser_matrix = 1;
2686 memcpy(mfc_context->buffered_qmatrix.chroma_quantiser_matrix, qmatrix->chroma_quantiser_matrix, 64 * (sizeof(unsigned char)));
2688 mfc_context->buffered_qmatrix.load_chroma_quantiser_matrix = 0;
2692 //If the app doesn't send the qmatrix, use the buffered/default qmatrix
2693 qmatrix = &mfc_context->buffered_qmatrix;
2694 qmatrix->load_lum_quantiser_matrix = 1;
2695 qmatrix->load_chroma_quantiser_matrix = (pic_param->num_components > 1) ? 1 : 0;
2699 //As per the design, normalization of the quality factor and scaling of the Quantization tables
2700 //based on the quality factor needs to be done in the driver before sending the values to the HW.
2701 //But note, the driver expects the scaled quantization tables (as per below logic) to be sent as
2702 //packed header information. The packed header is written as the header of the jpeg file. This
2703 //header information is used to decode the jpeg file. So, it is the app's responsibility to send
2704 //the correct header information (See build_packed_jpeg_header_buffer() in jpegenc.c in LibVa on
2705 //how to do this). QTables can be different for different applications. If no tables are provided,
2706 //the default tables in the driver are used.
2708 //Normalization of the quality factor (IJG-style scaling)
2709 if (quality > 100) quality=100;
2710 if (quality == 0) quality=1;
2711 quality = (quality < 50) ? (5000/quality) : (200 - (quality*2));
2713 //Step 1. Apply Quality factor and clip to range [1, 255] for luma and chroma Quantization matrices
2714 //Step 2. HW expects the 1/Q[i] values in the qm sent, so get reciprocals
2715 //Step 3. HW also expects 32 dwords, hence combine 2 (1/Q) values into 1 dword
2716 //Step 4. Send the Quantization matrix to the HW, use gen8_mfc_fqm_state
2719 if(qmatrix->load_lum_quantiser_matrix) {
2720 //apply quality to lum_quantiser_matrix
2721 for(i=0; i < 64; i++) {
2722 temp = (qmatrix->lum_quantiser_matrix[i] * quality)/100;
2723 //clamp to range [1,255]
2724 temp = (temp > 255) ? 255 : temp;
2725 temp = (temp < 1) ? 1 : temp;
2726 qmatrix->lum_quantiser_matrix[i] = (unsigned char)temp;
2729 //For VAAPI, the VAQMatrixBuffer needs to be in zigzag order.
2730 //The App should send it in zigzag. Now, the driver has to extract the raster from it.
2731 for (j = 0; j < 64; j++)
2732 raster_qm[zigzag_direct[j]] = qmatrix->lum_quantiser_matrix[j];
2734 //Convert the raster order(row-ordered) to the column-raster (column by column).
2735 //To be consistent with the other encoders, send it in column order.
2736 //Need to double check if our HW expects col or row raster.
2737 for (j = 0; j < 64; j++) {
2738 int row = j / 8, col = j % 8;
2739 column_raster_qm[col * 8 + row] = raster_qm[j];
2742 //Convert to raster QM to reciprocal. HW expects values in reciprocal.
2743 get_reciprocal_dword_qm(column_raster_qm, dword_qm);
2745 //send the luma qm to the command buffer
2746 gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2749 //For Chroma, if chroma exists (Cb, Cr or G, B)
2750 if(qmatrix->load_chroma_quantiser_matrix) {
2751 //apply quality to chroma_quantiser_matrix
2752 for(i=0; i < 64; i++) {
2753 temp = (qmatrix->chroma_quantiser_matrix[i] * quality)/100;
2754 //clamp to range [1,255]
2755 temp = (temp > 255) ? 255 : temp;
2756 temp = (temp < 1) ? 1 : temp;
2757 qmatrix->chroma_quantiser_matrix[i] = (unsigned char)temp;
2760 //For VAAPI, the VAQMatrixBuffer needs to be in zigzag order.
2761 //The App should send it in zigzag. Now, the driver has to extract the raster from it.
2762 for (j = 0; j < 64; j++)
2763 raster_qm[zigzag_direct[j]] = qmatrix->chroma_quantiser_matrix[j];
2765 //Convert the raster order(row-ordered) to the column-raster (column by column).
2766 //To be consistent with the other encoders, send it in column order.
2767 //Need to double check if our HW expects col or row raster.
2768 for (j = 0; j < 64; j++) {
2769 int row = j / 8, col = j % 8;
2770 column_raster_qm[col * 8 + row] = raster_qm[j];
2774 //Convert to raster QM to reciprocal. HW expects values in reciprocal.
2775 get_reciprocal_dword_qm(column_raster_qm, dword_qm);
2777 //send the same chroma qm to the command buffer (for both U,V or G,B)
2778 gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2779 gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
//Translation of Table K.5 into code: This method takes the huffval from the
//Huffmantable buffer and converts into index for the coefficients and size tables.
//The run length lives in the high nibble and the size/category in the low
//nibble; each run contributes 10 regular entries (sizes 1..10), and values in
//the 0xF0+ area are shifted by one to account for the ZRL (0xF0) code itself.
uint8_t map_huffval_to_index(uint8_t huff_val)
{
    uint8_t index = 0;

    if (huff_val < 0xF0) {
        index = (((huff_val >> 4) & 0x0F) * 0xA) + (huff_val & 0x0F);
    } else {
        index = 1 + (((huff_val >> 4) & 0x0F) * 0xA) + (huff_val & 0x0F);
    }

    return index;
}
//Implementation of Flow chart Annex C - Figure C.1 (ITU-T T.81):
//expand BITS (the number of codes of each length 1..16) into a per-code
//size list. huff_size_table[k] receives the code length of the k-th code;
//the list is terminated with a 0 entry and *lastK returns the code count.
static void
generate_huffman_codesizes_table(uint8_t *bits, uint8_t *huff_size_table, uint8_t *lastK)
{
    uint8_t i = 1, j = 1, k = 0;

    /* For every code length i, emit bits[i-1] entries of size i. */
    while (i <= 16) {
        while (j <= (uint8_t)bits[i - 1]) {
            huff_size_table[k] = i;
            k++;
            j++;
        }

        i++;
        j = 1;
    }

    /* Zero terminator marks the end of the size list (Figure C.1). */
    huff_size_table[k] = 0;
    *lastK = k;
}
//Implementation of Flow chart Annex C - Figure C.2 (ITU-T T.81):
//assign canonical Huffman code words from the size list produced by
//Figure C.1. Codes of equal length are consecutive integers; each time
//the code length grows, the running counter is left-shifted.
static void
generate_huffman_codes_table(uint8_t *huff_size_table, uint16_t *huff_code_table)
{
    uint8_t k = 0;
    uint16_t code = 0;
    uint8_t si = huff_size_table[k];

    while (huff_size_table[k] != 0) {
        while (huff_size_table[k] == si) {
            // An huffman code can never be 0xFFFF. Replace it with 0 if 0xFFFF
            if (code == 0xFFFF) {
                code = 0x0000;
            }

            huff_code_table[k] = code;
            code++;
            k++;
        }

        /* Next (longer) code length: append a 0 bit to the counter. */
        code <<= 1;
        si++;
    }
}
2848 //Implementation of Flow chart Annex C - Figure C.3
//Reorder the sequential code/size lists (indexed by generation order k)
//into the fixed per-symbol layout the HW expects: each huffval is mapped
//to its table slot by map_huffval_to_index(), and the reordered tables
//are copied back over the inputs in place.
//type: 0 = DC (12 entries), otherwise AC (162 entries).
//NOTE(review): the loop over k (0..lastK) surrounding the three mapping
//statements is elided in this listing — confirm against the original.
2850 generate_ordered_codes_table(uint8_t *huff_vals, uint8_t *huff_size_table, uint16_t *huff_code_table, uint8_t type, uint8_t lastK)
2852 uint8_t huff_val_size=0, i=0, k=0;
2854 huff_val_size = (type == 0) ? 12 : 162;
     /* Scratch tables in symbol order (VLAs sized by table type). */
2855 uint8_t huff_si_table[huff_val_size];
2856 uint16_t huff_co_table[huff_val_size];
2858 memset(huff_si_table, 0, sizeof(huff_si_table));
2859 memset(huff_co_table, 0, sizeof(huff_co_table));
     /* Scatter each generated code/size to its symbol-indexed slot. */
2862 i = map_huffval_to_index(huff_vals[k]);
2863 huff_co_table[i] = huff_code_table[k];
2864 huff_si_table[i] = huff_size_table[k];
     /* Copy the reordered tables back over the caller's buffers. */
2868 memcpy(huff_size_table, huff_si_table, sizeof(uint8_t)*huff_val_size);
2869 memcpy(huff_code_table, huff_co_table, sizeof(uint16_t)*huff_val_size);
2873 //This method converts the huffman table to code words which is needed by the HW
2874 //Flowcharts from Jpeg Spec Annex C - Figure C.1, Figure C.2, Figure C.3 are used here
//
//huff_buffer: VA-API huffman table buffer (BITS + HUFFVAL lists)
//table:       output; one DWord per symbol: size in byte 0, code word in
//             bytes 1-2, byte 3 unused
//type:        0 = DC table (12 symbols), otherwise AC table (162 symbols)
//index:       which huffman_table[] entry of the buffer to convert
2876 convert_hufftable_to_codes(VAHuffmanTableBufferJPEGBaseline *huff_buffer, uint32_t *table, uint8_t type, uint8_t index)
2878 uint8_t lastK = 0, i=0;
2879 uint8_t huff_val_size = 0;
2880 uint8_t *huff_bits, *huff_vals;
2882 huff_val_size = (type == 0) ? 12 : 162;
2883 uint8_t huff_size_table[huff_val_size+1]; //The +1 for adding 0 at the end of huff_val_size
2884 uint16_t huff_code_table[huff_val_size];
2886 memset(huff_size_table, 0, sizeof(huff_size_table));
2887 memset(huff_code_table, 0, sizeof(huff_code_table));
     /* Select the DC or AC BITS/HUFFVAL lists from the VA buffer. */
2889 huff_bits = (type == 0) ? (huff_buffer->huffman_table[index].num_dc_codes) : (huff_buffer->huffman_table[index].num_ac_codes);
2890 huff_vals = (type == 0) ? (huff_buffer->huffman_table[index].dc_values) : (huff_buffer->huffman_table[index].ac_values);
2893 //Generation of table of Huffman code sizes
2894 generate_huffman_codesizes_table(huff_bits, huff_size_table, &lastK);
2896 //Generation of table of Huffman codes
2897 generate_huffman_codes_table(huff_size_table, huff_code_table);
2899 //Ordering procedure for encoding procedure code tables
2900 generate_ordered_codes_table(huff_vals, huff_size_table, huff_code_table, type, lastK);
2902 //HW expects Byte0: Code length; Byte1,Byte2: Code Word, Byte3: Dummy
2903 //Since IA is little-endian, &, | and << accordingly to store the values in the DWord.
2904 for(i=0; i<huff_val_size; i++) {
2906 table[i] = ((huff_size_table[i] & 0xFF) | ((huff_code_table[i] & 0xFFFF) << 8));
2911 //send the huffman table using MFC_JPEG_HUFF_TABLE_STATE
2913 gen8_mfc_jpeg_huff_table_state(VADriverContextP ctx,
2914 struct encode_state *encode_state,
2915 struct intel_encoder_context *encoder_context,
2918 VAHuffmanTableBufferJPEGBaseline *huff_buffer;
2919 struct intel_batchbuffer *batch = encoder_context->base.batch;
2921 uint32_t dc_table[12], ac_table[162];
2923 assert(encode_state->huffman_table && encode_state->huffman_table->buffer);
2924 huff_buffer = (VAHuffmanTableBufferJPEGBaseline *)encode_state->huffman_table->buffer;
2926 memset(dc_table, 0, 12);
2927 memset(ac_table, 0, 162);
2929 for (index = 0; index < num_tables; index++) {
2930 int id = va_to_gen7_jpeg_hufftable[index];
2932 if (!huff_buffer->load_huffman_table[index])
2935 //load DC table with 12 DWords
2936 convert_hufftable_to_codes(huff_buffer, dc_table, 0, index); //0 for Dc
2938 //load AC table with 162 DWords
2939 convert_hufftable_to_codes(huff_buffer, ac_table, 1, index); //1 for AC
2941 BEGIN_BCS_BATCH(batch, 176);
2942 OUT_BCS_BATCH(batch, MFC_JPEG_HUFF_TABLE_STATE | (176 - 2));
2943 OUT_BCS_BATCH(batch, id); //Huff table id
2945 //DWord 2 - 13 has DC_TABLE
2946 intel_batchbuffer_data(batch, dc_table, 12*4);
2948 //Dword 14 -175 has AC_TABLE
2949 intel_batchbuffer_data(batch, ac_table, 162*4);
2950 ADVANCE_BCS_BATCH(batch);
2955 //This method is used to compute the MCU count used for setting MFC_JPEG_SCAN_OBJECT
//Map the input surface FourCC to the luma sampling factors (H1, V1) of
//the output MCU, written through h_factor/v_factor.
//NOTE(review): the assignment and break statements inside each case are
//elided in this listing; per standard JPEG subsampling these are
//presumably Y800/RGBA/444P -> 1x1, NV12 -> 2x2, UYVY/YUY2 -> 2x1 —
//confirm against the original source.
2956 static void get_Y_sampling_factors(uint32_t surface_format, uint8_t *h_factor, uint8_t *v_factor)
2958 switch (surface_format) {
2959 case VA_FOURCC_Y800: {
2964 case VA_FOURCC_NV12: {
2969 case VA_FOURCC_UYVY: {
2974 case VA_FOURCC_YUY2: {
2979 case VA_FOURCC_RGBA:
2980 case VA_FOURCC_444P: {
2985 default : { //May be have to insert error handling here. For now just use as below
2993 //set MFC_JPEG_SCAN_OBJECT
//Emit the MFC_JPEG_SCAN_OBJECT command (3 DWords): total MCU count for
//the picture, per-component AC/DC huffman table selector bitmasks, the
//header-present and last-scan flags, and the restart interval.
2995 gen8_mfc_jpeg_scan_object(VADriverContextP ctx,
2996 struct encode_state *encode_state,
2997 struct intel_encoder_context *encoder_context)
2999 uint32_t mcu_count, surface_format, Mx, My;
3000 uint8_t i, horizontal_sampling_factor, vertical_sampling_factor, huff_ac_table=0, huff_dc_table=0;
3001 uint8_t is_last_scan = 1; //Jpeg has only 1 scan per frame. When last scan, HW inserts EOI code.
3002 uint8_t head_present_flag=1; //Header has tables and app data
3003 uint16_t num_components, restart_interval; //Specifies number of MCUs in an ECS.
3004 VAEncSliceParameterBufferJPEG *slice_param;
3005 VAEncPictureParameterBufferJPEG *pic_param;
3007 struct intel_batchbuffer *batch = encoder_context->base.batch;
3008 struct object_surface *obj_surface = encode_state->input_yuv_object;
3010 assert(encode_state->slice_params_ext[0] && encode_state->slice_params_ext[0]->buffer);
3011 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
3012 assert(obj_surface);
3013 pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
3014 slice_param = (VAEncSliceParameterBufferJPEG *)encode_state->slice_params_ext[0]->buffer;
3015 surface_format = obj_surface->fourcc;
3017 get_Y_sampling_factors(surface_format, &horizontal_sampling_factor, &vertical_sampling_factor);
3019 // Mx = #MCUs in a row, My = #MCUs in a column
3020 Mx = (pic_param->picture_width + (horizontal_sampling_factor*8 -1))/(horizontal_sampling_factor*8);
3021 My = (pic_param->picture_height + (vertical_sampling_factor*8 -1))/(vertical_sampling_factor*8);
3022 mcu_count = (Mx * My);
3024 num_components = pic_param->num_components;
3025 restart_interval = slice_param->restart_interval;
3027 //Depending on number of components and values set for table selectors,
3028 //only those bits are set in 24:22 for AC table, 20:18 for DC table
3029 for(i=0; i<num_components; i++) {
3030 huff_ac_table |= ((slice_param->components[i].ac_table_selector)<<i);
3031 huff_dc_table |= ((slice_param->components[i].dc_table_selector)<<i);
3035 BEGIN_BCS_BATCH(batch, 3);
3037 OUT_BCS_BATCH(batch, MFC_JPEG_SCAN_OBJECT | (3 - 2));
3039 OUT_BCS_BATCH(batch, mcu_count << 0); //MCU Count
3041 OUT_BCS_BATCH(batch,
3042 (huff_ac_table << 22) | //Huffman AC Table
3043 (huff_dc_table << 18) | //Huffman DC Table
3044 (head_present_flag << 17) | //Head present flag
3045 (is_last_scan << 16) | //Is last scan
3046 (restart_interval << 0)); //Restart Interval
3047 ADVANCE_BCS_BATCH(batch);
// Inserts raw header bytes into the bitstream via MFX_INSERT_OBJECT.
// insert_data: DWord-aligned payload; length_in_dws: payload size in DWords;
// data_bits_in_last_dw: valid bits in the final DWord (0 means all 32);
// is_last_header / is_end_of_slice: flags forwarded into the command DWord.
3051 gen8_mfc_jpeg_pak_insert_object(struct intel_encoder_context *encoder_context, unsigned int *insert_data,
3052 int length_in_dws, int data_bits_in_last_dw, int is_last_header,
3053 int is_end_of_slice)
3055 struct intel_batchbuffer *batch = encoder_context->base.batch;
// A "0 bits used" last DWord really means a fully used DWord.
3058 if (data_bits_in_last_dw == 0)
3059 data_bits_in_last_dw = 32;
3061 BEGIN_BCS_BATCH(batch, length_in_dws + 2);
3063 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (length_in_dws + 2 - 2));
3065 OUT_BCS_BATCH(batch,
3066 (0 << 16) | //DataByteOffset 0 for JPEG Encoder
3067 (0 << 15) | //HeaderLengthExcludeFrmSize 0 for JPEG Encoder
3068 (data_bits_in_last_dw << 8) | //DataBitsInLastDW
3069 (0 << 4) | //SkipEmulByteCount 0 for JPEG Encoder
3070 (0 << 3) | //EmulationFlag 0 for JPEG Encoder
3071 ((!!is_last_header) << 2) | //LastHeaderFlag
3072 ((!!is_end_of_slice) << 1) | //EndOfSliceFlag
3073 (1 << 0)); //BitstreamStartReset 1 for JPEG Encoder
3075 intel_batchbuffer_data(batch, insert_data, length_in_dws*4);
3077 ADVANCE_BCS_BATCH(batch);
3081 //send the jpeg headers to HW using MFX_PAK_INSERT_OBJECT
// Forwards the app-supplied packed header (if any) to the PAK insert path.
// Length is rounded up to whole DWords; the bit remainder becomes
// data_bits_in_last_dw. No-op when no packed header was queued.
3083 gen8_mfc_jpeg_add_headers(VADriverContextP ctx,
3084 struct encode_state *encode_state,
3085 struct intel_encoder_context *encoder_context)
3087 if (encode_state->packed_header_data_ext) {
3088 VAEncPackedHeaderParameterBuffer *param = NULL;
3089 unsigned int *header_data = (unsigned int *)(*encode_state->packed_header_data_ext)->buffer;
3090 unsigned int length_in_bits;
3092 param = (VAEncPackedHeaderParameterBuffer *)(*encode_state->packed_header_params_ext)->buffer;
3093 length_in_bits = param->bit_length;
3095 gen8_mfc_jpeg_pak_insert_object(encoder_context,
3097 ALIGN(length_in_bits, 32) >> 5,
3098 length_in_bits & 0x1f,
3104 //Initialize the buffered_qmatrix with the default qmatrix in the driver.
3105 //If the app sends the qmatrix, this will be replaced with the one app sends.
3107 jpeg_init_default_qmatrix(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3110 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3112 //Load the QM in zigzag order. If app sends QM, it is always in zigzag order.
// Default luma/chroma quantiser tables reordered through zigzag_direct[]
// so the buffered copy matches the order an application-supplied QM uses.
3114 mfc_context->buffered_qmatrix.lum_quantiser_matrix[i] = jpeg_luma_quant[zigzag_direct[i]];
3117 mfc_context->buffered_qmatrix.chroma_quantiser_matrix[i] = jpeg_chroma_quant[zigzag_direct[i]];
3120 /* This is at the picture level */
// Programs all picture-level PAK state for one JPEG frame: pipe mode,
// surface/buffer addresses, picture state, quantiser matrices, then the
// Huffman tables, scan object and packed-header insertion. max_selector
// tracks the highest DC/AC table id referenced so only the tables actually
// used are loaded (asserted to be id 0 or 1).
3122 gen8_mfc_jpeg_pipeline_picture_programing(VADriverContextP ctx,
3123 struct encode_state *encode_state,
3124 struct intel_encoder_context *encoder_context)
3126 int i, j, component, max_selector = 0;
3127 VAEncSliceParameterBufferJPEG *slice_param;
3129 gen8_mfc_pipe_mode_select(ctx, MFX_FORMAT_JPEG, encoder_context);
3130 gen8_mfc_jpeg_set_surface_state(ctx, encoder_context, encode_state);
3131 gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
3132 gen8_mfc_ind_obj_base_addr_state(ctx, encoder_context);
3133 gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
3134 gen8_mfc_jpeg_pic_state(ctx, encoder_context, encode_state);
3136 //do the slice level encoding here
3137 gen8_mfc_jpeg_fqm_state(ctx, encoder_context, encode_state);
3139 //I dont think I need this for loop. Just to be consistent with other encoding logic...
3140 for(i = 0; i < encode_state->num_slice_params_ext; i++) {
// Fix: assert the pointer actually dereferenced on the next line
// (slice_params_ext[i]), not the unrelated slice_params array.
3141 assert(encode_state->slice_params_ext[i] && encode_state->slice_params_ext[i]->buffer);
3142 slice_param = (VAEncSliceParameterBufferJPEG *)encode_state->slice_params_ext[i]->buffer;
3144 for(j = 0; j < encode_state->slice_params_ext[i]->num_elements; j++) {
3146 for(component = 0; component < slice_param->num_components; component++) {
3147 if(max_selector < slice_param->components[component].dc_table_selector)
3148 max_selector = slice_param->components[component].dc_table_selector;
3150 if (max_selector < slice_param->components[component].ac_table_selector)
3151 max_selector = slice_param->components[component].ac_table_selector;
// Hardware only supports table ids 0 and 1.
3158 assert(max_selector < 2);
3159 //send the huffman table using MFC_JPEG_HUFF_TABLE
3160 gen8_mfc_jpeg_huff_table_state(ctx, encode_state, encoder_context, max_selector+1);
3161 //set MFC_JPEG_SCAN_OBJECT
3162 gen8_mfc_jpeg_scan_object(ctx, encode_state, encoder_context);
3163 //add headers using MFX_PAK_INSERT_OBJECT (it is refered as MFX_INSERT_OBJECT in this driver code)
3164 gen8_mfc_jpeg_add_headers(ctx, encode_state, encoder_context);
// Top-level JPEG BCS programming: opens an atomic batch window (0x4000
// reserved DWords), flushes, emits all picture-level commands, and closes
// the atomic section.
3169 gen8_mfc_jpeg_pipeline_programing(VADriverContextP ctx,
3170 struct encode_state *encode_state,
3171 struct intel_encoder_context *encoder_context)
3173 struct intel_batchbuffer *batch = encoder_context->base.batch;
3176 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
3177 intel_batchbuffer_emit_mi_flush(batch);
3179 // picture level programing
3180 gen8_mfc_jpeg_pipeline_picture_programing(ctx, encode_state, encoder_context);
3183 intel_batchbuffer_end_atomic(batch);
// Entry point for encoding one JPEG picture: common MFC init, JPEG-specific
// surface/buffer preparation, BCS pipeline programming, then kick the GPU.
// Always reports success; errors in the sub-steps surface via asserts.
3189 gen8_mfc_jpeg_encode_picture(VADriverContextP ctx,
3190 struct encode_state *encode_state,
3191 struct intel_encoder_context *encoder_context)
3193 gen8_mfc_init(ctx, encode_state, encoder_context);
3194 intel_mfc_jpeg_prepare(ctx, encode_state, encoder_context);
3195 /*Programing bcs pipeline*/
3196 gen8_mfc_jpeg_pipeline_programing(ctx, encode_state, encoder_context);
3197 gen8_mfc_run(ctx, encode_state, encoder_context);
3199 return VA_STATUS_SUCCESS;
// Estimates a VP8 quantizer index for the given target frame size by scanning
// the vp8_bits_per_mb[] table between the picture's clamp_qindex_low/high and
// picking the qindex whose per-MB bit cost best matches the per-MB budget.
// Returns the chosen qindex (>= min_qindex).
3202 static int gen8_mfc_vp8_qindex_estimate(struct encode_state *encode_state,
3203 struct gen6_mfc_context *mfc_context,
3204 int target_frame_size,
3207 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3208 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3209 unsigned int max_qindex = pic_param->clamp_qindex_high;
3210 unsigned int min_qindex = pic_param->clamp_qindex_low;
3211 int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
3212 int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
3214 int last_size_gap = -1;
3215 int per_mb_size_at_qindex;
3216 int target_qindex = min_qindex, i;
3218 /* make sure would not overflow*/
// The table is scaled by 512 (<< 9); pick the division order that keeps
// the intermediate within a signed 32-bit range.
3219 if (target_frame_size >= (0x7fffffff >> 9))
3220 target_mb_size = (target_frame_size / width_in_mbs / height_in_mbs) << 9;
3222 target_mb_size = (target_frame_size << 9) / width_in_mbs / height_in_mbs;
// Key frames use row [0] of the cost table, inter frames row [1] is skipped
// via the !is_key_frame index.
3224 for (i = min_qindex; i <= max_qindex; i++) {
3225 per_mb_size_at_qindex = vp8_bits_per_mb[!is_key_frame][i];
3227 if (per_mb_size_at_qindex <= target_mb_size) {
3228 if (target_mb_size - per_mb_size_at_qindex < last_size_gap)
3233 last_size_gap = per_mb_size_at_qindex - target_mb_size;
3236 return target_qindex;
// Seeds the per-slice-type rate-control context from the sequence bitrate and
// the frame-rate misc parameter: per-MB budgets for I (5x inter) and P frames,
// frame-size targets, and the TargetSizeInWord/MaxSizeInWord pair (in 16-byte
// words, max = 1.5 * target).
3240 gen8_mfc_vp8_bit_rate_control_context_init(struct encode_state *encode_state,
3241 struct gen6_mfc_context *mfc_context)
3243 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3244 VAEncMiscParameterBuffer *misc_param_frame_rate_buffer = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFrameRate]->buffer;
3245 VAEncMiscParameterFrameRate* param_frame_rate = (VAEncMiscParameterFrameRate*)misc_param_frame_rate_buffer->data;
3246 int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
3247 int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
3248 float fps = param_frame_rate->framerate;
// The +4.0 fudge on fps gives some headroom below the nominal bitrate.
3249 int inter_mb_size = seq_param->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
3250 int intra_mb_size = inter_mb_size * 5.0;
3252 mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size;
3253 mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
3254 mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size;
3255 mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
3257 mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16;
3258 mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16;
3260 mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5;
3261 mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5;
// Initializes the VP8 BRC state: per-GOP target frame sizes weighted by
// BRC_PWEIGHT, initial QP estimates for I/P via qindex estimation, and the
// HRD model (buffer size, fullness, capacity) from the HRD misc parameter.
3264 static void gen8_mfc_vp8_brc_init(struct encode_state *encode_state,
3265 struct intel_encoder_context* encoder_context)
3267 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3268 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3269 VAEncMiscParameterBuffer* misc_param_hrd = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
3270 VAEncMiscParameterHRD* param_hrd = (VAEncMiscParameterHRD*)misc_param_hrd->data;
3271 VAEncMiscParameterBuffer* misc_param_frame_rate_buffer = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFrameRate]->buffer;
3272 VAEncMiscParameterFrameRate* param_frame_rate = (VAEncMiscParameterFrameRate*)misc_param_frame_rate_buffer->data;
3273 double bitrate = seq_param->bits_per_second;
3274 unsigned int frame_rate = param_frame_rate->framerate;
3275 int inum = 1, pnum = 0;
3276 int intra_period = seq_param->intra_period;
3277 int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
3278 int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
3279 int max_frame_size = (vp8_bits_per_mb[0][0] >> 9) * width_in_mbs * height_in_mbs;/* vp8_bits_per_mb table mutilpled 512 */
3281 pnum = intra_period - 1;
3283 mfc_context->brc.mode = encoder_context->rate_control_mode;
// GOP bit budget split between one I frame and pnum P frames, with P frames
// weighted down by BRC_PWEIGHT.
3285 mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/frame_rate) /
3286 (double)(inum + BRC_PWEIGHT * pnum ));
3287 mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
3289 mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
3290 mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
3292 mfc_context->brc.bits_per_frame = bitrate/frame_rate;
3294 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = gen8_mfc_vp8_qindex_estimate(encode_state,
3296 mfc_context->brc.target_frame_size[SLICE_TYPE_I],
3298 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = gen8_mfc_vp8_qindex_estimate(encode_state,
3300 mfc_context->brc.target_frame_size[SLICE_TYPE_P],
3303 mfc_context->hrd.buffer_size = (double)param_hrd->buffer_size;
// NOTE(review): the (double) cast here applies to the comparison result, not
// to initial_buffer_fullness; the truthiness (and thus the selected value)
// is unchanged, but the cast placement is misleading -- intent was likely
// (double)param_hrd->initial_buffer_fullness. Confirm before restyling.
3304 mfc_context->hrd.current_buffer_fullness =
3305 (double)(param_hrd->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
3306 param_hrd->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
3307 mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
3308 mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/max_frame_size;
3309 mfc_context->hrd.violation_noted = 0;
// Post-encode BRC update for VP8: predicts the next quantizer for the current
// frame type from the actual coded size, clamps QP movement, applies an
// HRD-fullness-based correction, adjusts the other slice type's QP for
// consistency, and returns the HRD status (no violation / underflow /
// overflow variants).
3312 static int gen8_mfc_vp8_brc_postpack(struct encode_state *encode_state,
3313 struct gen6_mfc_context *mfc_context,
3316 gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
3317 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3318 int is_key_frame = !pic_param->pic_flags.bits.frame_type;
3319 int slicetype = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
3320 int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
3321 int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
3322 int qp; // quantizer of previously encoded slice of current type
3323 int qpn; // predicted quantizer for next frame of current type in integer format
3324 double qpf; // predicted quantizer for next frame of current type in float format
3325 double delta_qp; // QP correction
3326 int target_frame_size, frame_size_next;
3328 * x - how far we are from HRD buffer borders
3329 * y - how far we are from target HRD buffer fullness
3332 double frame_size_alpha;
3333 unsigned int max_qindex = pic_param->clamp_qindex_high;
3334 unsigned int min_qindex = pic_param->clamp_qindex_low;
3336 qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;
3338 target_frame_size = mfc_context->brc.target_frame_size[slicetype];
3339 if (mfc_context->hrd.buffer_capacity < 5)
3340 frame_size_alpha = 0;
3342 frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
3343 if (frame_size_alpha > 30) frame_size_alpha = 30;
// Predict next frame size by damping the (target - actual) error with alpha.
3344 frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
3345 (double)(frame_size_alpha + 1.);
3347 /* frame_size_next: avoiding negative number and too small value */
3348 if ((double)frame_size_next < (double)(target_frame_size * 0.25))
3349 frame_size_next = (int)((double)target_frame_size * 0.25);
3351 qpf = (double)qp * target_frame_size / frame_size_next;
3352 qpn = (int)(qpf + 0.5);
3355 /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
3356 mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
3357 if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
3359 mfc_context->brc.qpf_rounding_accumulator = 0.;
3360 } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
3362 mfc_context->brc.qpf_rounding_accumulator = 0.;
3366 /* making sure that QP is not changing too fast */
3367 if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
3368 else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
3369 /* making sure that with QP predictions we do not leave QPs range */
3370 BRC_CLIP(qpn, min_qindex, max_qindex);
3372 /* checking whether HRD compliance is still met */
3373 sts = intel_mfc_update_hrd(encode_state, mfc_context, frame_bits);
3375 /* calculating QP delta as some function*/
3376 x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
3378 x /= mfc_context->hrd.target_buffer_fullness;
3379 y = mfc_context->hrd.current_buffer_fullness;
3382 x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
3383 y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
3385 if (y < 0.01) y = 0.01;
3387 else if (x < -1) x = -1;
// Smooth correction: magnitude shrinks as we approach the buffer border (y)
// and follows the distance from target fullness (x).
3389 delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
3390 qpn = (int)(qpn + delta_qp + 0.5);
3392 /* making sure that with QP predictions we do not leave QPs range */
3393 BRC_CLIP(qpn, min_qindex, max_qindex);
3395 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
3396 /* correcting QPs of slices of other types */
3397 if (!is_key_frame) {
3398 if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 4)
3399 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 2;
3401 if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 4)
3402 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
3404 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, min_qindex, max_qindex);
3405 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, min_qindex, max_qindex);
3406 } else if (sts == BRC_UNDERFLOW) { // underflow
3407 if (qpn <= qp) qpn = qp + 2;
3408 if (qpn > max_qindex) {
3410 sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
3412 } else if (sts == BRC_OVERFLOW) {
3413 if (qpn >= qp) qpn = qp - 2;
3414 if (qpn < min_qindex) { // < 0 (?) overflow with minQP
3416 sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
3420 mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
// Initializes the VUI HRD context for VP8 CBR: bitrate/CPB size in 1024-bit
// units, the initial CPB removal delay in 90 kHz ticks, and the standard
// 24-bit delay field lengths. No-op for non-CBR modes.
3425 static void gen8_mfc_vp8_hrd_context_init(struct encode_state *encode_state,
3426 struct intel_encoder_context *encoder_context)
3428 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3429 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3430 unsigned int rate_control_mode = encoder_context->rate_control_mode;
3431 int target_bit_rate = seq_param->bits_per_second;
3433 // current we only support CBR mode.
3434 if (rate_control_mode == VA_RC_CBR) {
3435 mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
3436 mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
// Initial removal delay: half the CPB, converted back to bits (* 1024)
// and expressed in 90 kHz clock ticks.
3437 mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
3438 mfc_context->vui_hrd.i_cpb_removal_delay = 2;
3439 mfc_context->vui_hrd.i_frame_number = 0;
3441 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
3442 mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
3443 mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
// Per-frame HRD bookkeeping: only the frame counter advances here.
3448 static void gen8_mfc_vp8_hrd_context_update(struct encode_state *encode_state,
3449 struct gen6_mfc_context *mfc_context)
3451 mfc_context->vui_hrd.i_frame_number++;
3455 * Check whether the parameters related with CBR are updated and decide whether
3456 * it needs to reinitialize the configuration related with CBR.
3457 * Currently it will check the following parameters:
3460 * gop_configuration(intra_period, ip_period, intra_idr_period)
// Returns true only when bitrate, fps or intra_period changed since the last
// call (and the mode is CBR); also records the new values for the next check.
3462 static bool gen8_mfc_vp8_brc_updated_check(struct encode_state *encode_state,
3463 struct intel_encoder_context *encoder_context)
3465 unsigned int rate_control_mode = encoder_context->rate_control_mode;
3466 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3467 double cur_fps, cur_bitrate;
3468 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3469 VAEncMiscParameterBuffer *misc_param_frame_rate_buf = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFrameRate]->buffer;
3470 VAEncMiscParameterFrameRate *param_frame_rate = (VAEncMiscParameterFrameRate*)misc_param_frame_rate_buf->data;
3471 unsigned int frame_rate = param_frame_rate->framerate;
3473 if (rate_control_mode != VA_RC_CBR) {
3477 cur_bitrate = seq_param->bits_per_second;
3478 cur_fps = frame_rate;
3480 if ((cur_bitrate == mfc_context->brc.saved_bps) &&
3481 (cur_fps == mfc_context->brc.saved_fps) &&
3482 (seq_param->intra_period == mfc_context->brc.saved_intra_period)) {
3483 /* the parameters related with CBR are not updated */
3487 mfc_context->brc.saved_intra_period = seq_param->intra_period;
3488 mfc_context->brc.saved_fps = cur_fps;
3489 mfc_context->brc.saved_bps = cur_bitrate;
// Per-frame BRC preparation for CBR: (re)initializes the rate-control and
// HRD contexts on first use or whenever the CBR parameters changed.
3493 static void gen8_mfc_vp8_brc_prepare(struct encode_state *encode_state,
3494 struct intel_encoder_context *encoder_context)
3496 unsigned int rate_control_mode = encoder_context->rate_control_mode;
3497 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3499 if (rate_control_mode == VA_RC_CBR) {
3501 assert(encoder_context->codec != CODEC_MPEG2);
3503 brc_updated = gen8_mfc_vp8_brc_updated_check(encode_state, encoder_context);
3505 /*Programing bit rate control */
// MaxSizeInWord == 0 means the context was never initialized.
3506 if ((mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0) ||
3508 gen8_mfc_vp8_bit_rate_control_context_init(encode_state, mfc_context);
3509 gen8_mfc_vp8_brc_init(encode_state, encoder_context);
3512 /*Programing HRD control */
3513 if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated )
3514 gen8_mfc_vp8_hrd_context_init(encode_state, encoder_context);
// Resets the per-frame VP8 encoder state: header update positions, default
// probabilities (key vs inter frame variants), and uploads the default
// coefficient probabilities into the stream-in BO consumed by the hardware.
3518 static void vp8_enc_state_init(struct gen6_mfc_context *mfc_context,
3519 VAEncPictureParameterBufferVP8 *pic_param,
3520 VAQMatrixBufferVP8 *q_matrix)
3523 int is_key_frame = !pic_param->pic_flags.bits.frame_type;
3524 unsigned char *coeff_probs_stream_in_buffer;
3526 mfc_context->vp8_state.frame_header_lf_update_pos = 0;
3527 mfc_context->vp8_state.frame_header_qindex_update_pos = 0;
3528 mfc_context->vp8_state.frame_header_token_update_pos = 0;
3529 mfc_context->vp8_state.frame_header_bin_mv_upate_pos = 0;
3531 mfc_context->vp8_state.prob_skip_false = 255;
3532 memset(mfc_context->vp8_state.mb_segment_tree_probs, 0, sizeof(mfc_context->vp8_state.mb_segment_tree_probs));
3533 memcpy(mfc_context->vp8_state.mv_probs, vp8_default_mv_context, sizeof(mfc_context->vp8_state.mv_probs));
// Key frames use the kf-specific mode probability tables ...
3536 memcpy(mfc_context->vp8_state.y_mode_probs, vp8_kf_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
3537 memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_kf_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
3539 mfc_context->vp8_state.prob_intra = 255;
3540 mfc_context->vp8_state.prob_last = 128;
3541 mfc_context->vp8_state.prob_gf = 128;
// ... inter frames use the generic tables and a lower intra probability.
3543 memcpy(mfc_context->vp8_state.y_mode_probs, vp8_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
3544 memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
3546 mfc_context->vp8_state.prob_intra = 63;
3547 mfc_context->vp8_state.prob_last = 128;
3548 mfc_context->vp8_state.prob_gf = 128;
// Skip probability is looked up from the base quantizer index.
3551 mfc_context->vp8_state.prob_skip_false = vp8_base_skip_false_prob[q_matrix->quantization_index[0]];
3553 dri_bo_map(mfc_context->vp8_state.coeff_probs_stream_in_bo, 1);
3554 coeff_probs_stream_in_buffer = (unsigned char *)mfc_context->vp8_state.coeff_probs_stream_in_bo->virtual;
3555 assert(coeff_probs_stream_in_buffer);
3556 memcpy(coeff_probs_stream_in_buffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
3557 dri_bo_unmap(mfc_context->vp8_state.coeff_probs_stream_in_bo);
// Placeholder for inter-frame probability updates; currently empty.
3560 static void vp8_enc_state_update(struct gen6_mfc_context *mfc_context,
3561 VAQMatrixBufferVP8 *q_matrix)
3564 /*some other probabilities need to be updated*/
// Implemented in the encoder utils: serializes the uncompressed VP8 frame
// header into mfc_context->vp8_state.vp8_frame_header and records its bit
// count and the update positions used later for in-place patching.
3567 extern void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param,
3568 VAEncPictureParameterBufferVP8 *pic_param,
3569 VAQMatrixBufferVP8 *q_matrix,
3570 struct gen6_mfc_context *mfc_context,
3571 struct intel_encoder_context *encoder_context);
// Binarizes the VP8 frame header and copies the resulting bytes (rounded up
// to whole bytes from frame_header_bit_count) into the frame-header BO.
3573 static void vp8_enc_frame_header_binarize(struct encode_state *encode_state,
3574 struct intel_encoder_context *encoder_context,
3575 struct gen6_mfc_context *mfc_context)
3577 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3578 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3579 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
3580 unsigned char *frame_header_buffer;
3582 binarize_vp8_frame_header(seq_param, pic_param, q_matrix, mfc_context, encoder_context);
3584 dri_bo_map(mfc_context->vp8_state.frame_header_bo, 1);
3585 frame_header_buffer = (unsigned char *)mfc_context->vp8_state.frame_header_bo->virtual;
3586 assert(frame_header_buffer);
3587 memcpy(frame_header_buffer, mfc_context->vp8_state.vp8_frame_header, (mfc_context->vp8_state.frame_header_bit_count + 7) / 8);
3588 dri_bo_unmap(mfc_context->vp8_state.frame_header_bo);
3591 #define MAX_VP8_FRAME_HEADER_SIZE 0x2000
3592 #define VP8_TOKEN_STATISTICS_BUFFER_SIZE 0x2000
// Per-frame VP8 encoder (re)initialization: releases and reallocates all MFC
// scratch/reference/output BOs, sizes them from the frame dimensions, seeds
// the quantizer indices under CBR, and prepares the VP8-specific buffers
// (frame header, intermediate partitions, stream-out, coeff probs, token
// statistics, MPC row store) before binarizing the frame header.
3594 static void gen8_mfc_vp8_init(VADriverContextP ctx,
3595 struct encode_state *encode_state,
3596 struct intel_encoder_context *encoder_context)
3598 struct i965_driver_data *i965 = i965_driver_data(ctx);
3599 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3602 int width_in_mbs = 0;
3603 int height_in_mbs = 0;
3604 int slice_batchbuffer_size;
3605 int is_key_frame, slice_type, rate_control_mode;
3607 VAEncSequenceParameterBufferVP8 *pSequenceParameter = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3608 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3609 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
// Fix: width must come from frame_width, not frame_height (wrong MB grid,
// and thus wrong buffer sizes, for any non-square frame).
3611 width_in_mbs = ALIGN(pSequenceParameter->frame_width, 16) / 16;
3612 height_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
3614 is_key_frame = !pic_param->pic_flags.bits.frame_type;
3615 slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
3616 rate_control_mode = encoder_context->rate_control_mode;
// Under CBR the BRC-chosen qindex overrides whatever the app supplied.
3618 if (rate_control_mode == VA_RC_CBR) {
3619 q_matrix->quantization_index[0] = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
3620 for (i = 1; i < 4; i++)
3621 q_matrix->quantization_index[i] = q_matrix->quantization_index[0];
3622 for (i = 0; i < 5; i++)
3623 q_matrix->quantization_index_delta[i] = 0;
3626 slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
3627 (SLICE_HEADER + SLICE_TAIL);
3629 /*Encode common setup for MFC*/
3630 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
3631 mfc_context->post_deblocking_output.bo = NULL;
3633 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
3634 mfc_context->pre_deblocking_output.bo = NULL;
3636 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
3637 mfc_context->uncompressed_picture_source.bo = NULL;
3639 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
3640 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
3642 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
3643 if ( mfc_context->direct_mv_buffers[i].bo != NULL)
3644 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
3645 mfc_context->direct_mv_buffers[i].bo = NULL;
3648 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
3649 if (mfc_context->reference_surfaces[i].bo != NULL)
3650 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
3651 mfc_context->reference_surfaces[i].bo = NULL;
3654 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
3655 bo = dri_bo_alloc(i965->intel.bufmgr,
3657 width_in_mbs * 64 * 16,
3660 mfc_context->intra_row_store_scratch_buffer.bo = bo;
3662 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
3663 bo = dri_bo_alloc(i965->intel.bufmgr,
3665 width_in_mbs * height_in_mbs * 16,
3668 mfc_context->macroblock_status_buffer.bo = bo;
3670 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
3671 bo = dri_bo_alloc(i965->intel.bufmgr,
3673 16 * width_in_mbs * 64, /* 16 * width_in_mbs * 64 */
3676 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
3678 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
3679 bo = dri_bo_alloc(i965->intel.bufmgr,
3681 16 * width_in_mbs * 64, /* 16 * width_in_mbs * 64 */
3684 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
3686 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
3687 mfc_context->mfc_batchbuffer_surface.bo = NULL;
3689 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
3690 mfc_context->aux_batchbuffer_surface.bo = NULL;
3692 if (mfc_context->aux_batchbuffer) {
3693 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
3694 mfc_context->aux_batchbuffer = NULL;
// The aux batchbuffer doubles as a 16-byte-block surface for the kernels.
3697 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
3698 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
3699 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
3700 mfc_context->aux_batchbuffer_surface.pitch = 16;
3701 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
3702 mfc_context->aux_batchbuffer_surface.size_block = 16;
3704 gen8_gpe_context_init(ctx, &mfc_context->gpe_context);
3706 /* alloc vp8 encoding buffers*/
3707 dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
3708 bo = dri_bo_alloc(i965->intel.bufmgr,
3710 MAX_VP8_FRAME_HEADER_SIZE,
3713 mfc_context->vp8_state.frame_header_bo = bo;
// 384 bytes per MB per partition; 8 token partitions + 1 gives the 9x size.
3715 mfc_context->vp8_state.intermediate_buffer_max_size = width_in_mbs * height_in_mbs * 384 * 9;
3716 for(i = 0; i < 8; i++) {
3717 mfc_context->vp8_state.intermediate_partition_offset[i] = width_in_mbs * height_in_mbs * 384 * (i + 1);
3719 dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
3720 bo = dri_bo_alloc(i965->intel.bufmgr,
3722 mfc_context->vp8_state.intermediate_buffer_max_size,
3725 mfc_context->vp8_state.intermediate_bo = bo;
3727 dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
3728 bo = dri_bo_alloc(i965->intel.bufmgr,
3730 width_in_mbs * height_in_mbs * 16,
3733 mfc_context->vp8_state.stream_out_bo = bo;
3735 dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
3736 bo = dri_bo_alloc(i965->intel.bufmgr,
3738 sizeof(vp8_default_coef_probs),
3741 mfc_context->vp8_state.coeff_probs_stream_in_bo = bo;
3743 dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
3744 bo = dri_bo_alloc(i965->intel.bufmgr,
3746 VP8_TOKEN_STATISTICS_BUFFER_SIZE,
3749 mfc_context->vp8_state.token_statistics_bo = bo;
3751 dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
3752 bo = dri_bo_alloc(i965->intel.bufmgr,
3754 width_in_mbs * 16 * 64,
3757 mfc_context->vp8_state.mpc_row_store_bo = bo;
3759 vp8_enc_state_init(mfc_context, pic_param, q_matrix);
3760 vp8_enc_frame_header_binarize(encode_state, encoder_context, mfc_context);
// Binds all per-frame surfaces for VP8 encode: the reconstructed surface is
// routed to pre- or post-deblocking output depending on the loop filter
// level, reference surfaces and the input YUV are referenced, and the coded
// buffer becomes both the PAK BSE object and the final frame output.
3764 intel_mfc_vp8_prepare(VADriverContextP ctx,
3765 struct encode_state *encode_state,
3766 struct intel_encoder_context *encoder_context)
3768 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3769 struct object_surface *obj_surface;
3770 struct object_buffer *obj_buffer;
3771 struct i965_coded_buffer_segment *coded_buffer_segment;
3772 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3773 VAStatus vaStatus = VA_STATUS_SUCCESS;
3777 /* reconstructed surface */
3778 obj_surface = encode_state->reconstructed_object;
3779 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
// With loop filter disabled the pre-deblocking output is the final recon.
3780 if (pic_param->loop_filter_level[0] == 0) {
3781 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
3782 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
3784 mfc_context->post_deblocking_output.bo = obj_surface->bo;
3785 dri_bo_reference(mfc_context->post_deblocking_output.bo);
3788 mfc_context->surface_state.width = obj_surface->orig_width;
3789 mfc_context->surface_state.height = obj_surface->orig_height;
3790 mfc_context->surface_state.w_pitch = obj_surface->width;
3791 mfc_context->surface_state.h_pitch = obj_surface->height;
3793 /* set vp8 reference frames */
3794 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
3795 obj_surface = encode_state->reference_objects[i];
3797 if (obj_surface && obj_surface->bo) {
3798 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
3799 dri_bo_reference(mfc_context->reference_surfaces[i].bo);
3801 mfc_context->reference_surfaces[i].bo = NULL;
3805 /* input YUV surface */
3806 obj_surface = encode_state->input_yuv_object;
3807 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
3808 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
// Coded buffer: PAK output starts after the driver's coded-buffer header.
3811 obj_buffer = encode_state->coded_buf_object;
3812 bo = obj_buffer->buffer_store->bo;
3813 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
3814 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
3815 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
3816 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
3818 dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
3819 mfc_context->vp8_state.final_frame_bo = mfc_context->mfc_indirect_pak_bse_object.bo;
3820 mfc_context->vp8_state.final_frame_byte_offset = I965_CODEDBUFFER_HEADER_SIZE;
3821 dri_bo_reference(mfc_context->vp8_state.final_frame_bo);
3823 /* set the internal flag to 0 to indicate the coded size is unknown */
3825 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
3826 coded_buffer_segment->mapped = 0;
3827 coded_buffer_segment->codec = encoder_context->codec;
/*
 * Emit the MFX_VP8_ENCODER_CFG command (30 DWs on BDW).
 *
 * Configures the VP8 PAK rate-control pass: statistics outputs are
 * enabled, compressed/final bitstream output is disabled (this is the
 * RC initial pass), and the frame-header bin-buffer update pointers
 * computed earlier by the driver are programmed.
 */
gen8_mfc_vp8_encoder_cfg(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;

    BEGIN_BCS_BATCH(batch, 30);
    OUT_BCS_BATCH(batch, MFX_VP8_ENCODER_CFG | (30 - 2)); /* SKL should be 31-2 ? */

    OUT_BCS_BATCH(batch,
                  0 << 9 | /* compressed bitstream output disable */
                  1 << 7 | /* disable per-segment delta qindex and loop filter in RC */
                  1 << 6 | /* RC initial pass */
                  0 << 4 | /* update segment feature data flag */
                  1 << 3 | /* bitstream statistics output enable */
                  1 << 2 | /* token statistics output enable */
                  0 << 1 | /* final bitstream output disable */

    OUT_BCS_BATCH(batch, 0); /*DW2*/

    OUT_BCS_BATCH(batch,
                  0xfff << 16 | /* max intra mb bit count limit */
                  0xfff << 0 /* max inter mb bit count limit */

    /* DW4..DW21: rate-control deltas/thresholds, unused in this pass. */
    OUT_BCS_BATCH(batch, 0); /*DW4*/
    OUT_BCS_BATCH(batch, 0); /*DW5*/
    OUT_BCS_BATCH(batch, 0); /*DW6*/
    OUT_BCS_BATCH(batch, 0); /*DW7*/
    OUT_BCS_BATCH(batch, 0); /*DW8*/
    OUT_BCS_BATCH(batch, 0); /*DW9*/
    OUT_BCS_BATCH(batch, 0); /*DW10*/
    OUT_BCS_BATCH(batch, 0); /*DW11*/
    OUT_BCS_BATCH(batch, 0); /*DW12*/
    OUT_BCS_BATCH(batch, 0); /*DW13*/
    OUT_BCS_BATCH(batch, 0); /*DW14*/
    OUT_BCS_BATCH(batch, 0); /*DW15*/
    OUT_BCS_BATCH(batch, 0); /*DW16*/
    OUT_BCS_BATCH(batch, 0); /*DW17*/
    OUT_BCS_BATCH(batch, 0); /*DW18*/
    OUT_BCS_BATCH(batch, 0); /*DW19*/
    OUT_BCS_BATCH(batch, 0); /*DW20*/
    OUT_BCS_BATCH(batch, 0); /*DW21*/

    OUT_BCS_BATCH(batch,
                  pic_param->pic_flags.bits.show_frame << 23 |
                  pic_param->pic_flags.bits.version << 20

    /* Scaled frame size: 14-bit dimension plus a 2-bit scale factor
     * packed per the VP8 uncompressed frame header layout. */
    OUT_BCS_BATCH(batch,
                  (seq_param->frame_height_scale << 14 | seq_param->frame_height) << 16 |
                  (seq_param->frame_width_scale << 14 | seq_param->frame_width) << 0

    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bit_count); /* frame header bit count */

    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_qindex_update_pos); /* frame header bin buffer qindex update pointer */

    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_lf_update_pos); /* frame header bin buffer loop filter update pointer*/

    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_token_update_pos); /* frame header bin buffer token update pointer */

    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bin_mv_upate_pos); /*frame header bin buffer mv update pointer */

    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * Emit the MFX_VP8_PIC_STATE command (38 DWs).
 *
 * Programs per-picture VP8 parameters: frame geometry in MBs, picture
 * flags, loop-filter levels, quantizer indices and deltas, segment and
 * coefficient/mode/MV probabilities, and loop-filter delta adjustments.
 * The driver-side probability state is refreshed first via
 * vp8_enc_state_update().
 */
gen8_mfc_vp8_pic_state(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;

    /* num_token_partitions is log2 of the partition count. */
    log2num = pic_param->pic_flags.bits.num_token_partitions;

    /* Update mode and token probs. */
    vp8_enc_state_update(mfc_context, q_matrix);

    BEGIN_BCS_BATCH(batch, 38);
    OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
    /* DW1: frame size in macroblocks minus one. */
    OUT_BCS_BATCH(batch,
                  (ALIGN(seq_param->frame_height, 16) / 16 - 1) << 16 |
                  (ALIGN(seq_param->frame_width, 16) / 16 - 1) << 0);

    OUT_BCS_BATCH(batch,
                  pic_param->sharpness_level << 16 |
                  pic_param->pic_flags.bits.sign_bias_alternate << 13 |
                  pic_param->pic_flags.bits.sign_bias_golden << 12 |
                  pic_param->pic_flags.bits.loop_filter_adj_enable << 11 |
                  pic_param->pic_flags.bits.mb_no_coeff_skip << 10 |
                  pic_param->pic_flags.bits.update_mb_segmentation_map << 9 |
                  pic_param->pic_flags.bits.segmentation_enabled << 8 |
                  !pic_param->pic_flags.bits.frame_type << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
                  (pic_param->pic_flags.bits.version / 2) << 4 |
                  (pic_param->pic_flags.bits.version == 3) << 1 | /* full pixel mode for version 3 */
                  !!pic_param->pic_flags.bits.version << 0); /* version 0: 6 tap */

    /* Per-segment loop-filter levels, segment 3..0. */
    OUT_BCS_BATCH(batch,
                  pic_param->loop_filter_level[3] << 24 |
                  pic_param->loop_filter_level[2] << 16 |
                  pic_param->loop_filter_level[1] << 8 |
                  pic_param->loop_filter_level[0] << 0);

    /* Per-segment Y1 AC quantizer indices, segment 3..0. */
    OUT_BCS_BATCH(batch,
                  q_matrix->quantization_index[3] << 24 |
                  q_matrix->quantization_index[2] << 16 |
                  q_matrix->quantization_index[1] << 8 |
                  q_matrix->quantization_index[0] << 0);

    /* Quantizer index deltas are signed; each is packed as a sign bit
     * ((unsigned short)delta >> 15) plus the magnitude. */
    OUT_BCS_BATCH(batch,
                  ((unsigned short)(q_matrix->quantization_index_delta[4]) >> 15) << 28 |
                  abs(q_matrix->quantization_index_delta[4]) << 24 |
                  ((unsigned short)(q_matrix->quantization_index_delta[3]) >> 15) << 20 |
                  abs(q_matrix->quantization_index_delta[3]) << 16 |
                  ((unsigned short)(q_matrix->quantization_index_delta[2]) >> 15) << 12 |
                  abs(q_matrix->quantization_index_delta[2]) << 8 |
                  ((unsigned short)(q_matrix->quantization_index_delta[1]) >> 15) << 4 |
                  abs(q_matrix->quantization_index_delta[1]) << 0);

    OUT_BCS_BATCH(batch,
                  ((unsigned short)(q_matrix->quantization_index_delta[0]) >> 15) << 4 |
                  abs(q_matrix->quantization_index_delta[0]) << 0);

    /* Quantizer clamp range for rate control. */
    OUT_BCS_BATCH(batch,
                  pic_param->clamp_qindex_high << 8 |
                  pic_param->clamp_qindex_low << 0);

    /* DW8..DW18 are written as all-ones here.
     * NOTE(review): presumably per-feature enable/cost masks — confirm
     * against the MFX_VP8_PIC_STATE programming reference. */
    for (i = 8; i < 19; i++) {
        OUT_BCS_BATCH(batch, 0xffffffff);

    /* Segment-ID tree probabilities. */
    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.mb_segment_tree_probs[2] << 16 |
                  mfc_context->vp8_state.mb_segment_tree_probs[1] << 8 |
                  mfc_context->vp8_state.mb_segment_tree_probs[0] << 0);

    /* Skip/intra/reference-frame probabilities. */
    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.prob_skip_false << 24 |
                  mfc_context->vp8_state.prob_intra << 16 |
                  mfc_context->vp8_state.prob_last << 8 |
                  mfc_context->vp8_state.prob_gf << 0);

    /* Luma intra mode probabilities (4 entries). */
    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.y_mode_probs[3] << 24 |
                  mfc_context->vp8_state.y_mode_probs[2] << 16 |
                  mfc_context->vp8_state.y_mode_probs[1] << 8 |
                  mfc_context->vp8_state.y_mode_probs[0] << 0);

    /* Chroma intra mode probabilities (3 entries). */
    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.uv_mode_probs[2] << 16 |
                  mfc_context->vp8_state.uv_mode_probs[1] << 8 |
                  mfc_context->vp8_state.uv_mode_probs[0] << 0);

    /* MV update value, DW23-DW32: 19 probs per component, packed four
     * per DW; the 20th slot of each row is padded with zero. */
    for (i = 0; i < 2; i++) {
        for (j = 0; j < 20; j += 4) {
            OUT_BCS_BATCH(batch,
                          (j + 3 == 19 ? 0 : mfc_context->vp8_state.mv_probs[i][j + 3]) << 24 |
                          mfc_context->vp8_state.mv_probs[i][j + 2] << 16 |
                          mfc_context->vp8_state.mv_probs[i][j + 1] << 8 |
                          mfc_context->vp8_state.mv_probs[i][j + 0] << 0);

    /* Reference-frame loop-filter deltas, 7-bit sign-magnitude fields. */
    OUT_BCS_BATCH(batch,
                  (pic_param->ref_lf_delta[3] & 0x7f) << 24 |
                  (pic_param->ref_lf_delta[2] & 0x7f) << 16 |
                  (pic_param->ref_lf_delta[1] & 0x7f) << 8 |
                  (pic_param->ref_lf_delta[0] & 0x7f) << 0);

    /* Coding-mode loop-filter deltas, 7-bit sign-magnitude fields. */
    OUT_BCS_BATCH(batch,
                  (pic_param->mode_lf_delta[3] & 0x7f) << 24 |
                  (pic_param->mode_lf_delta[2] & 0x7f) << 16 |
                  (pic_param->mode_lf_delta[1] & 0x7f) << 8 |
                  (pic_param->mode_lf_delta[0] & 0x7f) << 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * Emit one buffer entry for MFX_VP8_BSP_BUF_BASE_ADDR_STATE: a
 * relocation for `bo` at `offset` followed by padding DWs.
 * NOTE(review): the relocation argument list looks abbreviated in this
 * listing (no bo/offset arguments visible); verify against the full
 * macro definition before relying on this expansion.
 */
#define OUT_VP8_BUFFER(bo, offset) \
    OUT_BCS_RELOC(batch, \
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, \
    OUT_BCS_BATCH(batch, 0); \
    OUT_BCS_BATCH(batch, 0); \
    OUT_BCS_BATCH(batch, 0);
/*
 * Emit MFX_VP8_BSP_BUF_BASE_ADDR_STATE (32 DWs).
 *
 * Programs the base addresses the VP8 bit-stream processor reads and
 * writes: the pre-built frame-header buffer, the intermediate buffer
 * with its 8 per-partition offsets, the final frame output (placed
 * after the coded-buffer header), and the stream-out / coefficient-prob
 * stream-in / token-statistics / MPC-row-store buffers.
 */
gen8_mfc_vp8_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 32);
    OUT_BCS_BATCH(batch, MFX_VP8_BSP_BUF_BASE_ADDR_STATE | (32 - 2));

    OUT_VP8_BUFFER(mfc_context->vp8_state.frame_header_bo, 0);

    OUT_VP8_BUFFER(mfc_context->vp8_state.intermediate_bo, 0);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[0]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[1]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[2]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[3]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[4]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[5]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[6]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[7]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_buffer_max_size);

    /* Final bitstream goes into the coded buffer, past the header. */
    OUT_VP8_BUFFER(mfc_context->vp8_state.final_frame_bo, I965_CODEDBUFFER_HEADER_SIZE);
    OUT_BCS_BATCH(batch, 0);

    OUT_VP8_BUFFER(mfc_context->vp8_state.stream_out_bo, 0);
    OUT_VP8_BUFFER(mfc_context->vp8_state.coeff_probs_stream_in_bo, 0);
    OUT_VP8_BUFFER(mfc_context->vp8_state.token_statistics_bo, 0);
    OUT_VP8_BUFFER(mfc_context->vp8_state.mpc_row_store_bo, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * Picture-level VP8 PAK programming: emits pipe-mode select, surface
 * state, indirect object / pipe / BSP buffer addresses, then the VP8
 * picture state and encoder configuration, in the order the MFX
 * pipeline requires.
 */
gen8_mfc_vp8_pipeline_picture_programing(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         struct intel_encoder_context *encoder_context)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    mfc_context->pipe_mode_select(ctx, MFX_FORMAT_VP8, encoder_context);
    mfc_context->set_surface_state(ctx, encoder_context);
    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
    gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    gen8_mfc_vp8_bsp_buf_base_addr_state(ctx, encode_state, encoder_context);
    gen8_mfc_vp8_pic_state(ctx, encode_state,encoder_context);
    gen8_mfc_vp8_encoder_cfg(ctx, encode_state, encoder_context);
/* Maps a VME 16x16 luma intra prediction mode to the corresponding
 * VP8 PAK macroblock mode.  (Initializer values not visible in this
 * listing.) */
static const unsigned char
vp8_intra_mb_mode_map[VME_MB_INTRA_MODE_COUNT] = {
/* Maps a VME 4x4 block intra prediction mode to the corresponding
 * VP8 PAK sub-block (b-)mode.  (Initializer values not visible in
 * this listing.) */
static const unsigned char
vp8_intra_block_mode_map[VME_B_INTRA_MODE_COUNT] = {
/*
 * Translate a packed VME intra prediction mode word into the VP8 PAK
 * encoding.  For a 16x16 mode only the low 2 bits are mapped; for 4x4
 * mode each of the 8 visible 4-bit sub-block modes is mapped through
 * vp8_intra_block_mode_map and repacked at the same nibble position.
 * NOTE(review): the is_luma_4x4 branch structure is not visible in this
 * listing — confirm which path uses the table lookup vs. the loop.
 */
static int inline gen8_mfc_vp8_intra_mb_mode_map(unsigned int vme_pred_mode, int is_luma_4x4)
    unsigned int i, pak_pred_mode = 0;
    unsigned int vme_sub_blocks_pred_mode[8], pak_sub_blocks_pred_mode[8]; /* 8 blocks's intra mode */

    pak_pred_mode = vp8_intra_mb_mode_map[vme_pred_mode & 0x3];
    for (i = 0; i < 8; i++) {
        vme_sub_blocks_pred_mode[i] = ((vme_pred_mode >> (4 * i)) & 0xf);
        assert(vme_sub_blocks_pred_mode[i] < VME_B_INTRA_MODE_COUNT);
        pak_sub_blocks_pred_mode[i] = vp8_intra_block_mode_map[vme_sub_blocks_pred_mode[i]];
        pak_pred_mode |= (pak_sub_blocks_pred_mode[i] << (4 * i));

    return pak_pred_mode;
/*
 * Emit one MFX_VP8_PAK_OBJECT (7 DWs) for an intra macroblock.
 *
 * Decodes the VME output message: msg[0] bits 4-5 carry the intra MB
 * mode (VP8 supports only intra_16x16 == 0 and intra_4x4 == 2),
 * msg[1]/msg[2] carry the packed luma sub-block modes and msg[3] the
 * chroma mode; each is remapped to the PAK encoding before emission.
 */
gen8_mfc_vp8_pak_object_intra(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context,
                              struct intel_batchbuffer *batch)
    unsigned int vme_intra_mb_mode, vme_chroma_pred_mode;
    unsigned int pak_intra_mb_mode, pak_chroma_pred_mode;
    unsigned int vme_luma_pred_mode[2], pak_luma_pred_mode[2];

    batch = encoder_context->base.batch;

    vme_intra_mb_mode = ((msg[0] & 0x30) >> 4);
    assert((vme_intra_mb_mode == 0) || (vme_intra_mb_mode == 2)); //vp8 only support intra_16x16 and intra_4x4
    /* 0 -> intra_16x16, 1 -> intra_4x4 for the PAK command. */
    pak_intra_mb_mode = (vme_intra_mb_mode >> 1);

    vme_luma_pred_mode[0] = msg[1];
    vme_luma_pred_mode[1] = msg[2];
    vme_chroma_pred_mode = msg[3] & 0x3;

    pak_luma_pred_mode[0] = gen8_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[0], pak_intra_mb_mode);
    pak_luma_pred_mode[1] = gen8_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[1], pak_intra_mb_mode);
    pak_chroma_pred_mode = gen8_mfc_vp8_intra_mb_mode_map(vme_chroma_pred_mode, 0);

    BEGIN_BCS_BATCH(batch, 7);

    OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 20) | /* mv format: intra mb */
                  (0 << 18) | /* Segment ID */
                  (0 << 17) | /* disable coeff clamp */
                  (1 << 13) | /* intra mb flag */
                  (0 << 11) | /* refer picture select: last frame */
                  (pak_intra_mb_mode << 8) | /* mb type */
                  (pak_chroma_pred_mode << 4) | /* mb uv mode */
                  (0 << 2) | /* skip mb flag: disable */

    /* Macroblock position in MB units. */
    OUT_BCS_BATCH(batch, (y << 16) | x);
    OUT_BCS_BATCH(batch, pak_luma_pred_mode[0]);
    OUT_BCS_BATCH(batch, pak_luma_pred_mode[1]);

    ADVANCE_BCS_BATCH(batch);
/*
 * Emit one MFX_VP8_PAK_OBJECT (7 DWs) for an inter macroblock.
 *
 * Only inter_16x16 is supported: all 16 sub-block MVs are assumed
 * identical, and the single MV is rewritten to the start of the VME
 * message (64-byte aligned) with both components doubled, since VP8
 * stores luma motion vectors in quarter-pel doubled units.
 */
gen8_mfc_vp8_pak_object_inter(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context,
                              struct intel_batchbuffer *batch)

    batch = encoder_context->base.batch;

    /* Only support inter_16x16 now. */
    assert((msg[AVC_INTER_MSG_OFFSET] & INTER_MODE_MASK) == INTER_16X16);
    /* for inter_16x16, all 16 MVs should be same,
     * and move mv to the vme mb start address to make sure offset is 64 bytes aligned
     * as vp8 spec, all vp8 luma motion vectors are doubled stored
     */
    msg[0] = (((msg[AVC_INTER_MV_OFFSET/4] & 0xffff0000) << 1) | ((msg[AVC_INTER_MV_OFFSET/4] << 1) & 0xffff));

    /* Replicate the MV to the remaining sub-block slots. */
    for (i = 1; i < 16; i++) {

    BEGIN_BCS_BATCH(batch, 7);

    OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 29) | /* enable inline mv data: disable */
    OUT_BCS_BATCH(batch,
    OUT_BCS_BATCH(batch,
                  (4 << 20) | /* mv format: inter */
                  (0 << 18) | /* Segment ID */
                  (0 << 17) | /* coeff clamp: disable */
                  (0 << 13) | /* intra mb flag: inter mb */
                  (0 << 11) | /* refer picture select: last frame */
                  (0 << 8) | /* mb type: 16x16 */
                  (0 << 4) | /* mb uv mode: dc_pred */
                  (0 << 2) | /* skip mb flag: disable */

    /* Macroblock position in MB units. */
    OUT_BCS_BATCH(batch, (y << 16) | x);

    OUT_BCS_BATCH(batch, 0x8);
    OUT_BCS_BATCH(batch, 0x8);

    ADVANCE_BCS_BATCH(batch);
/*
 * Walk the VME output buffer and emit one PAK object per macroblock.
 *
 * On key frames every MB is encoded intra; on inter frames the VME RDO
 * costs (intra vs. inter) decide per macroblock.  The VME output BO is
 * mapped for the duration of the walk.
 */
gen8_mfc_vp8_pak_pipeline(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context,
                          struct intel_batchbuffer *slice_batch)
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
    unsigned int *msg = NULL;
    unsigned char *msg_ptr = NULL;
    unsigned int i, offset, is_intra_frame;

    /* frame_type == 0 is a VP8 key (intra) frame. */
    is_intra_frame = !pic_param->pic_flags.bits.frame_type;

    dri_bo_map(vme_context->vme_output.bo , 1);
    msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;

    for( i = 0; i < width_in_mbs * height_in_mbs; i++) {
        int h_pos = i % width_in_mbs;
        int v_pos = i / width_in_mbs;
        /* Each MB has one fixed-size VME message record. */
        msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);

        if (is_intra_frame) {
            gen8_mfc_vp8_pak_object_intra(ctx,
            int inter_rdo, intra_rdo;
            inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
            intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;

            /* Pick the cheaper of the two RDO costs for this MB. */
            if (intra_rdo < inter_rdo) {
                gen8_mfc_vp8_pak_object_intra(ctx,
                offset = i * vme_context->vme_output.size_block;
                gen8_mfc_vp8_pak_object_inter(ctx,

    dri_bo_unmap(vme_context->vme_output.bo);
/*
 * A batch buffer for vp8 pak object commands.
 *
 * Fills the auxiliary batch with per-MB PAK objects, terminates it with
 * MI_BATCH_BUFFER_END (8-byte aligned), and returns the underlying BO
 * with an extra reference; the caller owns that reference.  The aux
 * batchbuffer itself is freed and detached from the context.
 */
gen8_mfc_vp8_software_batchbuffer(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch;

    batch = mfc_context->aux_batchbuffer;
    batch_bo = batch->buffer;

    gen8_mfc_vp8_pak_pipeline(ctx, encode_state, encoder_context, batch);

    /* Second-level batches must end on an 8-byte boundary. */
    intel_batchbuffer_align(batch, 8);

    BEGIN_BCS_BATCH(batch, 2);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
    ADVANCE_BCS_BATCH(batch);

    /* Keep the BO alive for the caller, then drop the wrapper. */
    dri_bo_reference(batch_bo);
    intel_batchbuffer_free(batch);
    mfc_context->aux_batchbuffer = NULL;
/*
 * Build the full VP8 PAK command sequence: generate the second-level
 * (software) batch of PAK objects, then atomically emit the picture
 * level state followed by an MI_BATCH_BUFFER_START chaining to it.
 */
gen8_mfc_vp8_pipeline_programing(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    dri_bo *slice_batch_bo;

    slice_batch_bo = gen8_mfc_vp8_software_batchbuffer(ctx, encode_state, encoder_context);

    // begin programing
    intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
    intel_batchbuffer_emit_mi_flush(batch);

    // picture level programing
    gen8_mfc_vp8_pipeline_picture_programing(ctx, encode_state, encoder_context);

    /* Chain to the second-level PAK-object batch. */
    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_BCS_RELOC(batch,
                  I915_GEM_DOMAIN_COMMAND, 0,
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);

    // end programing
    intel_batchbuffer_end_atomic(batch);

    /* The kernel holds its own reference while the batch executes. */
    dri_bo_unreference(slice_batch_bo);
/*
 * Compute the coded frame size in bytes from the hardware token
 * statistics and store it in the coded-buffer segment header.
 *
 * token_statistics_bo entry 0 holds the first-partition bit count and
 * entries 1..partition_num the per-token-partition bit counts.  The
 * total adds the uncompressed frame tag (3 bytes, +7 for key frames),
 * the 3-byte per-partition size fields, and a 3-byte safety margin.
 * Returns the computed byte count.
 */
static int gen8_mfc_calc_vp8_coded_buffer_size(VADriverContextP ctx,
                                               struct encode_state *encode_state,
                                               struct intel_encoder_context *encoder_context)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    unsigned char is_intra_frame = !pic_param->pic_flags.bits.frame_type;
    unsigned int *vp8_encoding_status, i, first_partition_bytes, token_partition_bytes, vp8_coded_bytes;

    /* num_token_partitions is log2 of the partition count. */
    int partition_num = 1 << pic_param->pic_flags.bits.num_token_partitions;

    first_partition_bytes = token_partition_bytes = vp8_coded_bytes = 0;

    dri_bo_map(mfc_context->vp8_state.token_statistics_bo, 0);

    vp8_encoding_status = (unsigned int *)mfc_context->vp8_state.token_statistics_bo->virtual;
    /* Bit counts are rounded up to whole bytes. */
    first_partition_bytes = (vp8_encoding_status[0] + 7) / 8;

    for (i = 1; i <= partition_num; i++)
        token_partition_bytes += (vp8_encoding_status[i] + 7) / 8;

    /*coded_bytes includes P0~P8 partitions bytes + uncompressed data bytes + partition_size bytes in bitstream + 3 extra bytes */
    /*it seems the last partition size in vp8 status buffer is smaller than reality. so add 3 extra bytes */
    vp8_coded_bytes = first_partition_bytes + token_partition_bytes + (3 + 7 * !!is_intra_frame) + (partition_num - 1) * 3 + 3;

    dri_bo_unmap(mfc_context->vp8_state.token_statistics_bo);

    /* Publish the size through the coded-buffer segment header. */
    dri_bo_map(mfc_context->vp8_state.final_frame_bo, 0);
    struct i965_coded_buffer_segment *coded_buffer_segment = (struct i965_coded_buffer_segment *)(mfc_context->vp8_state.final_frame_bo->virtual);
    coded_buffer_segment->base.size = vp8_coded_bytes;
    dri_bo_unmap(mfc_context->vp8_state.final_frame_bo);

    return vp8_coded_bytes;
/*
 * Top-level VP8 encode entry: init + prepare, program and run the BCS
 * pipeline, then compute the coded size.  Under CBR rate control the
 * BRC post-pack pass updates the HRD model; an unrepairable HRD
 * over/underflow is reported once via stderr but encoding still
 * returns success.
 */
gen8_mfc_vp8_encode_picture(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    int current_frame_bits_size;

    gen8_mfc_vp8_init(ctx, encode_state, encoder_context);
    intel_mfc_vp8_prepare(ctx, encode_state, encoder_context);
    /*Programing bcs pipeline*/
    gen8_mfc_vp8_pipeline_programing(ctx, encode_state, encoder_context);
    gen8_mfc_run(ctx, encode_state, encoder_context);
    /* Coded size is in bytes; BRC works in bits. */
    current_frame_bits_size = 8 * gen8_mfc_calc_vp8_coded_buffer_size(ctx, encode_state, encoder_context);

    if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
        sts = gen8_mfc_vp8_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
        if (sts == BRC_NO_HRD_VIOLATION) {
            gen8_mfc_vp8_hrd_context_update(encode_state, mfc_context);
        else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
            /* QP is already pinned at its limit; warn once, keep going. */
            if (!mfc_context->hrd.violation_noted) {
                fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
                mfc_context->hrd.violation_noted = 1;
            return VA_STATUS_SUCCESS;

    return VA_STATUS_SUCCESS;
/*
 * Release every BO and sub-context owned by the MFC context: in/out
 * surfaces, DMV and scratch buffers, reference surfaces, the GPE
 * context, the batchbuffer surfaces and aux batch, and all VP8-state
 * BOs.  Each pointer is NULLed after unreference to guard against
 * double-free on repeated destruction.
 */
gen8_mfc_context_destroy(void *context)
    struct gen6_mfc_context *mfc_context = context;

    dri_bo_unreference(mfc_context->post_deblocking_output.bo);
    mfc_context->post_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
    mfc_context->pre_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
    mfc_context->uncompressed_picture_source.bo = NULL;

    dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
    mfc_context->mfc_indirect_pak_bse_object.bo = NULL;

    for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
        dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
        mfc_context->direct_mv_buffers[i].bo = NULL;

    dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
    mfc_context->intra_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
    mfc_context->macroblock_status_buffer.bo = NULL;

    dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
    mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
    mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;

    for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
        dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
        mfc_context->reference_surfaces[i].bo = NULL;

    gen8_gpe_context_destroy(&mfc_context->gpe_context);

    dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
    mfc_context->mfc_batchbuffer_surface.bo = NULL;

    dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.bo = NULL;

    if (mfc_context->aux_batchbuffer)
        intel_batchbuffer_free(mfc_context->aux_batchbuffer);

    mfc_context->aux_batchbuffer = NULL;

    /* VP8-specific state buffers. */
    dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
    mfc_context->vp8_state.coeff_probs_stream_in_bo = NULL;

    dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
    mfc_context->vp8_state.final_frame_bo = NULL;

    dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
    mfc_context->vp8_state.frame_header_bo = NULL;

    dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
    mfc_context->vp8_state.intermediate_bo = NULL;

    dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
    mfc_context->vp8_state.mpc_row_store_bo = NULL;

    dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
    mfc_context->vp8_state.stream_out_bo = NULL;

    dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
    mfc_context->vp8_state.token_statistics_bo = NULL;
/*
 * Codec dispatch for the gen8 MFC encode pipeline: routes an encode
 * request to the AVC, MPEG-2, JPEG or VP8 back-end by profile.  JPEG
 * additionally initializes the default quantization matrices first.
 * Unknown profiles yield VA_STATUS_ERROR_UNSUPPORTED_PROFILE.
 */
static VAStatus gen8_mfc_pipeline(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
    case VAProfileH264ConstrainedBaseline:
    case VAProfileH264Main:
    case VAProfileH264High:
    case VAProfileH264MultiviewHigh:
    case VAProfileH264StereoHigh:
        vaStatus = gen8_mfc_avc_encode_picture(ctx, encode_state, encoder_context);

    /* FIXME: add for other profile */
    case VAProfileMPEG2Simple:
    case VAProfileMPEG2Main:
        vaStatus = gen8_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);

    case VAProfileJPEGBaseline:
        jpeg_init_default_qmatrix(ctx, encoder_context);
        vaStatus = gen8_mfc_jpeg_encode_picture(ctx, encode_state, encoder_context);

    case VAProfileVP8Version0_3:
        vaStatus = gen8_mfc_vp8_encode_picture(ctx, encode_state, encoder_context);

    default:
        vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
4547 Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
4549 struct i965_driver_data *i965 = i965_driver_data(ctx);
4550 struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
4552 assert(mfc_context);
4553 mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
4555 mfc_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
4556 mfc_context->gpe_context.curbe_size = 32 * 4;
4557 mfc_context->gpe_context.sampler_size = 0;
4559 mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
4560 mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
4561 mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
4562 mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
4563 mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
4565 if (IS_GEN9(i965->intel.device_info)) {
4566 gen8_gpe_load_kernels(ctx,
4567 &mfc_context->gpe_context,
4571 gen8_gpe_load_kernels(ctx,
4572 &mfc_context->gpe_context,
4577 mfc_context->pipe_mode_select = gen8_mfc_pipe_mode_select;
4578 mfc_context->set_surface_state = gen8_mfc_surface_state;
4579 mfc_context->ind_obj_base_addr_state = gen8_mfc_ind_obj_base_addr_state;
4580 mfc_context->avc_img_state = gen8_mfc_avc_img_state;
4581 mfc_context->avc_qm_state = gen8_mfc_avc_qm_state;
4582 mfc_context->avc_fqm_state = gen8_mfc_avc_fqm_state;
4583 mfc_context->insert_object = gen8_mfc_avc_insert_object;
4584 mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
4586 encoder_context->mfc_context = mfc_context;
4587 encoder_context->mfc_context_destroy = gen8_mfc_context_destroy;
4588 encoder_context->mfc_pipeline = gen8_mfc_pipeline;
4590 if (encoder_context->codec == CODEC_VP8)
4591 encoder_context->mfc_brc_prepare = gen8_mfc_vp8_brc_prepare;
4593 encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;