2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Zhao Yakui <yakui.zhao@intel.com>
26 * Xiang Haihao <haihao.xiang@intel.com>
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
44 #include "intel_media.h"
45 #include <va/va_enc_jpeg.h>
46 #include "vp8_probs.h"
/* Layout helpers for the media-kernel binding table: surface-state entries are
 * padded to the Gen8 size, and the binding table itself lives immediately after
 * the MAX_MEDIA_SURFACES_GEN6 surface-state slots.
 * NOTE(review): the (index) macro arguments are not parenthesized in the
 * expansions — callers must not pass expressions like "i + 1". Confirm all
 * call sites before changing. */
48 #define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8
49 #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
50 #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
/* 0 => use the hardware MFC batchbuffer path rather than a software-built one. */
52 #define MFC_SOFTWARE_BATCH 0
/* True when the GPU revision is at least the B0 stepping. */
55 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
57 //Zigzag scan order of the Luma and Chroma components
58 //Note: Jpeg Spec ISO/IEC 10918-1, Figure A.6 shows the zigzag order differently.
59 //The Spec is trying to show the zigzag pattern with number positions. The below
60 //table will use the pattern shown by A.6 and map the position of the elements in the array
// Maps zigzag-scan position -> raster-order index within an 8x8 coefficient block.
// NOTE(review): the closing "};" of this initializer is not visible in this chunk.
61 static const uint32_t zigzag_direct[64] = {
62 0, 1, 8, 16, 9, 2, 3, 10,
63 17, 24, 32, 25, 18, 11, 4, 5,
64 12, 19, 26, 33, 40, 48, 41, 34,
65 27, 20, 13, 6, 7, 14, 21, 28,
66 35, 42, 49, 56, 57, 50, 43, 36,
67 29, 22, 15, 23, 30, 37, 44, 51,
68 58, 59, 52, 45, 38, 31, 39, 46,
69 53, 60, 61, 54, 47, 55, 62, 63
72 //Default Luminance quantization table
73 //Source: Jpeg Spec ISO/IEC 10918-1, Annex K, Table K.1
// Stored in raster order; 8-bit quantizer step per coefficient.
// NOTE(review): the closing "};" of this initializer is not visible in this chunk.
74 static const uint8_t jpeg_luma_quant[64] = {
75 16, 11, 10, 16, 24, 40, 51, 61,
76 12, 12, 14, 19, 26, 58, 60, 55,
77 14, 13, 16, 24, 40, 57, 69, 56,
78 14, 17, 22, 29, 51, 87, 80, 62,
79 18, 22, 37, 56, 68, 109, 103, 77,
80 24, 35, 55, 64, 81, 104, 113, 92,
81 49, 64, 78, 87, 103, 121, 120, 101,
82 72, 92, 95, 98, 112, 100, 103, 99
85 //Default Chroma quantization table
86 //Source: Jpeg Spec ISO/IEC 10918-1, Annex K, Table K.2
// Stored in raster order; 8-bit quantizer step per coefficient.
// NOTE(review): the closing "};" of this initializer is not visible in this chunk.
87 static const uint8_t jpeg_chroma_quant[64] = {
88 17, 18, 24, 47, 99, 99, 99, 99,
89 18, 21, 26, 66, 99, 99, 99, 99,
90 24, 26, 56, 99, 99, 99, 99, 99,
91 47, 66, 99, 99, 99, 99, 99, 99,
92 99, 99, 99, 99, 99, 99, 99, 99,
93 99, 99, 99, 99, 99, 99, 99, 99,
94 99, 99, 99, 99, 99, 99, 99, 99,
95 99, 99, 99, 99, 99, 99, 99, 99
// Presumably maps a VA-API JPEG Huffman-table index to the gen7 MFX
// hufftable id — TODO confirm; the initializer body and closing "};"
// are missing from this chunk.
99 static const int va_to_gen7_jpeg_hufftable[2] = {
/* Prebuilt media-kernel binaries (4 dwords per instruction) for building MFC
 * batchbuffers on the GPU; the .g8b/.g9b blobs are generated at build time.
 * NOTE(review): the closing "};" lines of both arrays are not visible in this chunk. */
104 static const uint32_t gen8_mfc_batchbuffer_avc[][4] = {
105 #include "shaders/utils/mfc_batchbuffer_hsw.g8b"
108 static const uint32_t gen9_mfc_batchbuffer_avc[][4] = {
109 #include "shaders/utils/mfc_batchbuffer_hsw.g9b"
/* Kernel descriptor tables: name, id, binary pointer and binary size for the
 * MFC AVC intra batchbuffer kernel, per GPU generation.
 * NOTE(review): the struct-initializer braces/closers are partially missing
 * from this chunk. */
112 static struct i965_kernel gen8_mfc_kernels[] = {
114 "MFC AVC INTRA BATCHBUFFER ",
115 MFC_BATCHBUFFER_AVC_INTRA,
116 gen8_mfc_batchbuffer_avc,
117 sizeof(gen8_mfc_batchbuffer_avc),
122 static struct i965_kernel gen9_mfc_kernels[] = {
124 "MFC AVC INTRA BATCHBUFFER ",
125 MFC_BATCHBUFFER_AVC_INTRA,
126 gen9_mfc_batchbuffer_avc,
127 sizeof(gen9_mfc_batchbuffer_avc),
/* Flat AVC quantization matrix: every byte is 0x10 (16), packed four 8-bit
 * entries per dword, as consumed by MFX_QM_STATE.
 * NOTE(review): the closing "};" is not visible in this chunk. */
132 static const uint32_t qm_flat[16] = {
133 0x10101010, 0x10101010, 0x10101010, 0x10101010,
134 0x10101010, 0x10101010, 0x10101010, 0x10101010,
135 0x10101010, 0x10101010, 0x10101010, 0x10101010,
136 0x10101010, 0x10101010, 0x10101010, 0x10101010
/* Flat forward-quantization matrix: each 16-bit entry is 0x1000, which equals
 * (1 << 16) / 16 — i.e. the reciprocal of qm_flat's 0x10 in the same fixed-point
 * form produced by gen8_mfc_avc_fill_fqm(); two entries packed per dword.
 * NOTE(review): the closing "};" is not visible in this chunk. */
139 static const uint32_t fqm_flat[32] = {
140 0x10001000, 0x10001000, 0x10001000, 0x10001000,
141 0x10001000, 0x10001000, 0x10001000, 0x10001000,
142 0x10001000, 0x10001000, 0x10001000, 0x10001000,
143 0x10001000, 0x10001000, 0x10001000, 0x10001000,
144 0x10001000, 0x10001000, 0x10001000, 0x10001000,
145 0x10001000, 0x10001000, 0x10001000, 0x10001000,
146 0x10001000, 0x10001000, 0x10001000, 0x10001000,
147 0x10001000, 0x10001000, 0x10001000, 0x10001000
/* Bitfield encodings for inter-prediction macroblock shapes — presumably
 * matching the VME/PAK MB mode layout; TODO confirm against the consumer of
 * these masks (not visible in this chunk). */
150 #define INTER_MODE_MASK 0x03
151 #define INTER_8X8 0x03
152 #define INTER_16X8 0x01
153 #define INTER_8X16 0x02
154 #define SUBMB_SHAPE_MASK 0x00FF00
155 #define INTER_16X16 0x00
157 #define INTER_MV8 (4 << 20)
158 #define INTER_MV32 (6 << 20)
/* Emits MFX_PIPE_MODE_SELECT (5 dwords): selects encode mode, the codec
 * standard, and deblocking stream-out configuration for the BCS ring.
 * NOTE(review): several source lines are missing from this chunk (function
 * opening brace, the OUT_BCS_BATCH( openers for DW1/DW2, closers) — the
 * visible dword fragments below are partial. */
162 gen8_mfc_pipe_mode_select(VADriverContextP ctx,
164 struct intel_encoder_context *encoder_context)
166 struct intel_batchbuffer *batch = encoder_context->base.batch;
167 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
/* Only these four codecs are valid for this backend. */
169 assert(standard_select == MFX_FORMAT_MPEG2 ||
170 standard_select == MFX_FORMAT_AVC ||
171 standard_select == MFX_FORMAT_JPEG ||
172 standard_select == MFX_FORMAT_VP8);
174 BEGIN_BCS_BATCH(batch, 5);
176 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
/* DW1: mode flags — pre/post deblocking outputs enabled only when the
 * corresponding buffer object exists. */
178 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
179 (MFD_MODE_VLD << 15) | /* VLD mode */
180 (0 << 10) | /* Stream-Out Enable */
181 ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
182 ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
183 (0 << 6) | /* frame statistics stream-out enable*/
184 (0 << 5) | /* not in stitch mode */
185 (1 << 4) | /* encoding mode */
186 (standard_select << 0)); /* standard select: avc or mpeg2 or jpeg*/
/* DW2: debug/clock-gating controls, all left at hardware defaults. */
188 (0 << 7) | /* expand NOA bus flag */
189 (0 << 6) | /* disable slice-level clock gating */
190 (0 << 5) | /* disable clock gating for NOA */
191 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
192 (0 << 3) | /* terminate if AVC mbdata error occurs */
193 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
196 OUT_BCS_BATCH(batch, 0);
197 OUT_BCS_BATCH(batch, 0);
199 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_SURFACE_STATE (6 dwords) describing the reconstructed/source
 * surface: NV12 (planar 4:2:0, interleaved U/V), Y-major tiled, with
 * width/height/pitch taken from mfc_context->surface_state.
 * NOTE(review): the function opening brace and the OUT_BCS_BATCH( openers
 * for DW2/DW3/DW4 are missing from this chunk. */
203 gen8_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
205 struct intel_batchbuffer *batch = encoder_context->base.batch;
206 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
208 BEGIN_BCS_BATCH(batch, 6);
210 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
211 OUT_BCS_BATCH(batch, 0);
/* DW2: surface dimensions, both stored minus one per hardware convention. */
213 ((mfc_context->surface_state.height - 1) << 18) |
214 ((mfc_context->surface_state.width - 1) << 4));
216 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
217 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
218 (0 << 22) | /* surface object control state, FIXME??? */
219 ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
220 (0 << 2) | /* must be 0 for interleave U/V */
221 (1 << 1) | /* must be tiled */
222 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
/* DW4: Y offset of the UV plane = h_pitch rows below the Y plane. */
224 (0 << 16) | /* must be 0 for interleave U/V */
225 (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
226 OUT_BCS_BATCH(batch, 0);
228 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): programs the indirect
 * bitstream/MV object base addresses. VP8 uses the PAK-BSE object end offset
 * as the MFX upper bound; JPEG skips the VME MV object; all codecs program
 * the PAK-BSE base (JPEG with a non-zero start offset) and its upper bound.
 * NOTE(review): the function opening brace, some local declarations
 * (vme_size), the else lines, and a few mocs/relocation dwords are missing
 * from this chunk. */
232 gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
233 struct intel_encoder_context *encoder_context)
235 struct i965_driver_data *i965 = i965_driver_data(ctx);
236 struct intel_batchbuffer *batch = encoder_context->base.batch;
237 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
238 struct gen6_vme_context *vme_context = encoder_context->vme_context;
240 unsigned int bse_offset;
242 BEGIN_BCS_BATCH(batch, 26);
244 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
245 /* the DW1-3 is for the MFX indirect bistream offset */
246 OUT_BCS_BATCH(batch, 0);
247 OUT_BCS_BATCH(batch, 0);
248 OUT_BCS_BATCH(batch, 0);
250 /* the DW4-5 is the MFX upper bound */
251 if (encoder_context->codec == CODEC_VP8) {
252 OUT_BCS_RELOC64(batch,
253 mfc_context->mfc_indirect_pak_bse_object.bo,
254 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
255 mfc_context->mfc_indirect_pak_bse_object.end_offset);
/* non-VP8: upper bound left unprogrammed (zeros). */
257 OUT_BCS_BATCH(batch, 0);
258 OUT_BCS_BATCH(batch, 0);
261 if (encoder_context->codec != CODEC_JPEG) {
262 vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;
263 /* the DW6-10 is for MFX Indirect MV Object Base Address */
264 OUT_BCS_RELOC64(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
265 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
266 OUT_BCS_RELOC64(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size);
268 /* No VME for JPEG */
269 OUT_BCS_BATCH(batch, 0);
270 OUT_BCS_BATCH(batch, 0);
271 OUT_BCS_BATCH(batch, 0);
272 OUT_BCS_BATCH(batch, 0);
273 OUT_BCS_BATCH(batch, 0);
276 /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
277 OUT_BCS_BATCH(batch, 0);
278 OUT_BCS_BATCH(batch, 0);
279 OUT_BCS_BATCH(batch, 0);
280 OUT_BCS_BATCH(batch, 0);
281 OUT_BCS_BATCH(batch, 0);
283 /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
284 OUT_BCS_BATCH(batch, 0);
285 OUT_BCS_BATCH(batch, 0);
286 OUT_BCS_BATCH(batch, 0);
287 OUT_BCS_BATCH(batch, 0);
288 OUT_BCS_BATCH(batch, 0);
290 /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
291 bse_offset = (encoder_context->codec == CODEC_JPEG) ? (mfc_context->mfc_indirect_pak_bse_object.offset) : 0;
292 OUT_BCS_RELOC64(batch,
293 mfc_context->mfc_indirect_pak_bse_object.bo,
294 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
296 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
298 OUT_BCS_RELOC64(batch,
299 mfc_context->mfc_indirect_pak_bse_object.bo,
300 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
301 mfc_context->mfc_indirect_pak_bse_object.end_offset);
303 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_AVC_IMG_STATE (16 dwords): per-picture AVC encode parameters —
 * frame size in MBs, QP offsets, weighted prediction, entropy coding mode,
 * transform-8x8 flag, and conformance size limits. Dimensions come from
 * mfc_context->surface_state, rounded up to whole macroblocks.
 * NOTE(review): the function opening brace and the OUT_BCS_BATCH( openers
 * for DW1/DW2/DW3/DW4 are missing from this chunk. */
307 gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
308 struct intel_encoder_context *encoder_context)
310 struct intel_batchbuffer *batch = encoder_context->base.batch;
311 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
312 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
314 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
315 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
317 BEGIN_BCS_BATCH(batch, 16);
319 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
320 /*DW1. MB setting of frame */
322 ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
/* DW2: frame dimensions in macroblocks, minus one. */
324 ((height_in_mbs - 1) << 16) |
325 ((width_in_mbs - 1) << 0));
/* DW3: QP offsets and weighted-prediction controls from the VA picture params. */
328 (0 << 24) | /* Second Chroma QP Offset */
329 (0 << 16) | /* Chroma QP Offset */
330 (0 << 14) | /* Max-bit conformance Intra flag */
331 (0 << 13) | /* Max Macroblock size conformance Inter flag */
332 (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) | /*Weighted_Pred_Flag */
333 (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) | /* Weighted_BiPred_Idc */
334 (0 << 8) | /* FIXME: Image Structure */
335 (0 << 0)); /* Current Decoed Image Frame Store ID, reserved in Encode mode */
/* DW4: bitstream/entropy/transform controls; progressive frame MBs only. */
337 (0 << 16) | /* Mininum Frame size */
338 (0 << 15) | /* Disable reading of Macroblock Status Buffer */
339 (0 << 14) | /* Load BitStream Pointer only once, 1 slic 1 frame */
340 (0 << 13) | /* CABAC 0 word insertion test enable */
341 (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */
342 (1 << 10) | /* Chroma Format IDC, 4:2:0 */
343 (0 << 8) | /* FIXME: MbMvFormatFlag */
344 (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/
345 (0 << 6) | /* Only valid for VLD decoding mode */
346 (0 << 5) | /* Constrained Intra Predition Flag, from PPS */
347 (0 << 4) | /* Direct 8x8 inference flag */
348 (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/
349 (1 << 2) | /* Frame MB only flag */
350 (0 << 1) | /* MBAFF mode is in active */
351 (0 << 0)); /* Field picture flag */
352 /* DW5 Trellis quantization */
353 OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */
354 OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */
355 (0xBB8 << 16) | /* InterMbMaxSz */
356 (0xEE8)); /* IntraMbMaxSz */
357 OUT_BCS_BATCH(batch, 0); /* Reserved */
359 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
360 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
361 /* DW10. Bit setting for MB */
362 OUT_BCS_BATCH(batch, 0x8C000000);
363 OUT_BCS_BATCH(batch, 0x00010000);
/* DW12-13: fixed MB-rate-control thresholds (hardware-recommended values). */
365 OUT_BCS_BATCH(batch, 0);
366 OUT_BCS_BATCH(batch, 0x02010100);
367 /* DW14. For short format */
368 OUT_BCS_BATCH(batch, 0);
369 OUT_BCS_BATCH(batch, 0);
371 ADVANCE_BCS_BATCH(batch);
/* Emits one MFX_QM_STATE command (18 dwords) carrying a quantization matrix
 * of up to 16 dwords for the given qm_type.
 * NOTE(review): the parameter lines (qm_type, qm, qm_length) and the function
 * opening brace are missing from this chunk.
 * NOTE(review): when qm_length < 16 the tail of qm_buffer is emitted
 * uninitialized — intel_batchbuffer_data always writes 16 dwords. Callers in
 * this file pass 12 for the 4x4 matrices; confirm whether the tail is
 * ignored by hardware. */
375 gen8_mfc_qm_state(VADriverContextP ctx,
379 struct intel_encoder_context *encoder_context)
381 struct intel_batchbuffer *batch = encoder_context->base.batch;
382 unsigned int qm_buffer[16];
384 assert(qm_length <= 16);
385 assert(sizeof(*qm) == 4);
386 memcpy(qm_buffer, qm, qm_length * 4);
388 BEGIN_BCS_BATCH(batch, 18);
389 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
390 OUT_BCS_BATCH(batch, qm_type << 0);
391 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
392 ADVANCE_BCS_BATCH(batch);
/* Programs the four AVC quantization matrices (4x4 intra/inter, 8x8
 * intra/inter). Uses the flat matrix unless the SPS/PPS signals scaling
 * matrices, in which case the VA IQ-matrix buffer supplies them.
 * NOTE(review): the function opening brace and the "} else {" line are
 * missing from this chunk. */
396 gen8_mfc_avc_qm_state(VADriverContextP ctx,
397 struct encode_state *encode_state,
398 struct intel_encoder_context *encoder_context)
400 const unsigned int *qm_4x4_intra;
401 const unsigned int *qm_4x4_inter;
402 const unsigned int *qm_8x8_intra;
403 const unsigned int *qm_8x8_inter;
404 VAEncSequenceParameterBufferH264 *pSeqParameter =
405 (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
406 VAEncPictureParameterBufferH264 *pPicParameter =
407 (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
409 if (!pSeqParameter->seq_fields.bits.seq_scaling_matrix_present_flag
410 && !pPicParameter->pic_fields.bits.pic_scaling_matrix_present_flag) {
411 qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
/* Scaling lists present: take intra from list 0, inter from list 3 (4x4)
 * and lists 0/1 (8x8) of the application-supplied IQ matrix. */
413 VAIQMatrixBufferH264 *qm;
414 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
415 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
416 qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
417 qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
418 qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
419 qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
/* 4x4 matrices are 12 dwords (3 lists x 16 bytes), 8x8 are 16 dwords. */
422 gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
423 gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
424 gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
425 gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
/* Emits one MFX_FQM_STATE command (34 dwords) carrying a forward-quantization
 * matrix of up to 32 dwords for the given fqm_type.
 * NOTE(review): the parameter lines (fqm_type, fqm, fqm_length) and the
 * function opening brace are missing from this chunk.
 * NOTE(review): same hazard as gen8_mfc_qm_state — when fqm_length < 32 the
 * tail of fqm_buffer is emitted uninitialized. */
429 gen8_mfc_fqm_state(VADriverContextP ctx,
433 struct intel_encoder_context *encoder_context)
435 struct intel_batchbuffer *batch = encoder_context->base.batch;
436 unsigned int fqm_buffer[32];
438 assert(fqm_length <= 32);
439 assert(sizeof(*fqm) == 4);
440 memcpy(fqm_buffer, fqm, fqm_length * 4);
442 BEGIN_BCS_BATCH(batch, 34);
443 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
444 OUT_BCS_BATCH(batch, fqm_type << 0);
445 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
446 ADVANCE_BCS_BATCH(batch);
/* Builds a forward-quantization matrix: fqm[i][j] = 65536 / qm[j][i]
 * (reciprocal in 16.16-derived fixed point, with the source matrix
 * transposed).
 * NOTE(review): the "static void" line, opening brace, and "int i, j;"
 * declaration are missing from this chunk.
 * NOTE(review): a zero qm entry would divide by zero; callers pass
 * application scaling lists, which per the AVC spec are >= 1 — TODO confirm
 * they are validated upstream. */
450 gen8_mfc_avc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
453 for (i = 0; i < len; i++)
454 for (j = 0; j < len; j++)
455 fqm[i * len + j] = (1 << 16) / qm[j * len + i];
/* Programs the four AVC forward-quantization matrices, mirroring
 * gen8_mfc_avc_qm_state: flat matrices when no scaling lists are signalled,
 * otherwise reciprocals computed from the VA IQ-matrix buffer via
 * gen8_mfc_avc_fill_fqm.
 * NOTE(review): the function opening brace, the "} else {" line, and the
 * local "fqm" array declaration are missing from this chunk. */
459 gen8_mfc_avc_fqm_state(VADriverContextP ctx,
460 struct encode_state *encode_state,
461 struct intel_encoder_context *encoder_context)
463 VAEncSequenceParameterBufferH264 *pSeqParameter =
464 (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
465 VAEncPictureParameterBufferH264 *pPicParameter =
466 (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
468 if (!pSeqParameter->seq_fields.bits.seq_scaling_matrix_present_flag
469 && !pPicParameter->pic_fields.bits.pic_scaling_matrix_present_flag) {
470 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
471 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
472 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
473 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
/* Scaling lists present: build reciprocal matrices from the VA buffer. */
477 VAIQMatrixBufferH264 *qm;
478 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
479 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
/* 4x4 intra: lists 0-2 packed at 16 uint16 entries apiece. */
481 for (i = 0; i < 3; i++)
482 gen8_mfc_avc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
483 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);
/* 4x4 inter: lists 3-5. */
485 for (i = 3; i < 6; i++)
486 gen8_mfc_avc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
487 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);
489 gen8_mfc_avc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
490 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);
492 gen8_mfc_avc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
493 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
/* Emits MFX_INSERT_OBJECT to inject raw header/NAL data into the output
 * bitstream: lenght_in_dws [sic] dwords of payload, with the valid bit count
 * of the last dword, emulation-prevention control, and last-header /
 * end-of-slice flags. A NULL batch falls back to the encoder's main batch.
 * NOTE(review): the function opening brace, the "if (batch == NULL)" guard
 * line, and the OUT_BCS_BATCH( opener for DW1 are missing from this chunk. */
498 gen8_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
499 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
500 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
501 struct intel_batchbuffer *batch)
504 batch = encoder_context->base.batch;
/* 0 valid bits means the last dword is fully used. */
506 if (data_bits_in_last_dw == 0)
507 data_bits_in_last_dw = 32;
509 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
511 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
513 (0 << 16) | /* always start at offset 0 */
514 (data_bits_in_last_dw << 8) |
515 (skip_emul_byte_count << 4) |
516 (!!emulation_flag << 3) |
517 ((!!is_last_header) << 2) |
518 ((!!is_end_of_slice) << 1) |
519 (0 << 0)); /* FIXME: ??? */
520 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
522 ADVANCE_BCS_BATCH(batch);
/* Per-frame (re)initialization of the MFC context: derives the frame size in
 * macroblocks from the codec-specific VA parameter buffer, releases all
 * buffer objects from the previous frame, reallocates the row-store /
 * MB-status scratch buffers sized for this frame, recreates the auxiliary
 * slice batchbuffer, and reinitializes the GPE context.
 * NOTE(review): several lines are missing from this chunk — the function
 * opening brace, local declarations for "i" and "bo", the "} else {" before
 * the JPEG branch, loop-closing braces, and the size/flag argument lines of
 * most dri_bo_alloc calls. */
526 static void gen8_mfc_init(VADriverContextP ctx,
527 struct encode_state *encode_state,
528 struct intel_encoder_context *encoder_context)
530 struct i965_driver_data *i965 = i965_driver_data(ctx);
531 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
534 int width_in_mbs = 0;
535 int height_in_mbs = 0;
536 int slice_batchbuffer_size;
/* Frame size in MBs, sourced per codec: H.264 SPS gives MB units directly;
 * MPEG-2 and JPEG give pixels, rounded up to 16. */
538 if (encoder_context->codec == CODEC_H264 ||
539 encoder_context->codec == CODEC_H264_MVC) {
540 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
541 width_in_mbs = pSequenceParameter->picture_width_in_mbs;
542 height_in_mbs = pSequenceParameter->picture_height_in_mbs;
543 } else if (encoder_context->codec == CODEC_MPEG2) {
544 VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
546 assert(encoder_context->codec == CODEC_MPEG2);
548 width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
549 height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
551 assert(encoder_context->codec == CODEC_JPEG);
552 VAEncPictureParameterBufferJPEG *pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
554 width_in_mbs = ALIGN(pic_param->picture_width, 16) / 16;
555 height_in_mbs = ALIGN(pic_param->picture_height, 16) / 16;
/* 64 bytes per MB plus fixed overhead and per-slice header/tail room. */
558 slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
559 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
561 /*Encode common setup for MFC*/
562 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
563 mfc_context->post_deblocking_output.bo = NULL;
565 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
566 mfc_context->pre_deblocking_output.bo = NULL;
568 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
569 mfc_context->uncompressed_picture_source.bo = NULL;
571 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
572 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
574 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
575 if (mfc_context->direct_mv_buffers[i].bo != NULL)
576 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
577 mfc_context->direct_mv_buffers[i].bo = NULL;
580 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
581 if (mfc_context->reference_surfaces[i].bo != NULL)
582 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
583 mfc_context->reference_surfaces[i].bo = NULL;
/* Reallocate scratch buffers sized for the new frame dimensions. */
586 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
587 bo = dri_bo_alloc(i965->intel.bufmgr,
592 mfc_context->intra_row_store_scratch_buffer.bo = bo;
594 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
595 bo = dri_bo_alloc(i965->intel.bufmgr,
597 width_in_mbs * height_in_mbs * 16,
600 mfc_context->macroblock_status_buffer.bo = bo;
602 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
603 bo = dri_bo_alloc(i965->intel.bufmgr,
605 4 * width_in_mbs * 64, /* 4 * width_in_mbs * 64 */
608 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
610 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
611 bo = dri_bo_alloc(i965->intel.bufmgr,
613 2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
616 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
618 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
619 mfc_context->mfc_batchbuffer_surface.bo = NULL;
621 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
622 mfc_context->aux_batchbuffer_surface.bo = NULL;
/* Recreate the auxiliary (slice) batchbuffer and expose it as a 16-byte
 * block surface for the media kernel. */
624 if (mfc_context->aux_batchbuffer)
625 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
627 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
628 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
629 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
630 mfc_context->aux_batchbuffer_surface.pitch = 16;
631 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
632 mfc_context->aux_batchbuffer_surface.size_block = 16;
634 gen8_gpe_context_init(ctx, &mfc_context->gpe_context);
/* Emits MFX_PIPE_BUF_ADDR_STATE (61 dwords): relocations for the pre/post
 * deblocking outputs, uncompressed source, MB-status buffer, row-store
 * scratch buffers, and up to 16 reference surfaces; ILDB buffers are unused
 * on the encoder path.
 * NOTE(review): the function opening brace, the "int i;" declaration, several
 * "} else {" lines and some padding/mocs dwords are missing from this chunk,
 * so the visible dword sequence is partial. */
638 gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx,
639 struct intel_encoder_context *encoder_context)
641 struct i965_driver_data *i965 = i965_driver_data(ctx);
642 struct intel_batchbuffer *batch = encoder_context->base.batch;
643 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
646 BEGIN_BCS_BATCH(batch, 61);
648 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
650 /* the DW1-3 is for pre_deblocking */
651 if (mfc_context->pre_deblocking_output.bo)
652 OUT_BCS_RELOC64(batch, mfc_context->pre_deblocking_output.bo,
653 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
656 OUT_BCS_BATCH(batch, 0);
657 OUT_BCS_BATCH(batch, 0); /* pre output addr */
660 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
661 /* the DW4-6 is for the post_deblocking */
663 if (mfc_context->post_deblocking_output.bo)
664 OUT_BCS_RELOC64(batch, mfc_context->post_deblocking_output.bo,
665 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
666 0); /* post output addr */
668 OUT_BCS_BATCH(batch, 0);
669 OUT_BCS_BATCH(batch, 0);
672 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
674 /* the DW7-9 is for the uncompressed_picture */
675 OUT_BCS_RELOC64(batch, mfc_context->uncompressed_picture_source.bo,
676 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
677 0); /* uncompressed data */
679 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
681 /* the DW10-12 is for the mb status */
682 OUT_BCS_RELOC64(batch, mfc_context->macroblock_status_buffer.bo,
683 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
684 0); /* StreamOut data*/
686 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
688 /* the DW13-15 is for the intra_row_store_scratch */
689 OUT_BCS_RELOC64(batch, mfc_context->intra_row_store_scratch_buffer.bo,
690 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
693 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
695 /* the DW16-18 is for the deblocking filter */
696 OUT_BCS_RELOC64(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
697 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
700 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
702 /* the DW 19-50 is for Reference pictures*/
703 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
704 if (mfc_context->reference_surfaces[i].bo != NULL) {
705 OUT_BCS_RELOC64(batch, mfc_context->reference_surfaces[i].bo,
706 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
709 OUT_BCS_BATCH(batch, 0);
710 OUT_BCS_BATCH(batch, 0);
715 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
717 /* The DW 52-54 is for the MB status buffer */
718 OUT_BCS_RELOC64(batch, mfc_context->macroblock_status_buffer.bo,
719 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
720 0); /* Macroblock status buffer*/
722 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
724 /* the DW 55-57 is the ILDB buffer */
725 OUT_BCS_BATCH(batch, 0);
726 OUT_BCS_BATCH(batch, 0);
727 OUT_BCS_BATCH(batch, 0);
729 /* the DW 58-60 is the second ILDB buffer */
730 OUT_BCS_BATCH(batch, 0);
731 OUT_BCS_BATCH(batch, 0);
732 OUT_BCS_BATCH(batch, 0);
734 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_AVC_DIRECTMODE_STATE (71 dwords): direct-MV buffer relocations
 * for the reference frames, the current frame's MV write buffer (slot
 * NUM_MFC_DMV_BUFFERS-2), and POC list entries.
 * NOTE(review): the function opening brace, "int i;", "} else {" lines,
 * and some relocation/POC dwords are missing from this chunk; the POC loop
 * below appears to interleave i/2 values with zero padding but its exact
 * shape cannot be confirmed here. */
738 gen8_mfc_avc_directmode_state(VADriverContextP ctx,
739 struct intel_encoder_context *encoder_context)
741 struct i965_driver_data *i965 = i965_driver_data(ctx);
742 struct intel_batchbuffer *batch = encoder_context->base.batch;
743 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
747 BEGIN_BCS_BATCH(batch, 71);
749 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
751 /* Reference frames and Current frames */
752 /* the DW1-32 is for the direct MV for reference */
753 for (i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
754 if (mfc_context->direct_mv_buffers[i].bo != NULL) {
755 OUT_BCS_RELOC64(batch, mfc_context->direct_mv_buffers[i].bo,
756 I915_GEM_DOMAIN_INSTRUCTION, 0,
759 OUT_BCS_BATCH(batch, 0);
760 OUT_BCS_BATCH(batch, 0);
764 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
766 /* the DW34-36 is the MV for the current reference */
767 OUT_BCS_RELOC64(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
768 I915_GEM_DOMAIN_INSTRUCTION, 0,
771 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
774 for (i = 0; i < 32; i++) {
775 OUT_BCS_BATCH(batch, i / 2);
777 OUT_BCS_BATCH(batch, 0);
778 OUT_BCS_BATCH(batch, 0);
780 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): programs the BSD/MPC
 * row-store scratch buffer; MPR row-store and bitplane-read buffers are
 * unused on the encoder path (zeros).
 * NOTE(review): the function opening brace and the relocation-offset line
 * of the first OUT_BCS_RELOC64 are missing from this chunk. */
785 gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
786 struct intel_encoder_context *encoder_context)
788 struct i965_driver_data *i965 = i965_driver_data(ctx);
789 struct intel_batchbuffer *batch = encoder_context->base.batch;
790 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
792 BEGIN_BCS_BATCH(batch, 10);
794 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
795 OUT_BCS_RELOC64(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
796 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
798 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
800 /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
801 OUT_BCS_BATCH(batch, 0);
802 OUT_BCS_BATCH(batch, 0);
803 OUT_BCS_BATCH(batch, 0);
805 /* the DW7-9 is for Bitplane Read Buffer Base Address */
806 OUT_BCS_BATCH(batch, 0);
807 OUT_BCS_BATCH(batch, 0);
808 OUT_BCS_BATCH(batch, 0);
810 ADVANCE_BCS_BATCH(batch);
/* Programs the full per-picture AVC state sequence into the BCS batch, in the
 * hardware-required order: pipe mode, surface, indirect objects, pipe/BSP
 * buffer addresses, image state, QM/FQM matrices, direct-mode state, and the
 * reference index lists. Hookable steps go through mfc_context function
 * pointers; the rest are called directly.
 * NOTE(review): the function opening/closing braces are missing from this
 * chunk. */
814 static void gen8_mfc_avc_pipeline_picture_programing(VADriverContextP ctx,
815 struct encode_state *encode_state,
816 struct intel_encoder_context *encoder_context)
818 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
820 mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
821 mfc_context->set_surface_state(ctx, encoder_context);
822 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
823 gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
824 gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
825 mfc_context->avc_img_state(ctx, encode_state, encoder_context);
826 mfc_context->avc_qm_state(ctx, encode_state, encoder_context);
827 mfc_context->avc_fqm_state(ctx, encode_state, encoder_context);
828 gen8_mfc_avc_directmode_state(ctx, encoder_context);
829 intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
/* Submits the accumulated batchbuffer to the kernel, kicking off the encode
 * on the hardware. Always reports success — intel_batchbuffer_flush's result
 * is not checked.
 * NOTE(review): the function opening/closing braces are missing from this
 * chunk. */
833 static VAStatus gen8_mfc_run(VADriverContextP ctx,
834 struct encode_state *encode_state,
835 struct intel_encoder_context *encoder_context)
837 struct intel_batchbuffer *batch = encoder_context->base.batch;
839 intel_batchbuffer_flush(batch); //run the pipeline
841 return VA_STATUS_SUCCESS;
/* Reads back the encoded size: maps the coded buffer (which waits for the
 * hardware to finish), converts the coded-segment byte size to bits, and
 * unmaps.
 * NOTE(review): vaStatus from i965_MapBuffer is only checked via assert;
 * the function returns VA_STATUS_SUCCESS unconditionally, so a map failure
 * in a release build would dereference an invalid segment pointer.
 * NOTE(review): the function opening brace is missing from this chunk. */
846 gen8_mfc_stop(VADriverContextP ctx,
847 struct encode_state *encode_state,
848 struct intel_encoder_context *encoder_context,
849 int *encoded_bits_size)
851 VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
852 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
853 VACodedBufferSegment *coded_buffer_segment;
855 vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
856 assert(vaStatus == VA_STATUS_SUCCESS);
857 *encoded_bits_size = coded_buffer_segment->size * 8;
858 i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
860 return VA_STATUS_SUCCESS;
865 gen8_mfc_avc_slice_state(VADriverContextP ctx,
866 VAEncPictureParameterBufferH264 *pic_param,
867 VAEncSliceParameterBufferH264 *slice_param,
868 struct encode_state *encode_state,
869 struct intel_encoder_context *encoder_context,
870 int rate_control_enable,
872 struct intel_batchbuffer *batch)
874 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
875 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
876 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
877 int beginmb = slice_param->macroblock_address;
878 int endmb = beginmb + slice_param->num_macroblocks;
879 int beginx = beginmb % width_in_mbs;
880 int beginy = beginmb / width_in_mbs;
881 int nextx = endmb % width_in_mbs;
882 int nexty = endmb / width_in_mbs;
883 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
884 int last_slice = (endmb == (width_in_mbs * height_in_mbs));
886 unsigned char correct[6], grow, shrink;
888 int weighted_pred_idc = 0;
889 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
890 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
891 int num_ref_l0 = 0, num_ref_l1 = 0;
894 batch = encoder_context->base.batch;
896 if (slice_type == SLICE_TYPE_I) {
897 luma_log2_weight_denom = 0;
898 chroma_log2_weight_denom = 0;
899 } else if (slice_type == SLICE_TYPE_P) {
900 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
901 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
903 if (slice_param->num_ref_idx_active_override_flag)
904 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
905 } else if (slice_type == SLICE_TYPE_B) {
906 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
907 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
908 num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
910 if (slice_param->num_ref_idx_active_override_flag) {
911 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
912 num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
915 if (weighted_pred_idc == 2) {
916 /* 8.4.3 - Derivation process for prediction weights (8-279) */
917 luma_log2_weight_denom = 5;
918 chroma_log2_weight_denom = 5;
922 maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
923 maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
925 for (i = 0; i < 6; i++)
926 correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
928 grow = mfc_context->bit_rate_control_context[slice_type].GrowInit +
929 (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
930 shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit +
931 (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
933 BEGIN_BCS_BATCH(batch, 11);;
935 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
936 OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/
941 (chroma_log2_weight_denom << 8) |
942 (luma_log2_weight_denom << 0));
945 (weighted_pred_idc << 30) |
946 (slice_param->direct_spatial_mv_pred_flag << 29) | /*Direct Prediction Type*/
947 (slice_param->disable_deblocking_filter_idc << 27) |
948 (slice_param->cabac_init_idc << 24) |
949 (qp << 16) | /*Slice Quantization Parameter*/
950 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
951 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
953 (beginy << 24) | /*First MB X&Y , the begin postion of current slice*/
955 slice_param->macroblock_address);
956 OUT_BCS_BATCH(batch, (nexty << 16) | nextx); /*Next slice first MB X&Y*/
958 (0/*rate_control_enable*/ << 31) | /*in CBR mode RateControlCounterEnable = enable*/
959 (1 << 30) | /*ResetRateControlCounter*/
960 (0 << 28) | /*RC Triggle Mode = Always Rate Control*/
961 (4 << 24) | /*RC Stable Tolerance, middle level*/
962 (0/*rate_control_enable*/ << 23) | /*RC Panic Enable*/
963 (0 << 22) | /*QP mode, don't modfiy CBP*/
964 (0 << 21) | /*MB Type Direct Conversion Enabled*/
965 (0 << 20) | /*MB Type Skip Conversion Enabled*/
966 (last_slice << 19) | /*IsLastSlice*/
967 (0 << 18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
968 (1 << 17) | /*HeaderPresentFlag*/
969 (1 << 16) | /*SliceData PresentFlag*/
970 (1 << 15) | /*TailPresentFlag*/
971 (1 << 13) | /*RBSP NAL TYPE*/
972 (0 << 12)); /*CabacZeroWordInsertionEnable*/
973 OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
975 (maxQpN << 24) | /*Target QP - 24 is lowest QP*/
976 (maxQpP << 16) | /*Target QP + 20 is highest QP*/
986 OUT_BCS_BATCH(batch, 0);
988 ADVANCE_BCS_BATCH(batch);
991 #define AVC_INTRA_RDO_OFFSET 4
992 #define AVC_INTER_RDO_OFFSET 10
993 #define AVC_INTER_MSG_OFFSET 8
994 #define AVC_INTER_MV_OFFSET 48
995 #define AVC_RDO_MASK 0xFFFF
/*
 * Emit one MFC_AVC_PAK_OBJECT command for an intra-coded macroblock.
 *
 * (x, y)      MB position in MB units; end_mb flags the last MB of the slice.
 * qp          per-MB quantization parameter.
 * msg         VME output record for this MB (msg[0] carries the MB mode bits,
 *             msg[1..3] the intra prediction modes).
 * target_mb_size/max_mb_size  per-MB size hints in words (0 = disabled).
 *
 * Returns the command length in dwords so the caller can track batch usage.
 */
998 gen8_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
999 int qp, unsigned int *msg,
1000 struct intel_encoder_context *encoder_context,
1001 unsigned char target_mb_size, unsigned char max_mb_size,
1002 struct intel_batchbuffer *batch)
1004 int len_in_dwords = 12;
1005 unsigned int intra_msg;
1006 #define INTRA_MSG_FLAG (1 << 13)
1007 #define INTRA_MBTYPE_MASK (0x1F0000)
1009 batch = encoder_context->base.batch;
1011 BEGIN_BCS_BATCH(batch, len_in_dwords);
/* Repack the VME message word into the PAK intra-MB dword: keep the low
 * mode bits, force the intra flag (bit 13), and move the MB type field
 * down from bits 20:16 to bits 12:8. */
1013 intra_msg = msg[0] & 0xC0FF;
1014 intra_msg |= INTRA_MSG_FLAG;
1015 intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1016 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1017 OUT_BCS_BATCH(batch, 0);
1018 OUT_BCS_BATCH(batch, 0);
1019 OUT_BCS_BATCH(batch,
1020 (0 << 24) | /* PackedMvNum, Debug*/
1021 (0 << 20) | /* No motion vector */
1022 (1 << 19) | /* CbpDcY */
1023 (1 << 18) | /* CbpDcU */
1024 (1 << 17) | /* CbpDcV */
1027 OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x); /* Code Block Pattern for Y, plus MB position */
1028 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
1029 OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last-MB flag and QP */
1031 /* Intra prediction mode data taken straight from the VME output */
1032 OUT_BCS_BATCH(batch, msg[1]); /* Intra16x16 is used here, not 4x4 pred mode */
1033 OUT_BCS_BATCH(batch, msg[2]);
1034 OUT_BCS_BATCH(batch, msg[3] & 0xFF);
1036 /* MaxSizeInWord and TargetSizeInWord */
1037 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1038 (target_mb_size << 16));
1040 OUT_BCS_BATCH(batch, 0);
1042 ADVANCE_BCS_BATCH(batch);
1044 return len_in_dwords;
/*
 * Emit one MFC_AVC_PAK_OBJECT command for an inter-coded macroblock.
 *
 * msg points at this MB's VME output record (msg[0]: inter mode, msg[1]:
 * sub-MB shape); offset is the byte offset of the MV data in the indirect
 * VME output buffer. The MV array inside the record is rewritten in place
 * below to match the layout AVC_PAK expects.
 *
 * Returns the command length in dwords.
 */
1048 gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1049 unsigned int *msg, unsigned int offset,
1050 struct intel_encoder_context *encoder_context,
1051 unsigned char target_mb_size, unsigned char max_mb_size, int slice_type,
1052 struct intel_batchbuffer *batch)
1054 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1055 int len_in_dwords = 12;
1056 unsigned int inter_msg = 0;
1058 batch = encoder_context->base.batch;
1060 #define MSG_MV_OFFSET 4
1061 unsigned int *mv_ptr;
1062 mv_ptr = msg + MSG_MV_OFFSET;
1063 /* MV of VME output is based on 16 sub-blocks. So it is necessary
1064 * to convert them to be compatible with the format of AVC_PAK
/* Each MV is two dwords (x, y); the per-partition MVs are gathered from
 * the 16 sub-block slots into the first 8 dwords. */
1067 if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1068 /* MV[0] and MV[2] are replicated */
1069 mv_ptr[4] = mv_ptr[0];
1070 mv_ptr[5] = mv_ptr[1];
1071 mv_ptr[2] = mv_ptr[8];
1072 mv_ptr[3] = mv_ptr[9];
1073 mv_ptr[6] = mv_ptr[8];
1074 mv_ptr[7] = mv_ptr[9];
1075 } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1076 /* MV[0] and MV[1] are replicated */
1077 mv_ptr[2] = mv_ptr[0];
1078 mv_ptr[3] = mv_ptr[1];
1079 mv_ptr[4] = mv_ptr[16];
1080 mv_ptr[5] = mv_ptr[17];
1081 mv_ptr[6] = mv_ptr[24];
1082 mv_ptr[7] = mv_ptr[25];
1083 } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1084 !(msg[1] & SUBMB_SHAPE_MASK)) {
1085 /* 8x8 with no further sub-partitioning: one MV per 8x8 block.
1086 * Don't touch MV[0] or MV[1] */
1086 mv_ptr[2] = mv_ptr[8];
1087 mv_ptr[3] = mv_ptr[9];
1088 mv_ptr[4] = mv_ptr[16];
1089 mv_ptr[5] = mv_ptr[17];
1090 mv_ptr[6] = mv_ptr[24];
1091 mv_ptr[7] = mv_ptr[25];
1095 BEGIN_BCS_BATCH(batch, len_in_dwords);
1097 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1101 if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1102 if (msg[1] & SUBMB_SHAPE_MASK)
1105 OUT_BCS_BATCH(batch, inter_msg); /* 32 MV*/
1106 OUT_BCS_BATCH(batch, offset); /* indirect MV data offset */
/* Build the inter-MB dword: keep mode/type bits, select the MV count,
 * and force CbpDcY/CbpDcU/CbpDcV (bits 19:17). */
1107 inter_msg = msg[0] & (0x1F00FFFF);
1108 inter_msg |= INTER_MV8;
1109 inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1110 if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1111 (msg[1] & SUBMB_SHAPE_MASK)) {
1112 inter_msg |= INTER_MV32;
1115 OUT_BCS_BATCH(batch, inter_msg);
1117 OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x); /* Code Block Pattern for Y, plus MB position */
1118 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
/* Last-MB flag and QP; B slices additionally set the top nibble */
1120 if (slice_type == SLICE_TYPE_B) {
1121 OUT_BCS_BATCH(batch, (0xF << 28) | (end_mb << 26) | qp); /* Last MB */
1123 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
1126 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
1129 inter_msg = msg[1] >> 8;
1130 /* Inter MB payload: sub-MB shapes and reference indices */
1131 OUT_BCS_BATCH(batch, inter_msg);
1132 OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
1133 OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
1135 /* MaxSizeInWord and TargetSizeInWord */
1136 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1137 (target_mb_size << 16));
1139 OUT_BCS_BATCH(batch, 0x0);
1141 ADVANCE_BCS_BATCH(batch);
1143 return len_in_dwords;
/*
 * Program one H.264 slice into the (software) slice batch buffer:
 * slice state, packed headers (AUD/SPS/PPS on the first slice, per-slice
 * packed data otherwise), then one PAK object per macroblock chosen
 * intra vs. inter by comparing the VME RDO costs.
 */
1147 gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1148 struct encode_state *encode_state,
1149 struct intel_encoder_context *encoder_context,
1151 struct intel_batchbuffer *slice_batch)
1153 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1154 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1155 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1156 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1157 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1158 unsigned int *msg = NULL, offset = 0;
1159 unsigned char *msg_ptr = NULL;
1160 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1161 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1162 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1164 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1165 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1166 unsigned int tail_data[] = { 0x0, 0x0 };
1167 int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1168 int is_intra = slice_type == SLICE_TYPE_I;
/* Under BRC the QP comes from the rate controller, not the app; keep the
 * app-visible slice_qp_delta in sync when this driver builds the header. */
1173 if (rate_control_mode != VA_RC_CQP) {
1174 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1175 if (encode_state->slice_header_index[slice_index] == 0) {
1176 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1181 /* only support for 8-bit pixel bit-depth */
1182 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1183 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1184 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1185 assert(qp >= 0 && qp < 52);
1187 gen8_mfc_avc_slice_state(ctx,
1190 encode_state, encoder_context,
1191 (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);
1193 if (slice_index == 0) {
1194 intel_avc_insert_aud_packed_data(ctx, encode_state, encoder_context, slice_batch);
1195 intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1198 intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
/* Map the VME output once for the whole slice; each MB record is
 * size_block bytes. */
1200 dri_bo_map(vme_context->vme_output.bo, 1);
1201 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1204 msg = (unsigned int *)(msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1206 msg = (unsigned int *)(msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1209 for (i = pSliceParameter->macroblock_address;
1210 i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1211 int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1));
1212 x = i % width_in_mbs;
1213 y = i / width_in_mbs;
1214 msg = (unsigned int *)(msg_ptr + i * vme_context->vme_output.size_block);
1215 if (vme_context->roi_enabled) {
1216 qp_mb = *(vme_context->qp_per_mb + i); /* ROI: per-MB QP override */
1222 gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch);
1224 int inter_rdo, intra_rdo;
/* Choose intra vs. inter per MB by comparing VME RDO costs */
1225 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1226 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1227 offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1228 if (intra_rdo < inter_rdo) {
1229 gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch);
1231 msg += AVC_INTER_MSG_OFFSET;
1232 gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp_mb, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1237 dri_bo_unmap(vme_context->vme_output.bo);
/* Tail data: longer (2-dword) padding on the last slice of the frame */
1240 mfc_context->insert_object(ctx, encoder_context,
1242 2, 1, 1, 0, slice_batch);
1244 mfc_context->insert_object(ctx, encoder_context,
1246 1, 1, 1, 0, slice_batch);
/*
 * Build the per-slice PAK commands with the CPU ("software" path): program
 * every slice into the auxiliary batch buffer, terminate it with
 * MI_BATCH_BUFFER_END, and hand ownership of its BO to the caller.
 * The aux batchbuffer itself is freed here; the extra dri_bo_reference
 * keeps the BO alive for the caller (who must unreference it).
 */
1251 gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1252 struct encode_state *encode_state,
1253 struct intel_encoder_context *encoder_context)
1255 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1256 struct intel_batchbuffer *batch;
1260 batch = mfc_context->aux_batchbuffer;
1261 batch_bo = batch->buffer;
1262 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1263 gen8_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1266 intel_batchbuffer_align(batch, 8);
1268 BEGIN_BCS_BATCH(batch, 2);
1269 OUT_BCS_BATCH(batch, 0);
1270 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1271 ADVANCE_BCS_BATCH(batch);
/* Keep the BO alive past intel_batchbuffer_free(); caller unreferences */
1273 dri_bo_reference(batch_bo);
1274 intel_batchbuffer_free(batch);
1275 mfc_context->aux_batchbuffer = NULL;
/*
 * Bind the VME output buffer as the input surface of the hardware
 * batch-buffer-generation kernel.
 */
1282 gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1283 struct encode_state *encode_state,
1284 struct intel_encoder_context *encoder_context)
1286 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1287 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1289 assert(vme_context->vme_output.bo);
1290 mfc_context->buffer_suface_setup(ctx,
1291 &mfc_context->gpe_context,
1292 &vme_context->vme_output,
1293 BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1294 SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
/*
 * Bind the auxiliary batch-buffer surface as the output surface of the
 * hardware batch-buffer-generation kernel (the kernel writes PAK commands
 * into it).
 */
1298 gen8_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1299 struct encode_state *encode_state,
1300 struct intel_encoder_context *encoder_context)
1302 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1303 assert(mfc_context->aux_batchbuffer_surface.bo);
1304 mfc_context->buffer_suface_setup(ctx,
1305 &mfc_context->gpe_context,
1306 &mfc_context->aux_batchbuffer_surface,
1307 BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1308 SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
/* Set up both surfaces (VME output in, aux batchbuffer out) for the kernel */
1312 gen8_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
1313 struct encode_state *encode_state,
1314 struct intel_encoder_context *encoder_context)
1316 gen8_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1317 gen8_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
/*
 * Fill the interface descriptor remap table (IDRT) for the GPE context:
 * one gen8_interface_descriptor_data entry per kernel, pointing at the
 * kernel start offset, the shared binding table, and 4 CURBE read lengths.
 */
1321 gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
1322 struct encode_state *encode_state,
1323 struct intel_encoder_context *encoder_context)
1325 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1326 struct gen8_interface_descriptor_data *desc;
1329 unsigned char *desc_ptr;
1331 bo = mfc_context->gpe_context.idrt.bo;
1333 assert(bo->virtual);
1334 desc_ptr = (unsigned char *)bo->virtual + mfc_context->gpe_context.idrt.offset;
1336 desc = (struct gen8_interface_descriptor_data *)desc_ptr;
1338 for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1339 struct i965_kernel *kernel;
1340 kernel = &mfc_context->gpe_context.kernels[i];
1341 assert(sizeof(*desc) == 32);
1342 /* Set up the descriptor table entry for this kernel */
1343 memset(desc, 0, sizeof(*desc));
1344 desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6; /* 64-byte aligned */
1345 desc->desc3.sampler_count = 0; /* no samplers used */
1346 desc->desc3.sampler_state_pointer = 0;
1347 desc->desc4.binding_table_entry_count = 1;
1348 desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5); /* 32-byte aligned */
1349 desc->desc5.constant_urb_entry_read_offset = 0;
1350 desc->desc5.constant_urb_entry_read_length = 4; /* 4 CURBE registers */
/* Program the constant (CURBE) data for the batch-buffer-generation kernel */
1362 gen8_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
1363 struct encode_state *encode_state,
1364 struct intel_encoder_context *encoder_context)
1366 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1371 #define AVC_PAK_LEN_IN_BYTE 48
1372 #define AVC_PAK_LEN_IN_OWORD 3
/*
 * Emit one CMD_MEDIA_OBJECT that asks the media kernel to generate PAK
 * commands for a run of number_mb_cmds macroblocks starting at (mb_x, mb_y).
 * head_offset is the destination offset (in 16-byte units) inside the aux
 * batch buffer; fwd_ref/bwd_ref are the packed reference indices.
 */
1375 gen8_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1376 uint32_t intra_flag,
1388 uint32_t temp_value;
1389 BEGIN_BATCH(batch, 14);
1391 OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2));
1392 OUT_BATCH(batch, 0);
1393 OUT_BATCH(batch, 0);
1394 OUT_BATCH(batch, 0);
1395 OUT_BATCH(batch, 0);
1396 OUT_BATCH(batch, 0);
/* Inline payload (CURBE-style arguments read by the kernel) */
1399 OUT_BATCH(batch, head_offset / 16); /* destination, in owords */
1400 OUT_BATCH(batch, (intra_flag) | (qp << 16));
1401 temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16));
1402 OUT_BATCH(batch, temp_value);
1404 OUT_BATCH(batch, number_mb_cmds);
1407 ((slice_end_y << 8) | (slice_end_x)));
1408 OUT_BATCH(batch, fwd_ref);
1409 OUT_BATCH(batch, bwd_ref);
1411 OUT_BATCH(batch, MI_NOOP);
1413 ADVANCE_BATCH(batch);
/*
 * Split one slice into runs of macroblocks and emit a MEDIA_OBJECT per run
 * so the hardware kernel generates the PAK commands. Run length is tuned to
 * the frame width; with ROI enabled a run is additionally cut wherever the
 * per-MB QP changes, since one MEDIA_OBJECT carries a single QP.
 */
1417 gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1418 struct intel_encoder_context *encoder_context,
1419 VAEncSliceParameterBufferH264 *slice_param,
1424 struct intel_batchbuffer *batch = encoder_context->base.batch;
1425 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1426 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1427 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1428 int total_mbs = slice_param->num_macroblocks;
1429 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1430 int number_mb_cmds = 128;
1431 int starting_offset = 0;
1433 int last_mb, slice_end_x, slice_end_y;
1434 int remaining_mb = total_mbs;
1435 uint32_t fwd_ref, bwd_ref, mb_flag;
1437 int number_roi_mbs, max_mb_cmds, i;
1439 last_mb = slice_param->macroblock_address + total_mbs - 1;
1440 slice_end_x = last_mb % width_in_mbs;
1441 slice_end_y = last_mb / width_in_mbs;
1443 if (slice_type == SLICE_TYPE_I) {
1448 fwd_ref = vme_context->ref_index_in_mb[0];
1449 bwd_ref = vme_context->ref_index_in_mb[1];
/* Scale the MBs-per-command batch with frame width (heuristic tuning) */
1453 if (width_in_mbs >= 100) {
1454 number_mb_cmds = width_in_mbs / 5;
1455 } else if (width_in_mbs >= 80) {
1456 number_mb_cmds = width_in_mbs / 4;
1457 } else if (width_in_mbs >= 60) {
1458 number_mb_cmds = width_in_mbs / 3;
1459 } else if (width_in_mbs >= 40) {
1460 number_mb_cmds = width_in_mbs / 2;
1462 number_mb_cmds = width_in_mbs;
1465 max_mb_cmds = number_mb_cmds;
/* Walk the slice, one MEDIA_OBJECT per run */
1468 mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs;
1469 mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs;
1471 number_mb_cmds = max_mb_cmds;
1472 if (vme_context->roi_enabled) {
/* Cut the run at the first MB whose ROI QP differs: a single
 * command can only carry one QP */
1475 tmp_qp = *(vme_context->qp_per_mb + starting_offset);
1476 for (i = 1; i < max_mb_cmds; i++) {
1477 if (tmp_qp != *(vme_context->qp_per_mb + starting_offset + i))
1483 number_mb_cmds = number_roi_mbs;
1487 if (number_mb_cmds >= remaining_mb) {
1488 number_mb_cmds = remaining_mb;
1491 gen8_mfc_batchbuffer_emit_object_command(batch,
/* Advance: AVC_PAK_LEN_IN_BYTE output bytes are produced per MB */
1504 head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE);
1505 remaining_mb -= number_mb_cmds;
1506 starting_offset += number_mb_cmds;
1507 } while (remaining_mb > 0);
/*
 * Hardware-path counterpart of gen8_mfc_avc_pipeline_slice_programing:
 * program slice state and packed headers into the aux batch, reserve room
 * for the kernel-generated per-MB PAK commands, then emit the MEDIA_OBJECT
 * commands that will fill that reserved region.
 */
1511 gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1512 struct encode_state *encode_state,
1513 struct intel_encoder_context *encoder_context,
1516 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1517 struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1518 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1519 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1520 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1521 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1522 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1523 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1524 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1525 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1526 unsigned int tail_data[] = { 0x0, 0x0 };
1528 int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
/* Under BRC the QP comes from the rate controller; keep slice_qp_delta
 * consistent when this driver builds the slice header. */
1532 if (rate_control_mode != VA_RC_CQP) {
1533 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1534 if (encode_state->slice_header_index[slice_index] == 0) {
1535 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1540 /* only support for 8-bit pixel bit-depth */
1541 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1542 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1543 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1544 assert(qp >= 0 && qp < 52);
1546 gen8_mfc_avc_slice_state(ctx,
1551 (rate_control_mode != VA_RC_CQP),
1555 if (slice_index == 0) {
1556 intel_avc_insert_aud_packed_data(ctx, encode_state, encoder_context, slice_batch);
1557 intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1560 intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1562 intel_batchbuffer_align(slice_batch, 64); /* aligned by a cache line */
1563 head_offset = intel_batchbuffer_used_size(slice_batch);
/* Reserve space that the media kernel will overwrite with per-MB PAK
 * commands (AVC_PAK_LEN_IN_BYTE bytes per MB) */
1565 slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE;
1567 gen8_mfc_avc_batchbuffer_slice_command(ctx,
1575 /* Aligned for tail */
1576 intel_batchbuffer_align(slice_batch, 64); /* aligned by cache line */
1578 mfc_context->insert_object(ctx,
1589 mfc_context->insert_object(ctx,
/*
 * Run the media pipeline that generates the slice batch buffer on the GPU:
 * set up the GPE pipeline (gen8 or gen9 variant), emit the per-slice
 * MEDIA_OBJECT streams, terminate the slice batch, flush, and submit.
 */
1605 gen8_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1606 struct encode_state *encode_state,
1607 struct intel_encoder_context *encoder_context)
1609 struct i965_driver_data *i965 = i965_driver_data(ctx);
1610 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1611 struct intel_batchbuffer *batch = encoder_context->base.batch;
1614 intel_batchbuffer_start_atomic(batch, 0x4000);
1616 if (IS_GEN9(i965->intel.device_info))
1617 gen9_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1619 gen8_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1621 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1622 gen8_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i);
1625 struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
/* Terminate the generated slice batch so the BCS ring can chain into it */
1627 intel_batchbuffer_align(slice_batch, 8);
1628 BEGIN_BCS_BATCH(slice_batch, 2);
1629 OUT_BCS_BATCH(slice_batch, 0);
1630 OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
1631 ADVANCE_BCS_BATCH(slice_batch);
1633 BEGIN_BATCH(batch, 2);
1634 OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH);
1635 OUT_BATCH(batch, 0);
1636 ADVANCE_BATCH(batch);
/* Free the aux batchbuffer wrapper; the caller holds a BO reference */
1638 intel_batchbuffer_free(slice_batch);
1639 mfc_context->aux_batchbuffer = NULL;
1642 if (IS_GEN9(i965->intel.device_info))
1643 gen9_gpe_pipeline_end(ctx, &mfc_context->gpe_context, batch);
1645 intel_batchbuffer_end_atomic(batch);
1646 intel_batchbuffer_flush(batch);
/* Drive the full GPU-side batch-buffer generation: surfaces, IDRT, CURBE,
 * then the media pipeline itself. */
1651 gen8_mfc_build_avc_batchbuffer(VADriverContextP ctx,
1652 struct encode_state *encode_state,
1653 struct intel_encoder_context *encoder_context)
1655 gen8_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1656 gen8_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1657 gen8_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1658 gen8_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
/*
 * Generate the slice batch buffer on the GPU ("hardware" path) and return
 * its BO. The extra reference taken here is owned by the caller, who must
 * dri_bo_unreference() it when done.
 */
1662 gen8_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1663 struct encode_state *encode_state,
1664 struct intel_encoder_context *encoder_context)
1666 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1668 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
1669 gen8_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1671 return mfc_context->aux_batchbuffer_surface.bo;
/*
 * Top-level BCS programming for one H.264 frame: build the slice batch
 * (software or hardware path), then emit the picture-level state followed
 * by a second-level MI_BATCH_BUFFER_START that chains into the slice batch.
 */
1675 gen8_mfc_avc_pipeline_programing(VADriverContextP ctx,
1676 struct encode_state *encode_state,
1677 struct intel_encoder_context *encoder_context)
1679 struct intel_batchbuffer *batch = encoder_context->base.batch;
1680 dri_bo *slice_batch_bo;
1682 if (intel_mfc_interlace_check(ctx, encode_state, encoder_context)) {
1683 fprintf(stderr, "Current VA driver don't support interlace mode!\n");
/* Slice batch built by CPU or by a media kernel, selectable at runtime */
1688 if (encoder_context->soft_batch_force)
1689 slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1691 slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1695 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1696 intel_batchbuffer_emit_mi_flush(batch);
1698 // picture level programing
1699 gen8_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* Chain into the second-level slice batch buffer */
1701 BEGIN_BCS_BATCH(batch, 3);
1702 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1703 OUT_BCS_RELOC64(batch,
1705 I915_GEM_DOMAIN_COMMAND, 0,
1707 ADVANCE_BCS_BATCH(batch);
1710 intel_batchbuffer_end_atomic(batch);
/* Drop the reference returned by the batch-buffer builder */
1712 dri_bo_unreference(slice_batch_bo);
/*
 * Encode one H.264 picture: init MFC state, prepare reference/reconstructed
 * surfaces, program and run the BCS pipeline, then (for CBR/VBR) read back
 * the coded size and let BRC adjust QP for the next attempt/frame.
 *
 * Returns VA_STATUS_SUCCESS; unrepairable HRD violations are reported once
 * on stderr but do not fail the call.
 */
1717 gen8_mfc_avc_encode_picture(VADriverContextP ctx,
1718 struct encode_state *encode_state,
1719 struct intel_encoder_context *encoder_context)
1721 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1722 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1723 int current_frame_bits_size;
1727 gen8_mfc_init(ctx, encode_state, encoder_context);
1728 intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1729 /*Programing bcs pipeline*/
1730 gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
1731 gen8_mfc_run(ctx, encode_state, encoder_context);
1732 if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
/* Fixed mojibake: "&current_frame_bits_size" had been mangled into
 * "&curren;t_frame_bits_size" by an HTML-entity conversion. */
1733 gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1734 sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
1735 if (sts == BRC_NO_HRD_VIOLATION) {
1736 intel_mfc_hrd_context_update(encode_state, mfc_context);
1738 } else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
/* QP already at its limit: report once, then carry on */
1739 if (!mfc_context->hrd.violation_noted) {
1740 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP) ? "overflow" : "underflow");
1741 mfc_context->hrd.violation_noted = 1;
1743 return VA_STATUS_SUCCESS;
1750 return VA_STATUS_SUCCESS;
1758 va_to_gen8_mpeg2_picture_type[3] = {
/*
 * Emit MFX_MPEG2_PIC_STATE from the application's MPEG-2 picture parameters:
 * f_codes, picture coding extension flags, frame size in MBs, and MB size
 * limits. The first slice's quantiser_scale_code selects an alternate
 * rate-control-ish dword (exact field meaning per PRM; left as in original).
 */
1765 gen8_mfc_mpeg2_pic_state(VADriverContextP ctx,
1766 struct intel_encoder_context *encoder_context,
1767 struct encode_state *encode_state)
1769 struct intel_batchbuffer *batch = encoder_context->base.batch;
1770 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1771 VAEncPictureParameterBufferMPEG2 *pic_param;
1772 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1773 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1774 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1776 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1777 pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1778 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1780 BEGIN_BCS_BATCH(batch, 13);
1781 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1782 OUT_BCS_BATCH(batch,
1783 (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1784 (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1785 (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1786 (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1787 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1788 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1789 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1790 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1791 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1792 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1793 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1794 pic_param->picture_coding_extension.bits.alternate_scan << 6);
1795 OUT_BCS_BATCH(batch,
1796 0 << 14 | /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1797 va_to_gen8_mpeg2_picture_type[pic_param->picture_type] << 9 |
1799 OUT_BCS_BATCH(batch,
1800 1 << 31 | /* slice concealment */
1801 (height_in_mbs - 1) << 16 |
1802 (width_in_mbs - 1));
/* NOTE(review): field meanings of this conditional dword are taken from
 * the hardware spec; confirm against the MFX_MPEG2_PIC_STATE PRM entry. */
1804 if (slice_param && slice_param->quantiser_scale_code >= 14)
1805 OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1807 OUT_BCS_BATCH(batch, 0);
1809 OUT_BCS_BATCH(batch, 0);
1810 OUT_BCS_BATCH(batch,
1811 0xFFF << 16 | /* InterMBMaxSize */
1812 0xFFF << 0 | /* IntraMBMaxSize */
1814 OUT_BCS_BATCH(batch, 0);
1815 OUT_BCS_BATCH(batch, 0);
1816 OUT_BCS_BATCH(batch, 0);
1817 OUT_BCS_BATCH(batch, 0);
1818 OUT_BCS_BATCH(batch, 0);
1819 OUT_BCS_BATCH(batch, 0);
1820 ADVANCE_BCS_BATCH(batch);
/*
 * Load the MPEG-2 quantization matrices: the default intra matrix from the
 * MPEG-2 spec and a flat (all-16) non-intra matrix. 16 dwords each.
 */
1824 gen8_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1826 unsigned char intra_qm[64] = {
1827 8, 16, 19, 22, 26, 27, 29, 34,
1828 16, 16, 22, 24, 27, 29, 34, 37,
1829 19, 22, 26, 27, 29, 34, 34, 38,
1830 22, 22, 26, 27, 29, 34, 37, 40,
1831 22, 26, 27, 29, 32, 35, 40, 48,
1832 26, 27, 29, 32, 35, 40, 48, 58,
1833 26, 27, 29, 34, 38, 46, 56, 69,
1834 27, 29, 35, 38, 46, 56, 69, 83
1837 unsigned char non_intra_qm[64] = {
1838 16, 16, 16, 16, 16, 16, 16, 16,
1839 16, 16, 16, 16, 16, 16, 16, 16,
1840 16, 16, 16, 16, 16, 16, 16, 16,
1841 16, 16, 16, 16, 16, 16, 16, 16,
1842 16, 16, 16, 16, 16, 16, 16, 16,
1843 16, 16, 16, 16, 16, 16, 16, 16,
1844 16, 16, 16, 16, 16, 16, 16, 16,
1845 16, 16, 16, 16, 16, 16, 16, 16
1848 gen8_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1849 gen8_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16, encoder_context);
/*
 * Load the MPEG-2 forward (reciprocal) quantization matrices used by the
 * encoder: each entry is 65536 / q, i.e. a 16.16-style reciprocal of the
 * corresponding quantizer step. 32 dwords each (16-bit entries).
 *
 * NOTE(review): a few intra_fqm divisors do not mirror intra_qm above
 * (e.g. 0x18 where the QM has 22, and 0x13 repeated mid-table where the
 * QM has 32/27); this matches the table as shipped, but verify against
 * the default intra matrix before relying on exact values.
 */
1853 gen8_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1855 unsigned short intra_fqm[64] = {
1856 65536 / 0x8, 65536 / 0x10, 65536 / 0x13, 65536 / 0x16, 65536 / 0x16, 65536 / 0x1a, 65536 / 0x1a, 65536 / 0x1b,
1857 65536 / 0x10, 65536 / 0x10, 65536 / 0x16, 65536 / 0x16, 65536 / 0x1a, 65536 / 0x1b, 65536 / 0x1b, 65536 / 0x1d,
1858 65536 / 0x13, 65536 / 0x16, 65536 / 0x1a, 65536 / 0x1a, 65536 / 0x1b, 65536 / 0x1d, 65536 / 0x1d, 65536 / 0x23,
1859 65536 / 0x16, 65536 / 0x18, 65536 / 0x1b, 65536 / 0x1b, 65536 / 0x13, 65536 / 0x20, 65536 / 0x22, 65536 / 0x26,
1860 65536 / 0x1a, 65536 / 0x1b, 65536 / 0x13, 65536 / 0x13, 65536 / 0x20, 65536 / 0x23, 65536 / 0x26, 65536 / 0x2e,
1861 65536 / 0x1b, 65536 / 0x1d, 65536 / 0x22, 65536 / 0x22, 65536 / 0x23, 65536 / 0x28, 65536 / 0x2e, 65536 / 0x38,
1862 65536 / 0x1d, 65536 / 0x22, 65536 / 0x22, 65536 / 0x25, 65536 / 0x28, 65536 / 0x30, 65536 / 0x38, 65536 / 0x45,
1863 65536 / 0x22, 65536 / 0x25, 65536 / 0x26, 65536 / 0x28, 65536 / 0x30, 65536 / 0x3a, 65536 / 0x45, 65536 / 0x53,
/* Flat non-intra matrix: 65536 / 16 == 0x1000 everywhere */
1866 unsigned short non_intra_fqm[64] = {
1867 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1868 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1869 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1870 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1871 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1872 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1873 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1874 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1877 gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1878 gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
/*
 * Emit MFC_MPEG2_SLICEGROUP_STATE for one slice group: header/tail flags,
 * intra-slice flags, group QP and the bitstream output offset.
 * NOTE(review): parameter "is_fisrt_slice_group" is a pre-existing typo of
 * "is_first_slice_group"; left as-is since callers use the same spelling.
 */
1882 gen8_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1883 struct intel_encoder_context *encoder_context,
1885 int next_x, int next_y,
1886 int is_fisrt_slice_group,
1887 int is_last_slice_group,
1890 struct intel_batchbuffer *batch)
1892 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1895 batch = encoder_context->base.batch;
1897 BEGIN_BCS_BATCH(batch, 8);
1899 OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1900 OUT_BCS_BATCH(batch,
1901 0 << 31 | /* MbRateCtrlFlag */
1902 !!is_last_slice_group << 19 | /* IsLastSliceGrp */
1903 1 << 17 | /* Insert Header before the first slice group data */
1904 1 << 16 | /* SliceData PresentFlag: always 1 */
1905 1 << 15 | /* TailPresentFlag: always 1 */
1906 0 << 14 | /* FirstSliceHdrDisabled: slice header for each slice */
1907 !!intra_slice << 13 | /* IntraSlice */
1908 !!intra_slice << 12 | /* IntraSliceFlag */
1910 OUT_BCS_BATCH(batch,
1916 OUT_BCS_BATCH(batch, qp); /* FIXME: SliceGroupQp */
1917 /* bitstream pointer is only loaded once for the first slice of a frame when
1918 * LoadSlicePointerFlag is 0
1920 OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1921 OUT_BCS_BATCH(batch, 0); /* FIXME: */
1922 OUT_BCS_BATCH(batch, 0); /* FIXME: CorrectPoints */
1923 OUT_BCS_BATCH(batch, 0); /* FIXME: CVxxx */
1925 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one MFC_MPEG2_PAK_OBJECT for an intra-coded MPEG-2 macroblock.
 * All four motion vectors are zeroed since intra MBs carry no MVs.
 * Returns the command length in dwords.
 */
1929 gen8_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1930 struct intel_encoder_context *encoder_context,
1932 int first_mb_in_slice,
1933 int last_mb_in_slice,
1934 int first_mb_in_slice_group,
1935 int last_mb_in_slice_group,
1938 int coded_block_pattern,
1939 unsigned char target_size_in_word,
1940 unsigned char max_size_in_word,
1941 struct intel_batchbuffer *batch)
1943 int len_in_dwords = 9;
1946 batch = encoder_context->base.batch;
1948 BEGIN_BCS_BATCH(batch, len_in_dwords);
1950 OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1951 OUT_BCS_BATCH(batch,
1952 0 << 24 | /* PackedMvNum */
1953 0 << 20 | /* MvFormat */
1954 7 << 17 | /* CbpDcY/CbpDcU/CbpDcV */
1955 0 << 15 | /* TransformFlag: frame DCT */
1956 0 << 14 | /* FieldMbFlag */
1957 1 << 13 | /* IntraMbFlag */
1958 mb_type << 8 | /* MbType: Intra */
1959 0 << 2 | /* SkipMbFlag */
1960 0 << 0 | /* InterMbMode */
1962 OUT_BCS_BATCH(batch, y << 16 | x); /* MB position in MB units */
1963 OUT_BCS_BATCH(batch,
1964 max_size_in_word << 24 |
1965 target_size_in_word << 16 |
1966 coded_block_pattern << 6 | /* CBP */
1968 OUT_BCS_BATCH(batch,
1969 last_mb_in_slice << 31 |
1970 first_mb_in_slice << 30 |
1971 0 << 27 | /* EnableCoeffClamp */
1972 last_mb_in_slice_group << 26 |
1973 0 << 25 | /* MbSkipConvDisable */
1974 first_mb_in_slice_group << 24 |
1975 0 << 16 | /* MvFieldSelect */
1976 qp_scale_code << 0 |
/* Intra MB: no motion vectors */
1978 OUT_BCS_BATCH(batch, 0); /* MV[0][0] */
1979 OUT_BCS_BATCH(batch, 0); /* MV[1][0] */
1980 OUT_BCS_BATCH(batch, 0); /* MV[0][1] */
1981 OUT_BCS_BATCH(batch, 0); /* MV[1][1] */
1983 ADVANCE_BCS_BATCH(batch);
1985 return len_in_dwords;
/* Byte offset of the packed motion-vector pairs inside each VME output
 * message block (see gen8_mfc_mpeg2_pak_object_inter, which reads four
 * shorts from here). */
1989 #define MPEG2_INTER_MV_OFFSET 48
/* Per-f_code clamp range for MPEG-2 motion vectors, in half-pel units.
 * NOTE(review): the table initializer lines are missing from this
 * excerpt; indexed by f_code in mpeg2_motion_vector() below. */
1991 static struct _mv_ranges {
1992 int low; /* in the unit of 1/2 pixel */
1993 int high; /* in the unit of 1/2 pixel */
/*
 * Sanitize one MV component (half-pel units) for the macroblock at
 * index `pos`: a vector that would point outside [0, display_max) is
 * dropped, then the result is clamped to the legal range for `f_code`
 * via the mv_ranges table (valid f_code values are 1..9).
 * NOTE(review): the zero-assignment and `return mv;` lines are missing
 * from this excerpt; leading numbers are original file line numbers.
 */
2008 mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
2010 if (mv + pos * 16 * 2 < 0 ||
2011 mv + (pos + 1) * 16 * 2 > display_max * 2)
2014 if (f_code > 0 && f_code < 10) {
2015 if (mv < mv_ranges[f_code].low)
2016 mv = mv_ranges[f_code].low;
2018 if (mv > mv_ranges[f_code].high)
2019 mv = mv_ranges[f_code].high;
/*
 * Emit one 9-DWORD MFC_MPEG2_PAK_OBJECT for an inter (frame-predicted)
 * macroblock.  The two MV pairs are fetched from the VME output message
 * at MPEG2_INTER_MV_OFFSET, halved (VME is quarter-pel, PAK wants
 * half-pel — TODO confirm units) and clamped via mpeg2_motion_vector().
 * Returns the command length in DWORDs.
 * NOTE(review): interleaved source lines are missing from this excerpt;
 * the doubled ';;' on the mvptr line is in the original source.
 */
2026 gen8_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2027 struct encode_state *encode_state,
2028 struct intel_encoder_context *encoder_context,
2030 int width_in_mbs, int height_in_mbs,
2032 int first_mb_in_slice,
2033 int last_mb_in_slice,
2034 int first_mb_in_slice_group,
2035 int last_mb_in_slice_group,
2037 unsigned char target_size_in_word,
2038 unsigned char max_size_in_word,
2039 struct intel_batchbuffer *batch)
2041 VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2042 int len_in_dwords = 9;
2043 short *mvptr, mvx0, mvy0, mvx1, mvy1;
2046 batch = encoder_context->base.batch;
2048 mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);;
2049 mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2050 mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2051 mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2052 mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2054 BEGIN_BCS_BATCH(batch, len_in_dwords);
2056 OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2057 OUT_BCS_BATCH(batch,
2058 2 << 24 | /* PackedMvNum */
2059 7 << 20 | /* MvFormat */
2060 7 << 17 | /* CbpDcY/CbpDcU/CbpDcV */
2061 0 << 15 | /* TransformFlag: frame DCT */
2062 0 << 14 | /* FieldMbFlag */
2063 0 << 13 | /* IntraMbFlag */
2064 1 << 8 | /* MbType: Frame-based */
2065 0 << 2 | /* SkipMbFlag */
2066 0 << 0 | /* InterMbMode */
2068 OUT_BCS_BATCH(batch, y << 16 | x);
2069 OUT_BCS_BATCH(batch,
2070 max_size_in_word << 24 |
2071 target_size_in_word << 16 |
2072 0x3f << 6 | /* CBP */
2074 OUT_BCS_BATCH(batch,
2075 last_mb_in_slice << 31 |
2076 first_mb_in_slice << 30 |
2077 0 << 27 | /* EnableCoeffClamp */
2078 last_mb_in_slice_group << 26 |
2079 0 << 25 | /* MbSkipConvDisable */
2080 first_mb_in_slice_group << 24 |
2081 0 << 16 | /* MvFieldSelect */
2082 qp_scale_code << 0 |
/* each MV DWORD packs x in the low 16 bits, y in the high 16 bits */
2085 OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16); /* MV[0][0] */
2086 OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16); /* MV[1][0] */
2087 OUT_BCS_BATCH(batch, 0); /* MV[0][1] */
2088 OUT_BCS_BATCH(batch, 0); /* MV[1][1] */
2090 ADVANCE_BCS_BATCH(batch);
2092 return len_in_dwords;
/*
 * Insert the application-supplied packed MPEG-2 sequence (SPS) and
 * picture (PPS) headers into the slice batch, if the app provided them
 * via VAEncPackedHeader buffers.  Each header is inserted raw (no
 * emulation-byte insertion is needed for MPEG-2).
 * NOTE(review): interleaved source lines (braces, some insert_object
 * arguments) are missing from this excerpt.
 */
2096 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2097 struct encode_state *encode_state,
2098 struct intel_encoder_context *encoder_context,
2099 struct intel_batchbuffer *slice_batch)
2101 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2102 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
/* sequence header, when packed by the app */
2104 if (encode_state->packed_header_data[idx]) {
2105 VAEncPackedHeaderParameterBuffer *param = NULL;
2106 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2107 unsigned int length_in_bits;
2109 assert(encode_state->packed_header_param[idx]);
2110 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2111 length_in_bits = param->bit_length;
2113 mfc_context->insert_object(ctx,
2116 ALIGN(length_in_bits, 32) >> 5,
2117 length_in_bits & 0x1f,
2118 5, /* FIXME: check it */
2121 0, /* Needn't insert emulation bytes for MPEG-2 */
/* picture header, when packed by the app */
2125 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2127 if (encode_state->packed_header_data[idx]) {
2128 VAEncPackedHeaderParameterBuffer *param = NULL;
2129 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2130 unsigned int length_in_bits;
2132 assert(encode_state->packed_header_param[idx]);
2133 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2134 length_in_bits = param->bit_length;
2136 mfc_context->insert_object(ctx,
2139 ALIGN(length_in_bits, 32) >> 5,
2140 length_in_bits & 0x1f,
2141 5, /* FIXME: check it */
2144 0, /* Needn't insert emulation bytes for MPEG-2 */
/*
 * Program one MPEG-2 slice group into `slice_batch`: slice-group state,
 * packed headers (first group only), a section delimiter, then one PAK
 * object per macroblock.  For non-intra slices, the VME output (mapped
 * from vme_output.bo) supplies per-MB RDO costs used to choose between
 * intra and inter PAK objects, plus the motion vectors for inter MBs.
 * The group is terminated by a tail delimiter (end of picture) or a
 * section delimiter (more groups follow).
 * NOTE(review): interleaved source lines (braces, some call arguments,
 * declarations of i/j/x/y) are missing from this excerpt.
 */
2150 gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2151 struct encode_state *encode_state,
2152 struct intel_encoder_context *encoder_context,
2154 VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2155 struct intel_batchbuffer *slice_batch)
2157 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2158 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2159 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2160 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2161 unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2162 unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2163 int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2164 int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2166 int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2167 unsigned int *msg = NULL;
2168 unsigned char *msg_ptr = NULL;
2170 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2171 h_start_pos = slice_param->macroblock_address % width_in_mbs;
2172 v_start_pos = slice_param->macroblock_address / width_in_mbs;
/* a slice group must not wrap across a macroblock row */
2173 assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2175 dri_bo_map(vme_context->vme_output.bo, 0);
2176 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
/* start position of the next group, or bottom of picture if this is the last */
2178 if (next_slice_group_param) {
2179 h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2180 v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2182 h_next_start_pos = 0;
2183 v_next_start_pos = height_in_mbs;
2186 gen8_mfc_mpeg2_slicegroup_state(ctx,
2193 next_slice_group_param == NULL,
2194 slice_param->is_intra_slice,
2195 slice_param->quantiser_scale_code,
/* packed SPS/PPS go in front of the very first slice group only */
2198 if (slice_index == 0)
2199 intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2201 /* Insert '00' to make sure the header is valid */
2202 mfc_context->insert_object(ctx,
2204 (unsigned int*)section_delimiter,
2206 8, /* 8bits in the last DWORD */
2213 for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2214 /* PAK for each macroblocks */
2215 for (j = 0; j < slice_param->num_macroblocks; j++) {
2216 int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2217 int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2218 int first_mb_in_slice = (j == 0);
2219 int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2220 int first_mb_in_slice_group = (i == 0 && j == 0);
2221 int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2222 j == slice_param->num_macroblocks - 1);
/* per-MB VME output message for this macroblock address */
2224 msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2226 if (slice_param->is_intra_slice) {
2227 gen8_mfc_mpeg2_pak_object_intra(ctx,
2232 first_mb_in_slice_group,
2233 last_mb_in_slice_group,
2235 slice_param->quantiser_scale_code,
/* non-intra slice: pick intra vs inter per MB by comparing RDO costs */
2241 int inter_rdo, intra_rdo;
2242 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2243 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2245 if (intra_rdo < inter_rdo)
2246 gen8_mfc_mpeg2_pak_object_intra(ctx,
2251 first_mb_in_slice_group,
2252 last_mb_in_slice_group,
2254 slice_param->quantiser_scale_code,
2260 gen8_mfc_mpeg2_pak_object_inter(ctx,
2264 width_in_mbs, height_in_mbs,
2268 first_mb_in_slice_group,
2269 last_mb_in_slice_group,
2270 slice_param->quantiser_scale_code,
2280 dri_bo_unmap(vme_context->vme_output.bo);
2283 if (next_slice_group_param == NULL) { /* end of a picture */
2284 mfc_context->insert_object(ctx,
2286 (unsigned int *)tail_delimiter,
2288 8, /* 8bits in the last DWORD */
2294 } else { /* end of a slice group */
2295 mfc_context->insert_object(ctx,
2297 (unsigned int *)section_delimiter,
2299 8, /* 8bits in the last DWORD */
2309 * A batch buffer for all slices, including slice state,
2310 * slice insert object and slice pak object commands
/*
 * Build the per-frame auxiliary batch buffer containing every slice
 * group, terminate it with MI_BATCH_BUFFER_END, and hand the underlying
 * bo back to the caller (referenced; the batchbuffer wrapper is freed).
 * NOTE(review): the return type, `dri_bo *batch_bo` / loop-index
 * declarations and the final `return batch_bo;` are among the lines
 * missing from this excerpt.
 */
2314 gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2315 struct encode_state *encode_state,
2316 struct intel_encoder_context *encoder_context)
2318 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2319 struct intel_batchbuffer *batch;
2320 VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2324 batch = mfc_context->aux_batchbuffer;
2325 batch_bo = batch->buffer;
/* each slice-param buffer is one slice group; peek at the next one so
 * the current group knows where it ends */
2327 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2328 if (i == encode_state->num_slice_params_ext - 1)
2329 next_slice_group_param = NULL;
2331 next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2333 gen8_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2336 intel_batchbuffer_align(batch, 8);
2338 BEGIN_BCS_BATCH(batch, 2);
2339 OUT_BCS_BATCH(batch, 0);
2340 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2341 ADVANCE_BCS_BATCH(batch);
/* keep the bo alive past intel_batchbuffer_free(); ownership moves to caller */
2343 dri_bo_reference(batch_bo);
2344 intel_batchbuffer_free(batch);
2345 mfc_context->aux_batchbuffer = NULL;
/*
 * Emit all picture-level MFX state for an MPEG-2 frame: pipe mode,
 * surfaces, buffer base addresses, picture state and the (F)QM
 * quantiser-matrix commands.  Slice-level commands are chained in
 * separately by gen8_mfc_mpeg2_pipeline_programing().
 */
2351 gen8_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2352 struct encode_state *encode_state,
2353 struct intel_encoder_context *encoder_context)
2355 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2357 mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2358 mfc_context->set_surface_state(ctx, encoder_context);
2359 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2360 gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
2361 gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2362 gen8_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2363 gen8_mfc_mpeg2_qm_state(ctx, encoder_context);
2364 gen8_mfc_mpeg2_fqm_state(ctx, encoder_context);
/*
 * Top-level BCS programming for one MPEG-2 frame: build the slice batch
 * buffer, emit picture-level state into the main batch, then chain to
 * the slice batch with a second-level MI_BATCH_BUFFER_START.  The slice
 * bo reference taken in gen8_mfc_mpeg2_software_slice_batchbuffer() is
 * dropped here once the relocation holds its own reference.
 */
2368 gen8_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2369 struct encode_state *encode_state,
2370 struct intel_encoder_context *encoder_context)
2372 struct intel_batchbuffer *batch = encoder_context->base.batch;
2373 dri_bo *slice_batch_bo;
2375 slice_batch_bo = gen8_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2378 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
2379 intel_batchbuffer_emit_mi_flush(batch);
2381 // picture level programming
2382 gen8_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* chain to the slice batch: bit 8 selects second-level batch, bit 0 PPGTT */
2384 BEGIN_BCS_BATCH(batch, 4);
2385 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
2386 OUT_BCS_RELOC64(batch,
2388 I915_GEM_DOMAIN_COMMAND, 0,
2390 OUT_BCS_BATCH(batch, 0);
2391 ADVANCE_BCS_BATCH(batch);
2394 intel_batchbuffer_end_atomic(batch);
2396 dri_bo_unreference(slice_batch_bo);
/*
 * Wire up all buffer objects needed to encode one MPEG-2 frame:
 * reconstructed surface, forward/backward reference surfaces (the
 * remaining reference slots alias slot 0/1), the input YUV surface and
 * the coded (output) buffer.  Every bo stored in mfc_context gets an
 * extra reference; gen8_mfc_run/free_resources are expected to drop
 * them.  Returns VA_STATUS_SUCCESS.
 * NOTE(review): lines declaring `bo`/`i` and the dri_bo_map/unmap pair
 * around the coded_buffer_segment write are missing from this excerpt.
 */
2400 intel_mfc_mpeg2_prepare(VADriverContextP ctx,
2401 struct encode_state *encode_state,
2402 struct intel_encoder_context *encoder_context)
2404 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2405 struct object_surface *obj_surface;
2406 struct object_buffer *obj_buffer;
2407 struct i965_coded_buffer_segment *coded_buffer_segment;
2408 VAStatus vaStatus = VA_STATUS_SUCCESS;
2412 /* reconstructed surface */
2413 obj_surface = encode_state->reconstructed_object;
2414 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2415 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2416 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2417 mfc_context->surface_state.width = obj_surface->orig_width;
2418 mfc_context->surface_state.height = obj_surface->orig_height;
2419 mfc_context->surface_state.w_pitch = obj_surface->width;
2420 mfc_context->surface_state.h_pitch = obj_surface->height;
2422 /* forward reference */
2423 obj_surface = encode_state->reference_objects[0];
2425 if (obj_surface && obj_surface->bo) {
2426 mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2427 dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2429 mfc_context->reference_surfaces[0].bo = NULL;
2431 /* backward reference */
2432 obj_surface = encode_state->reference_objects[1];
2434 if (obj_surface && obj_surface->bo) {
2435 mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2436 dri_bo_reference(mfc_context->reference_surfaces[1].bo);
/* no backward reference: fall back to the forward one */
2438 mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2440 if (mfc_context->reference_surfaces[1].bo)
2441 dri_bo_reference(mfc_context->reference_surfaces[1].bo);
/* slots 2..N alternate between the two real references */
2444 for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2445 mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2447 if (mfc_context->reference_surfaces[i].bo)
2448 dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2451 /* input YUV surface */
2452 obj_surface = encode_state->input_yuv_object;
2453 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2454 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* coded buffer: bitstream starts after the i965 header segment */
2457 obj_buffer = encode_state->coded_buf_object;
2458 bo = obj_buffer->buffer_store->bo;
2459 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2460 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2461 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2462 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2464 /* set the internal flag to 0 to indicate the coded size is unknown */
2466 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2467 coded_buffer_segment->mapped = 0;
2468 coded_buffer_segment->codec = encoder_context->codec;
/*
 * Entry point for encoding one MPEG-2 picture: initialize the MFC
 * context, bind all buffers, program the BCS pipeline and kick the
 * hardware.  Always reports success; per-step errors surface elsewhere.
 */
2475 gen8_mfc_mpeg2_encode_picture(VADriverContextP ctx,
2476 struct encode_state *encode_state,
2477 struct intel_encoder_context *encoder_context)
2479 gen8_mfc_init(ctx, encode_state, encoder_context);
2480 intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2481 /* Programming the BCS pipeline */
2482 gen8_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2483 gen8_mfc_run(ctx, encode_state, encoder_context);
2485 return VA_STATUS_SUCCESS;
2488 /* JPEG encode methods */
/*
 * Bind the buffers needed for one JPEG encode: the input YUV surface
 * and the coded (output) buffer.  JPEG needs no reference or
 * reconstructed surfaces.  Returns VA_STATUS_SUCCESS.
 * NOTE(review): the `dri_bo *bo` declaration and the dri_bo_map/unmap
 * pair around the coded_buffer_segment write are missing from this
 * excerpt; leading numbers are original file line numbers.
 */
2491 intel_mfc_jpeg_prepare(VADriverContextP ctx,
2492 struct encode_state *encode_state,
2493 struct intel_encoder_context *encoder_context)
2495 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2496 struct object_surface *obj_surface;
2497 struct object_buffer *obj_buffer;
2498 struct i965_coded_buffer_segment *coded_buffer_segment;
2499 VAStatus vaStatus = VA_STATUS_SUCCESS;
2502 /* input YUV surface */
2503 obj_surface = encode_state->input_yuv_object;
2504 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2505 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* coded buffer: bitstream starts after the i965 header segment */
2508 obj_buffer = encode_state->coded_buf_object;
2509 bo = obj_buffer->buffer_store->bo;
2510 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2511 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2512 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2513 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2515 /* set the internal flag to 0 to indicate the coded size is unknown */
2517 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2518 coded_buffer_segment->mapped = 0;
2519 coded_buffer_segment->codec = encoder_context->codec;
/*
 * Emit MFX_SURFACE_STATE (6 DWORDs) describing the JPEG input surface:
 * dimensions, pitch, tiling, surface format and the Y offsets of the
 * Cb/Cr planes.  The format is first derived from the fourcc with a
 * NV12/monochrome default, then refined by the switch below.
 * NOTE(review): braces and `break;` lines of the switch are missing
 * from this excerpt; leading numbers are original file line numbers.
 */
2527 gen8_mfc_jpeg_set_surface_state(VADriverContextP ctx,
2528 struct intel_encoder_context *encoder_context,
2529 struct encode_state *encode_state)
2531 struct intel_batchbuffer *batch = encoder_context->base.batch;
2532 struct object_surface *obj_surface = encode_state->input_yuv_object;
2533 unsigned int input_fourcc;
2534 unsigned int y_cb_offset;
2535 unsigned int y_cr_offset;
2536 unsigned int surface_format;
2538 assert(obj_surface);
2540 y_cb_offset = obj_surface->y_cb_offset;
2541 y_cr_offset = obj_surface->y_cr_offset;
2542 input_fourcc = obj_surface->fourcc;
2544 surface_format = (obj_surface->fourcc == VA_FOURCC_Y800) ?
2545 MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
2548 switch (input_fourcc) {
2549 case VA_FOURCC_Y800: {
2550 surface_format = MFX_SURFACE_MONOCHROME;
2553 case VA_FOURCC_NV12: {
2554 surface_format = MFX_SURFACE_PLANAR_420_8;
2557 case VA_FOURCC_UYVY: {
2558 surface_format = MFX_SURFACE_YCRCB_SWAPY;
2561 case VA_FOURCC_YUY2: {
2562 surface_format = MFX_SURFACE_YCRCB_NORMAL;
2565 case VA_FOURCC_RGBA:
2566 case VA_FOURCC_444P: {
2567 surface_format = MFX_SURFACE_R8G8B8A8_UNORM;
2572 BEGIN_BCS_BATCH(batch, 6);
2574 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2575 OUT_BCS_BATCH(batch, 0);
2576 OUT_BCS_BATCH(batch,
2577 ((obj_surface->orig_height - 1) << 18) |
2578 ((obj_surface->orig_width - 1) << 4));
2579 OUT_BCS_BATCH(batch,
2580 (surface_format << 28) | /* Surface Format */
2581 (0 << 27) | /* must be 1 for interleave U/V, hardware requirement for AVC/VC1/MPEG and 0 for JPEG */
2582 (0 << 22) | /* surface object control state, FIXME??? */
2583 ((obj_surface->width - 1) << 3) | /* pitch */
2584 (0 << 2) | /* must be 0 for interleave U/V */
2585 (1 << 1) | /* must be tiled */
2586 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
2587 OUT_BCS_BATCH(batch,
2588 (0 << 16) | /* X offset for U(Cb), must be 0 */
2589 (y_cb_offset << 0)); /* Y offset for U(Cb) */
2590 OUT_BCS_BATCH(batch,
2591 (0 << 16) | /* X offset for V(Cr), must be 0 */
2592 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2595 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_JPEG_PIC_STATE (3 DWORDs): map the input fourcc to the HW
 * input-surface/output-MCU formats, then compute, per MCU geometry
 * (H1xV1 sampling factors), the pixel counts of the partial last MCU
 * and the frame size in 8x8 blocks.  Unknown fourccs default to
 * NV12/YUV420.
 * NOTE(review): braces and `break;` lines of both switches are missing
 * from this excerpt; leading numbers are original file line numbers.
 */
2599 gen8_mfc_jpeg_pic_state(VADriverContextP ctx,
2600 struct intel_encoder_context *encoder_context,
2601 struct encode_state *encode_state)
2603 struct intel_batchbuffer *batch = encoder_context->base.batch;
2604 struct object_surface *obj_surface = encode_state->input_yuv_object;
2605 VAEncPictureParameterBufferJPEG *pic_param;
2606 unsigned int surface_format;
2607 unsigned int frame_width_in_blks;
2608 unsigned int frame_height_in_blks;
2609 unsigned int pixels_in_horizontal_lastMCU;
2610 unsigned int pixels_in_vertical_lastMCU;
2611 unsigned int input_surface_format;
2612 unsigned int output_mcu_format;
2613 unsigned int picture_width;
2614 unsigned int picture_height;
2616 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
2617 assert(obj_surface);
2618 pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
2619 surface_format = obj_surface->fourcc;
2620 picture_width = pic_param->picture_width;
2621 picture_height = pic_param->picture_height;
/* fourcc -> HW input-surface format and output MCU structure */
2623 switch (surface_format) {
2624 case VA_FOURCC_Y800: {
2625 input_surface_format = JPEG_ENC_SURFACE_Y8;
2626 output_mcu_format = JPEG_ENC_MCU_YUV400;
2629 case VA_FOURCC_NV12: {
2630 input_surface_format = JPEG_ENC_SURFACE_NV12;
2631 output_mcu_format = JPEG_ENC_MCU_YUV420;
2634 case VA_FOURCC_UYVY: {
2635 input_surface_format = JPEG_ENC_SURFACE_UYVY;
2636 output_mcu_format = JPEG_ENC_MCU_YUV422H_2Y;
2639 case VA_FOURCC_YUY2: {
2640 input_surface_format = JPEG_ENC_SURFACE_YUY2;
2641 output_mcu_format = JPEG_ENC_MCU_YUV422H_2Y;
2645 case VA_FOURCC_RGBA:
2646 case VA_FOURCC_444P: {
2647 input_surface_format = JPEG_ENC_SURFACE_RGB;
2648 output_mcu_format = JPEG_ENC_MCU_RGB;
/* default: treat unknown fourccs as NV12/YUV420 */
2652 input_surface_format = JPEG_ENC_SURFACE_NV12;
2653 output_mcu_format = JPEG_ENC_MCU_YUV420;
/* MCU geometry: last-MCU pixel counts and frame size in 8x8 blocks */
2659 switch (output_mcu_format) {
2661 case JPEG_ENC_MCU_YUV400:
2662 case JPEG_ENC_MCU_RGB: {
2663 pixels_in_horizontal_lastMCU = (picture_width % 8);
2664 pixels_in_vertical_lastMCU = (picture_height % 8);
2666 //H1=1,V1=1 for YUV400 and YUV444. So, compute these values accordingly
2667 frame_width_in_blks = ((picture_width + 7) / 8);
2668 frame_height_in_blks = ((picture_height + 7) / 8);
2672 case JPEG_ENC_MCU_YUV420: {
2673 if ((picture_width % 2) == 0)
2674 pixels_in_horizontal_lastMCU = picture_width % 16;
2676 pixels_in_horizontal_lastMCU = ((picture_width % 16) + 1) % 16;
2678 if ((picture_height % 2) == 0)
2679 pixels_in_vertical_lastMCU = picture_height % 16;
2681 pixels_in_vertical_lastMCU = ((picture_height % 16) + 1) % 16;
2683 //H1=2,V1=2 for YUV420. So, compute these values accordingly
2684 frame_width_in_blks = ((picture_width + 15) / 16) * 2;
2685 frame_height_in_blks = ((picture_height + 15) / 16) * 2;
2689 case JPEG_ENC_MCU_YUV422H_2Y: {
2690 if (picture_width % 2 == 0)
2691 pixels_in_horizontal_lastMCU = picture_width % 16;
2693 pixels_in_horizontal_lastMCU = ((picture_width % 16) + 1) % 16;
2695 pixels_in_vertical_lastMCU = picture_height % 8;
2697 //H1=2,V1=1 for YUV422H_2Y. So, compute these values accordingly
2698 frame_width_in_blks = ((picture_width + 15) / 16) * 2;
2699 frame_height_in_blks = ((picture_height + 7) / 8);
2704 BEGIN_BCS_BATCH(batch, 3);
2706 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2708 OUT_BCS_BATCH(batch,
2709 (pixels_in_horizontal_lastMCU << 26) | /* Pixels In Horizontal Last MCU */
2710 (pixels_in_vertical_lastMCU << 21) | /* Pixels In Vertical Last MCU */
2711 (input_surface_format << 8) | /* Input Surface format */
2712 (output_mcu_format << 0)); /* Output MCU Structure */
2714 OUT_BCS_BATCH(batch,
2715 ((frame_height_in_blks - 1) << 16) | /* Frame Height In Blks Minus 1 */
2716 (JPEG_ENC_ROUND_QUANT_DEFAULT << 13) | /* Rounding Quant set to default value 0 */
2717 ((frame_width_in_blks - 1) << 0)); /* Frame Width In Blks Minus 1 */
2718 ADVANCE_BCS_BATCH(batch);
/*
 * Convert a 64-entry raster-order quantiser matrix to the reciprocal
 * form the HW expects (65535/Q), packing two 16-bit reciprocals per
 * DWORD (even index in the low half).  Assumes every raster_qm entry is
 * non-zero — callers clamp values to [1,255] first.
 * NOTE(review): braces, the `j` increment and the second loop's step
 * are among the lines missing from this excerpt.
 */
2722 get_reciprocal_dword_qm(unsigned char *raster_qm, uint32_t *dword_qm)
2725 short reciprocal_qm[64];
2727 for (i = 0; i < 64; i++) {
2728 reciprocal_qm[i] = 65535 / (raster_qm[i]);
2731 for (i = 0; i < 64; i++) {
2732 dword_qm[j] = ((reciprocal_qm[i + 1] << 16) | (reciprocal_qm[i]));
/*
 * Build and emit the JPEG forward-quantiser matrices: take the app's
 * zigzag-order VAQMatrixBufferJPEG (buffering it for later frames, or
 * reusing the buffered/default one), scale by the normalized quality
 * factor, clamp to [1,255], de-zigzag to raster, transpose to column
 * order, convert to packed reciprocals and send via gen8_mfc_fqm_state
 * (luma once, the same chroma matrix for both Cb and Cr).
 * NOTE(review): braces and `else` lines are missing from this excerpt;
 * leading numbers are original file line numbers.
 */
2741 gen8_mfc_jpeg_fqm_state(VADriverContextP ctx,
2742 struct intel_encoder_context *encoder_context,
2743 struct encode_state *encode_state)
2745 unsigned int quality = 0;
2746 uint32_t temp, i = 0, j = 0, dword_qm[32];
2747 VAEncPictureParameterBufferJPEG *pic_param;
2748 VAQMatrixBufferJPEG *qmatrix;
2749 unsigned char raster_qm[64], column_raster_qm[64];
2750 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2752 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
2753 pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
2754 quality = pic_param->quality;
2756 //If the app sends the qmatrix, use it, buffer it for using it with the next frames
2757 //The app can send qmatrix for the first frame and not send for the subsequent frames
2758 if (encode_state->q_matrix && encode_state->q_matrix->buffer) {
2759 qmatrix = (VAQMatrixBufferJPEG *)encode_state->q_matrix->buffer;
2761 mfc_context->buffered_qmatrix.load_lum_quantiser_matrix = 1;
2762 memcpy(mfc_context->buffered_qmatrix.lum_quantiser_matrix, qmatrix->lum_quantiser_matrix, 64 * (sizeof(unsigned char)));
2764 if (pic_param->num_components > 1) {
2765 mfc_context->buffered_qmatrix.load_chroma_quantiser_matrix = 1;
2766 memcpy(mfc_context->buffered_qmatrix.chroma_quantiser_matrix, qmatrix->chroma_quantiser_matrix, 64 * (sizeof(unsigned char)));
2768 mfc_context->buffered_qmatrix.load_chroma_quantiser_matrix = 0;
2772 //If the app doesn't send the qmatrix, use the buffered/default qmatrix
2773 qmatrix = &mfc_context->buffered_qmatrix;
2774 qmatrix->load_lum_quantiser_matrix = 1;
2775 qmatrix->load_chroma_quantiser_matrix = (pic_param->num_components > 1) ? 1 : 0;
2779 //As per the design, normalization of the quality factor and scaling of the Quantization tables
2780 //based on the quality factor needs to be done in the driver before sending the values to the HW.
2781 //But note, the driver expects the scaled quantization tables (as per below logic) to be sent as
2782 //packed header information. The packed header is written as the header of the jpeg file. This
2783 //header information is used to decode the jpeg file. So, it is the app's responsibility to send
2784 //the correct header information (See build_packed_jpeg_header_buffer() in jpegenc.c in LibVa on
2785 //how to do this). QTables can be different for different applications. If no tables are provided,
2786 //the default tables in the driver are used.
2788 //Normalization of the quality factor
2789 if (quality > 100) quality = 100;
2790 if (quality == 0) quality = 1;
2791 quality = (quality < 50) ? (5000 / quality) : (200 - (quality * 2));
2793 //Step 1. Apply Quality factor and clip to range [1, 255] for luma and chroma Quantization matrices
2794 //Step 2. HW expects the 1/Q[i] values in the qm sent, so get reciprocals
2795 //Step 3. HW also expects 32 dwords, hence combine 2 (1/Q) values into 1 dword
2796 //Step 4. Send the Quantization matrix to the HW, use gen8_mfc_fqm_state
2799 if (qmatrix->load_lum_quantiser_matrix) {
2800 //apply quality to lum_quantiser_matrix
2801 for (i = 0; i < 64; i++) {
2802 temp = (qmatrix->lum_quantiser_matrix[i] * quality) / 100;
2803 //clamp to range [1,255]
2804 temp = (temp > 255) ? 255 : temp;
2805 temp = (temp < 1) ? 1 : temp;
2806 qmatrix->lum_quantiser_matrix[i] = (unsigned char)temp;
2809 //For VAAPI, the VAQMatrixBuffer needs to be in zigzag order.
2810 //The App should send it in zigzag. Now, the driver has to extract the raster from it.
2811 for (j = 0; j < 64; j++)
2812 raster_qm[zigzag_direct[j]] = qmatrix->lum_quantiser_matrix[j];
2814 //Convert the raster order(row-ordered) to the column-raster (column by column).
2815 //To be consistent with the other encoders, send it in column order.
2816 //Need to double check if our HW expects col or row raster.
2817 for (j = 0; j < 64; j++) {
2818 int row = j / 8, col = j % 8;
2819 column_raster_qm[col * 8 + row] = raster_qm[j];
2822 //Convert to raster QM to reciprocal. HW expects values in reciprocal.
2823 get_reciprocal_dword_qm(column_raster_qm, dword_qm);
2825 //send the luma qm to the command buffer
2826 gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2829 //For Chroma, if chroma exists (Cb, Cr or G, B)
2830 if (qmatrix->load_chroma_quantiser_matrix) {
2831 //apply quality to chroma_quantiser_matrix
2832 for (i = 0; i < 64; i++) {
2833 temp = (qmatrix->chroma_quantiser_matrix[i] * quality) / 100;
2834 //clamp to range [1,255]
2835 temp = (temp > 255) ? 255 : temp;
2836 temp = (temp < 1) ? 1 : temp;
2837 qmatrix->chroma_quantiser_matrix[i] = (unsigned char)temp;
2840 //For VAAPI, the VAQMatrixBuffer needs to be in zigzag order.
2841 //The App should send it in zigzag. Now, the driver has to extract the raster from it.
2842 for (j = 0; j < 64; j++)
2843 raster_qm[zigzag_direct[j]] = qmatrix->chroma_quantiser_matrix[j];
2845 //Convert the raster order(row-ordered) to the column-raster (column by column).
2846 //To be consistent with the other encoders, send it in column order.
2847 //Need to double check if our HW expects col or row raster.
2848 for (j = 0; j < 64; j++) {
2849 int row = j / 8, col = j % 8;
2850 column_raster_qm[col * 8 + row] = raster_qm[j];
2854 //Convert to raster QM to reciprocal. HW expects values in reciprocal.
2855 get_reciprocal_dword_qm(column_raster_qm, dword_qm);
2857 //send the same chroma qm to the command buffer (for both U,V or G,B)
2858 gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2859 gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2864 //Translation of Table K.5 into code: This method takes the huffval from the
2865 //Huffmantable buffer and converts into index for the coefficients and size tables
/* Map an AC HUFFVAL byte (run/size nibbles) to a flat table index:
 * run * 10 + size, with an extra +1 offset for the ZRL code band at
 * and above 0xF0.
 * NOTE(review): braces, the `index` declaration and `return index;`
 * are missing from this excerpt. */
2866 uint8_t map_huffval_to_index(uint8_t huff_val)
2870 if (huff_val < 0xF0) {
2871 index = (((huff_val >> 4) & 0x0F) * 0xA) + (huff_val & 0x0F);
2873 index = 1 + (((huff_val >> 4) & 0x0F) * 0xA) + (huff_val & 0x0F);
2880 //Implementation of Flow chart Annex C - Figure C.1
/* Expand BITS[] (codes-per-length counts, i = code length) into the
 * HUFFSIZE list: bits[i-1] entries of value i, 0-terminated; *lastK
 * receives the entry count (JPEG spec T.81, Figure C.1).
 * NOTE(review): the outer loop over i, increments and the *lastK
 * assignment are missing from this excerpt. */
2882 generate_huffman_codesizes_table(uint8_t *bits, uint8_t *huff_size_table, uint8_t *lastK)
2884 uint8_t i = 1, j = 1, k = 0;
2887 while (j <= (uint8_t)bits[i - 1]) {
2888 huff_size_table[k] = i;
2896 huff_size_table[k] = 0;
2900 //Implementation of Flow chart Annex C - Figure C.2
/* Generate HUFFCODE from the 0-terminated HUFFSIZE list: consecutive
 * codes within a length, code doubled when the length increases (JPEG
 * spec T.81, Figure C.2).  A code of 0xFFFF is replaced by 0 — the HW
 * apparently reserves 0xFFFF (TODO confirm against the HW docs).
 * NOTE(review): the `code` declaration, increments and closing braces
 * are missing from this excerpt. */
2902 generate_huffman_codes_table(uint8_t *huff_size_table, uint16_t *huff_code_table)
2906 uint8_t si = huff_size_table[k];
2908 while (huff_size_table[k] != 0) {
2910 while (huff_size_table[k] == si) {
2912 // An huffman code can never be 0xFFFF. Replace it with 0 if 0xFFFF
2913 if (code == 0xFFFF) {
2917 huff_code_table[k] = code;
2928 //Implementation of Flow chat Annex C - Figure C.3
/* Reorder the code/size tables from HUFFVAL order into symbol-indexed
 * order (via map_huffval_to_index), in scratch tables sized 12 for DC
 * (type 0) or 162 for AC, then copy the result back in place (JPEG
 * spec T.81, Figure C.3). */
2930 generate_ordered_codes_table(uint8_t *huff_vals, uint8_t *huff_size_table, uint16_t *huff_code_table, uint8_t type, uint8_t lastK)
2932 uint8_t huff_val_size = 0, i = 0, k = 0;
2934 huff_val_size = (type == 0) ? 12 : 162;
2935 uint8_t huff_si_table[huff_val_size];
2936 uint16_t huff_co_table[huff_val_size];
2938 memset(huff_si_table, 0, sizeof(huff_si_table));
2939 memset(huff_co_table, 0, sizeof(huff_co_table));
2942 i = map_huffval_to_index(huff_vals[k]);
2943 huff_co_table[i] = huff_code_table[k];
2944 huff_si_table[i] = huff_size_table[k];
2946 } while (k < lastK);
2948 memcpy(huff_size_table, huff_si_table, sizeof(uint8_t)*huff_val_size);
2949 memcpy(huff_code_table, huff_co_table, sizeof(uint16_t)*huff_val_size);
2953 //This method converts the huffman table to code words which is needed by the HW
2954 //Flowcharts from Jpeg Spec Annex C - Figure C.1, Figure C.2, Figure C.3 are used here
/* Run the three Annex C steps (code sizes -> codes -> symbol-ordered
 * tables) on one DC (type 0, 12 entries) or AC (type 1, 162 entries)
 * Huffman table from the VA buffer, then pack each entry into a DWORD
 * for the HW: byte 0 = code length, bytes 1-2 = code word, byte 3
 * unused (relies on little-endian layout). */
2956 convert_hufftable_to_codes(VAHuffmanTableBufferJPEGBaseline *huff_buffer, uint32_t *table, uint8_t type, uint8_t index)
2958 uint8_t lastK = 0, i = 0;
2959 uint8_t huff_val_size = 0;
2960 uint8_t *huff_bits, *huff_vals;
2962 huff_val_size = (type == 0) ? 12 : 162;
2963 uint8_t huff_size_table[huff_val_size + 1]; //The +1 for adding 0 at the end of huff_val_size
2964 uint16_t huff_code_table[huff_val_size];
2966 memset(huff_size_table, 0, sizeof(huff_size_table));
2967 memset(huff_code_table, 0, sizeof(huff_code_table));
2969 huff_bits = (type == 0) ? (huff_buffer->huffman_table[index].num_dc_codes) : (huff_buffer->huffman_table[index].num_ac_codes);
2970 huff_vals = (type == 0) ? (huff_buffer->huffman_table[index].dc_values) : (huff_buffer->huffman_table[index].ac_values);
2973 //Generation of table of Huffman code sizes
2974 generate_huffman_codesizes_table(huff_bits, huff_size_table, &lastK);
2976 //Generation of table of Huffman codes
2977 generate_huffman_codes_table(huff_size_table, huff_code_table);
2979 //Ordering procedure for encoding procedure code tables
2980 generate_ordered_codes_table(huff_vals, huff_size_table, huff_code_table, type, lastK);
2982 //HW expects Byte0: Code length; Byte1,Byte2: Code Word, Byte3: Dummy
2983 //Since IA is littlended, &, | and << accordingly to store the values in the DWord.
2984 for (i = 0; i < huff_val_size; i++) {
2986 table[i] = ((huff_size_table[i] & 0xFF) | ((huff_code_table[i] & 0xFFFF) << 8));
2991 //send the huffman table using MFC_JPEG_HUFF_TABLE_STATE
//Convert the app-supplied Huffman tables to HW code words and emit one
//MFC_JPEG_HUFF_TABLE_STATE command (176 DWords) per loaded table.
2993 gen8_mfc_jpeg_huff_table_state(VADriverContextP ctx,
2994 struct encode_state *encode_state,
2995 struct intel_encoder_context *encoder_context,
2998 VAHuffmanTableBufferJPEGBaseline *huff_buffer;
2999 struct intel_batchbuffer *batch = encoder_context->base.batch;
3001 uint32_t dc_table[12], ac_table[162];
3003 assert(encode_state->huffman_table && encode_state->huffman_table->buffer);
3004 huff_buffer = (VAHuffmanTableBufferJPEGBaseline *)encode_state->huffman_table->buffer;
//Fix: clear the whole arrays. The old sizes (12 / 162) were element
//counts, not byte counts, so only a quarter of each uint32_t array was
//being zeroed (the HW consumes 12*4 and 162*4 bytes below).
3006 memset(dc_table, 0, sizeof(dc_table));
3007 memset(ac_table, 0, sizeof(ac_table));
3009 for (index = 0; index < num_tables; index++) {
3010 int id = va_to_gen7_jpeg_hufftable[index];
3012 if (!huff_buffer->load_huffman_table[index])
3015 //load DC table with 12 DWords
3016 convert_hufftable_to_codes(huff_buffer, dc_table, 0, index); //0 for Dc
3018 //load AC table with 162 DWords
3019 convert_hufftable_to_codes(huff_buffer, ac_table, 1, index); //1 for AC
3021 BEGIN_BCS_BATCH(batch, 176);
3022 OUT_BCS_BATCH(batch, MFC_JPEG_HUFF_TABLE_STATE | (176 - 2));
3023 OUT_BCS_BATCH(batch, id); //Huff table id
3025 //DWord 2 - 13 has DC_TABLE
3026 intel_batchbuffer_data(batch, dc_table, 12 * 4);
3028 //Dword 14 -175 has AC_TABLE
3029 intel_batchbuffer_data(batch, ac_table, 162 * 4);
3030 ADVANCE_BCS_BATCH(batch);
3035 //This method is used to compute the MCU count used for setting MFC_JPEG_SCAN_OBJECT
//Map the input surface FOURCC to the luma horizontal/vertical sampling
//factors returned through h_factor/v_factor.
//NOTE(review): the per-case factor assignments fall on lines not shown
//in this chunk — confirm the values against the JPEG chroma subsampling
//of each FOURCC.
3036 static void get_Y_sampling_factors(uint32_t surface_format, uint8_t *h_factor, uint8_t *v_factor)
3038 switch (surface_format) {
3039 case VA_FOURCC_Y800: {
3044 case VA_FOURCC_NV12: {
3049 case VA_FOURCC_UYVY: {
3054 case VA_FOURCC_YUY2: {
3059 case VA_FOURCC_RGBA:
3060 case VA_FOURCC_444P: {
3065 default : { //May be have to insert error handling here. For now just use as below
3073 //set MFC_JPEG_SCAN_OBJECT
//Program the MFC_JPEG_SCAN_OBJECT command: MCU count, Huffman table
//selectors, restart interval and last-scan/header flags for the single
//JPEG scan.
3075 gen8_mfc_jpeg_scan_object(VADriverContextP ctx,
3076 struct encode_state *encode_state,
3077 struct intel_encoder_context *encoder_context)
3079 uint32_t mcu_count, surface_format, Mx, My;
3080 uint8_t i, horizontal_sampling_factor, vertical_sampling_factor, huff_ac_table = 0, huff_dc_table = 0;
3081 uint8_t is_last_scan = 1; //Jpeg has only 1 scan per frame. When last scan, HW inserts EOI code.
3082 uint8_t head_present_flag = 1; //Header has tables and app data
3083 uint16_t num_components, restart_interval; //Specifies number of MCUs in an ECS.
3084 VAEncSliceParameterBufferJPEG *slice_param;
3085 VAEncPictureParameterBufferJPEG *pic_param;
3087 struct intel_batchbuffer *batch = encoder_context->base.batch;
3088 struct object_surface *obj_surface = encode_state->input_yuv_object;
3090 assert(encode_state->slice_params_ext[0] && encode_state->slice_params_ext[0]->buffer);
3091 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
3092 assert(obj_surface);
3093 pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
3094 slice_param = (VAEncSliceParameterBufferJPEG *)encode_state->slice_params_ext[0]->buffer;
3095 surface_format = obj_surface->fourcc;
3097 get_Y_sampling_factors(surface_format, &horizontal_sampling_factor, &vertical_sampling_factor);
3099 // Mx = #MCUs in a row, My = #MCUs in a column
//Each MCU covers (h_factor*8) x (v_factor*8) luma pixels; round up.
3100 Mx = (pic_param->picture_width + (horizontal_sampling_factor * 8 - 1)) / (horizontal_sampling_factor * 8);
3101 My = (pic_param->picture_height + (vertical_sampling_factor * 8 - 1)) / (vertical_sampling_factor * 8);
3102 mcu_count = (Mx * My);
3104 num_components = pic_param->num_components;
3105 restart_interval = slice_param->restart_interval;
3107 //Depending on number of components and values set for table selectors,
3108 //only those bits are set in 24:22 for AC table, 20:18 for DC table
3109 for (i = 0; i < num_components; i++) {
3110 huff_ac_table |= ((slice_param->components[i].ac_table_selector) << i);
3111 huff_dc_table |= ((slice_param->components[i].dc_table_selector) << i);
3115 BEGIN_BCS_BATCH(batch, 3);
3117 OUT_BCS_BATCH(batch, MFC_JPEG_SCAN_OBJECT | (3 - 2));
3119 OUT_BCS_BATCH(batch, mcu_count << 0); //MCU Count
3121 OUT_BCS_BATCH(batch,
3122 (huff_ac_table << 22) | //Huffman AC Table
3123 (huff_dc_table << 18) | //Huffman DC Table
3124 (head_present_flag << 17) | //Head present flag
3125 (is_last_scan << 16) | //Is last scan
3126 (restart_interval << 0)); //Restart Interval
3127 ADVANCE_BCS_BATCH(batch);
//Emit one MFX_INSERT_OBJECT carrying raw header bytes into the
//bitstream.  insert_data: payload DWords; length_in_dws: payload size;
//data_bits_in_last_dw: valid bits in the final DWord (0 means all 32).
3131 gen8_mfc_jpeg_pak_insert_object(struct intel_encoder_context *encoder_context, unsigned int *insert_data,
3132 int length_in_dws, int data_bits_in_last_dw, int is_last_header,
3133 int is_end_of_slice)
3135 struct intel_batchbuffer *batch = encoder_context->base.batch;
//A full final DWord is encoded as 32, not 0.
3138 if (data_bits_in_last_dw == 0)
3139 data_bits_in_last_dw = 32;
3141 BEGIN_BCS_BATCH(batch, length_in_dws + 2);
3143 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (length_in_dws + 2 - 2));
3145 OUT_BCS_BATCH(batch,
3146 (0 << 16) | //DataByteOffset 0 for JPEG Encoder
3147 (0 << 15) | //HeaderLengthExcludeFrmSize 0 for JPEG Encoder
3148 (data_bits_in_last_dw << 8) | //DataBitsInLastDW
3149 (0 << 4) | //SkipEmulByteCount 0 for JPEG Encoder
3150 (0 << 3) | //EmulationFlag 0 for JPEG Encoder
3151 ((!!is_last_header) << 2) | //LastHeaderFlag
3152 ((!!is_end_of_slice) << 1) | //EndOfSliceFlag
3153 (1 << 0)); //BitstreamStartReset 1 for JPEG Encoder
3155 intel_batchbuffer_data(batch, insert_data, length_in_dws * 4);
3157 ADVANCE_BCS_BATCH(batch);
3161 //send the jpeg headers to HW using MFX_PAK_INSERT_OBJECT
//Insert the app-packed JPEG headers (if any) into the bitstream.
//Length comes from the packed-header parameter buffer in bits and is
//converted to whole DWords plus a residual bit count.
3163 gen8_mfc_jpeg_add_headers(VADriverContextP ctx,
3164 struct encode_state *encode_state,
3165 struct intel_encoder_context *encoder_context)
3167 if (encode_state->packed_header_data_ext) {
3168 VAEncPackedHeaderParameterBuffer *param = NULL;
3169 unsigned int *header_data = (unsigned int *)(*encode_state->packed_header_data_ext)->buffer;
3170 unsigned int length_in_bits;
3172 param = (VAEncPackedHeaderParameterBuffer *)(*encode_state->packed_header_params_ext)->buffer;
3173 length_in_bits = param->bit_length;
//ALIGN(bits,32)>>5 = DWord count; bits & 0x1f = bits used in last DWord.
3175 gen8_mfc_jpeg_pak_insert_object(encoder_context,
3177 ALIGN(length_in_bits, 32) >> 5,
3178 length_in_bits & 0x1f,
3184 //Initialize the buffered_qmatrix with the default qmatrix in the driver.
3185 //If the app sends the qmatrix, this will be replaced with the one app sends.
3187 jpeg_init_default_qmatrix(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3190 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3192 //Load the QM in zigzag order. If app sends QM, it is always in zigzag order.
3193 for (i = 0; i < 64; i++)
3194 mfc_context->buffered_qmatrix.lum_quantiser_matrix[i] = jpeg_luma_quant[zigzag_direct[i]];
3196 for (i = 0; i < 64; i++)
3197 mfc_context->buffered_qmatrix.chroma_quantiser_matrix[i] = jpeg_chroma_quant[zigzag_direct[i]];
3200 /* This is at the picture level */
//Emit all picture-level MFX state for a JPEG encode, then the per-frame
//huffman tables, scan object and packed headers.
3202 gen8_mfc_jpeg_pipeline_picture_programing(VADriverContextP ctx,
3203 struct encode_state *encode_state,
3204 struct intel_encoder_context *encoder_context)
3206 int i, j, component, max_selector = 0;
3207 VAEncSliceParameterBufferJPEG *slice_param;
3209 gen8_mfc_pipe_mode_select(ctx, MFX_FORMAT_JPEG, encoder_context);
3210 gen8_mfc_jpeg_set_surface_state(ctx, encoder_context, encode_state);
3211 gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
3212 gen8_mfc_ind_obj_base_addr_state(ctx, encoder_context);
3213 gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
3214 gen8_mfc_jpeg_pic_state(ctx, encoder_context, encode_state);
3216 //do the slice level encoding here
3217 gen8_mfc_jpeg_fqm_state(ctx, encoder_context, encode_state);
3219 //Scan all slices to find the highest huffman table selector in use,
3219 //which determines how many tables must be sent to the HW.
3220 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
3221 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[i]->buffer);
3222 slice_param = (VAEncSliceParameterBufferJPEG *)encode_state->slice_params_ext[i]->buffer;
3224 for (j = 0; j < encode_state->slice_params_ext[i]->num_elements; j++) {
3226 for (component = 0; component < slice_param->num_components; component++) {
3227 if (max_selector < slice_param->components[component].dc_table_selector)
3228 max_selector = slice_param->components[component].dc_table_selector;
3230 if (max_selector < slice_param->components[component].ac_table_selector)
3231 max_selector = slice_param->components[component].ac_table_selector;
3238 assert(max_selector < 2);
3239 //send the huffman table using MFC_JPEG_HUFF_TABLE
3240 gen8_mfc_jpeg_huff_table_state(ctx, encode_state, encoder_context, max_selector + 1);
3241 //set MFC_JPEG_SCAN_OBJECT
3242 gen8_mfc_jpeg_scan_object(ctx, encode_state, encoder_context);
3243 //add headers using MFX_PAK_INSERT_OBJECT (it is refered as MFX_INSERT_OBJECT in this driver code)
3244 gen8_mfc_jpeg_add_headers(ctx, encode_state, encoder_context);
//Wrap the picture-level programming in one atomic BCS batch with a
//leading MI_FLUSH.
3249 gen8_mfc_jpeg_pipeline_programing(VADriverContextP ctx,
3250 struct encode_state *encode_state,
3251 struct intel_encoder_context *encoder_context)
3253 struct intel_batchbuffer *batch = encoder_context->base.batch;
3256 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
3257 intel_batchbuffer_emit_mi_flush(batch);
3259 // picture level programing
3260 gen8_mfc_jpeg_pipeline_picture_programing(ctx, encode_state, encoder_context);
3263 intel_batchbuffer_end_atomic(batch);
//Top-level JPEG encode entry point: init MFC state, prepare surfaces
//and coded buffer, program the BCS pipeline, then kick the HW run.
3269 gen8_mfc_jpeg_encode_picture(VADriverContextP ctx,
3270 struct encode_state *encode_state,
3271 struct intel_encoder_context *encoder_context)
3273 gen8_mfc_init(ctx, encode_state, encoder_context);
3274 intel_mfc_jpeg_prepare(ctx, encode_state, encoder_context);
3275 /*Programing bcs pipeline*/
3276 gen8_mfc_jpeg_pipeline_programing(ctx, encode_state, encoder_context);
3277 gen8_mfc_run(ctx, encode_state, encoder_context);
3279 return VA_STATUS_SUCCESS;
//Pick the VP8 quantizer index whose predicted per-MB size best matches
//the per-MB budget derived from target_frame_size, scanning from
//clamp_qindex_low up to clamp_qindex_high.  Sizes are scaled by 512
//(<< 9) to keep the integer math in the vp8_bits_per_mb table's units.
3282 static int gen8_mfc_vp8_qindex_estimate(struct encode_state *encode_state,
3283 struct gen6_mfc_context *mfc_context,
3284 int target_frame_size,
3287 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3288 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3289 unsigned int max_qindex = pic_param->clamp_qindex_high;
3290 unsigned int min_qindex = pic_param->clamp_qindex_low;
3291 int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
3292 int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
3294 int last_size_gap = -1;
3295 int per_mb_size_at_qindex;
3296 int target_qindex = min_qindex, i;
3298 /* make sure would not overflow*/
3299 if (target_frame_size >= (0x7fffffff >> 9))
3300 target_mb_size = (target_frame_size / width_in_mbs / height_in_mbs) << 9;
3302 target_mb_size = (target_frame_size << 9) / width_in_mbs / height_in_mbs;
3304 for (i = min_qindex; i <= max_qindex; i++) {
3305 per_mb_size_at_qindex = vp8_bits_per_mb[!is_key_frame][i];
3307 if (per_mb_size_at_qindex <= target_mb_size) {
3308 if (target_mb_size - per_mb_size_at_qindex < last_size_gap)
3312 last_size_gap = per_mb_size_at_qindex - target_mb_size;
3315 return target_qindex;
//Initialise VP8 bit-rate-control state: per-slice-type frame budgets,
//GOP composition, HRD buffer size/fullness, and initial quantizer
//estimates for I and P frames.
3318 static void gen8_mfc_vp8_brc_init(struct encode_state *encode_state,
3319 struct intel_encoder_context* encoder_context)
3321 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3322 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3323 double bitrate = encoder_context->brc.bits_per_second[0];
3324 double framerate = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
3325 int inum = 1, pnum = 0;
3326 int intra_period = seq_param->intra_period;
3327 int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
3328 int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
3329 int max_frame_size = (vp8_bits_per_mb[0][0] >> 9) * width_in_mbs * height_in_mbs; /* vp8_bits_per_mb table mutilpled 512 */
3331 pnum = intra_period - 1;
3333 mfc_context->brc.mode = encoder_context->rate_control_mode;
//Split the GOP bit budget between the I frame and the P frames using
//the BRC_PWEIGHT weighting.
3335 mfc_context->brc.target_frame_size[0][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period) / framerate) /
3336 (double)(inum + BRC_PWEIGHT * pnum));
3337 mfc_context->brc.target_frame_size[0][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[0][SLICE_TYPE_I];
3339 mfc_context->brc.gop_nums[0][SLICE_TYPE_I] = inum;
3340 mfc_context->brc.gop_nums[0][SLICE_TYPE_P] = pnum;
3342 mfc_context->brc.bits_per_frame[0] = bitrate / framerate;
3344 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I] = gen8_mfc_vp8_qindex_estimate(encode_state,
3346 mfc_context->brc.target_frame_size[0][SLICE_TYPE_I],
3348 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P] = gen8_mfc_vp8_qindex_estimate(encode_state,
3350 mfc_context->brc.target_frame_size[0][SLICE_TYPE_P],
//HRD buffer defaults to one second of bitrate when the app gives none;
//initial fullness defaults to half the buffer.
3353 if (encoder_context->brc.hrd_buffer_size)
3354 mfc_context->hrd.buffer_size[0] = (double)encoder_context->brc.hrd_buffer_size;
3356 mfc_context->hrd.buffer_size[0] = bitrate;
3357 if (encoder_context->brc.hrd_initial_buffer_fullness &&
3358 encoder_context->brc.hrd_initial_buffer_fullness < mfc_context->hrd.buffer_size[0])
3359 mfc_context->hrd.current_buffer_fullness[0] = (double)encoder_context->brc.hrd_initial_buffer_fullness;
3361 mfc_context->hrd.current_buffer_fullness[0] = mfc_context->hrd.buffer_size[0] / 2.0;
3362 mfc_context->hrd.target_buffer_fullness[0] = (double)mfc_context->hrd.buffer_size[0] / 2.0;
3363 mfc_context->hrd.buffer_capacity[0] = (double)mfc_context->hrd.buffer_size[0] / max_frame_size;
3364 mfc_context->hrd.violation_noted = 0;
//Post-encode BRC update: from the actual frame size (frame_bits),
//predict the quantizer for the next frame of the same type, apply an
//HRD-fullness-based correction, clamp to the app's qindex range, and
//handle HRD under/overflow.  Returns the (possibly updated) HRD status.
3367 static int gen8_mfc_vp8_brc_postpack(struct encode_state *encode_state,
3368 struct intel_encoder_context *encoder_context,
3371 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3372 gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
3373 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3374 int is_key_frame = !pic_param->pic_flags.bits.frame_type;
3375 int slicetype = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
3376 int qpi = mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I];
3377 int qpp = mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P];
3378 int qp; // quantizer of previously encoded slice of current type
3379 int qpn; // predicted quantizer for next frame of current type in integer format
3380 double qpf; // predicted quantizer for next frame of current type in float format
3381 double delta_qp; // QP correction
3382 int target_frame_size, frame_size_next;
3384 * x - how far we are from HRD buffer borders
3385 * y - how far we are from target HRD buffer fullness
3388 double frame_size_alpha;
3389 unsigned int max_qindex = pic_param->clamp_qindex_high;
3390 unsigned int min_qindex = pic_param->clamp_qindex_low;
3392 qp = mfc_context->brc.qp_prime_y[0][slicetype];
3394 target_frame_size = mfc_context->brc.target_frame_size[0][slicetype];
//Small HRD buffers get no smoothing; otherwise smooth the next-frame
//size target by the number of frames of this type in the GOP (capped).
3395 if (mfc_context->hrd.buffer_capacity[0] < 5)
3396 frame_size_alpha = 0;
3398 frame_size_alpha = (double)mfc_context->brc.gop_nums[0][slicetype];
3399 if (frame_size_alpha > 30) frame_size_alpha = 30;
3400 frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
3401 (double)(frame_size_alpha + 1.);
3403 /* frame_size_next: avoiding negative number and too small value */
3404 if ((double)frame_size_next < (double)(target_frame_size * 0.25))
3405 frame_size_next = (int)((double)target_frame_size * 0.25);
//Scale QP by the ratio of target size to predicted next size.
3407 qpf = (double)qp * target_frame_size / frame_size_next;
3408 qpn = (int)(qpf + 0.5);
3411 /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
3412 mfc_context->brc.qpf_rounding_accumulator[0] += qpf - qpn;
3413 if (mfc_context->brc.qpf_rounding_accumulator[0] > 1.0) {
3415 mfc_context->brc.qpf_rounding_accumulator[0] = 0.;
3416 } else if (mfc_context->brc.qpf_rounding_accumulator[0] < -1.0) {
3418 mfc_context->brc.qpf_rounding_accumulator[0] = 0.;
3422 /* making sure that QP is not changing too fast */
3423 if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
3424 else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
3425 /* making sure that with QP predictions we did do not leave QPs range */
3426 BRC_CLIP(qpn, min_qindex, max_qindex);
3428 /* checking wthether HRD compliance is still met */
3429 sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
3431 /* calculating QP delta as some function*/
3432 x = mfc_context->hrd.target_buffer_fullness[0] - mfc_context->hrd.current_buffer_fullness[0];
3434 x /= mfc_context->hrd.target_buffer_fullness[0];
3435 y = mfc_context->hrd.current_buffer_fullness[0];
3437 x /= (mfc_context->hrd.buffer_size[0] - mfc_context->hrd.target_buffer_fullness[0]);
3438 y = mfc_context->hrd.buffer_size[0] - mfc_context->hrd.current_buffer_fullness[0];
3440 if (y < 0.01) y = 0.01;
3442 else if (x < -1) x = -1;
//Smooth nonlinear correction: large only when far from the fullness
//target and close to a buffer border.
3444 delta_qp = BRC_QP_MAX_CHANGE * exp(-1 / y) * sin(BRC_PI_0_5 * x);
3445 qpn = (int)(qpn + delta_qp + 0.5);
3447 /* making sure that with QP predictions we did do not leave QPs range */
3448 BRC_CLIP(qpn, min_qindex, max_qindex);
3450 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
3451 /* correcting QPs of slices of other types */
3452 if (!is_key_frame) {
3453 if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 4)
3454 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 2;
3456 if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 4)
3457 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
3459 BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], min_qindex, max_qindex);
3460 BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], min_qindex, max_qindex);
3461 } else if (sts == BRC_UNDERFLOW) { // underflow
3462 if (qpn <= qp) qpn = qp + 2;
3463 if (qpn > max_qindex) {
3465 sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
3467 } else if (sts == BRC_OVERFLOW) {
3468 if (qpn >= qp) qpn = qp - 2;
3469 if (qpn < min_qindex) { // < 0 (?) overflow with minQP
3471 sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
3475 mfc_context->brc.qp_prime_y[0][slicetype] = qpn;
//Initialise VUI HRD parameters for VP8 CBR encoding (bitrate in
//1024-bit units, initial CPB removal delay in 90 kHz ticks).
3480 static void gen8_mfc_vp8_hrd_context_init(struct encode_state *encode_state,
3481 struct intel_encoder_context *encoder_context)
3483 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3484 unsigned int rate_control_mode = encoder_context->rate_control_mode;
3485 int target_bit_rate = encoder_context->brc.bits_per_second[0];
3487 // current we only support CBR mode.
3488 if (rate_control_mode == VA_RC_CBR) {
3489 mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
3490 mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
3491 mfc_context->vui_hrd.i_cpb_removal_delay = 2;
3492 mfc_context->vui_hrd.i_frame_number = 0;
3494 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
3495 mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
3496 mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
//Advance the HRD frame counter after each encoded frame.
3501 static void gen8_mfc_vp8_hrd_context_update(struct encode_state *encode_state,
3502 struct gen6_mfc_context *mfc_context)
3504 mfc_context->vui_hrd.i_frame_number++;
//Per-frame BRC preparation: (re)initialise rate control and HRD state
//for CBR mode when the BRC settings changed.
3507 static void gen8_mfc_vp8_brc_prepare(struct encode_state *encode_state,
3508 struct intel_encoder_context *encoder_context)
3510 unsigned int rate_control_mode = encoder_context->rate_control_mode;
3512 if (rate_control_mode == VA_RC_CBR) {
3514 assert(encoder_context->codec != CODEC_MPEG2);
3516 brc_updated = encoder_context->brc.need_reset;
3518 /*Programing bit rate control */
3520 gen8_mfc_vp8_brc_init(encode_state, encoder_context);
3523 /*Programing HRD control */
3525 gen8_mfc_vp8_hrd_context_init(encode_state, encoder_context);
//Reset per-frame VP8 entropy state: header update positions, mode/MV
//probabilities (key-frame vs inter defaults), skip probability from the
//base qindex, and upload the default coefficient probabilities into the
//stream-in BO consumed by the HW.
3529 static void vp8_enc_state_init(struct gen6_mfc_context *mfc_context,
3530 VAEncPictureParameterBufferVP8 *pic_param,
3531 VAQMatrixBufferVP8 *q_matrix)
3534 int is_key_frame = !pic_param->pic_flags.bits.frame_type;
3535 unsigned char *coeff_probs_stream_in_buffer;
3537 mfc_context->vp8_state.frame_header_lf_update_pos = 0;
3538 mfc_context->vp8_state.frame_header_qindex_update_pos = 0;
3539 mfc_context->vp8_state.frame_header_token_update_pos = 0;
3540 mfc_context->vp8_state.frame_header_bin_mv_upate_pos = 0;
3542 mfc_context->vp8_state.prob_skip_false = 255;
3543 memset(mfc_context->vp8_state.mb_segment_tree_probs, 0, sizeof(mfc_context->vp8_state.mb_segment_tree_probs));
3544 memcpy(mfc_context->vp8_state.mv_probs, vp8_default_mv_context, sizeof(mfc_context->vp8_state.mv_probs));
//Key frames use the kf mode probability tables and never reference
//other frames; inter frames use the inter tables plus ref-frame probs.
3547 memcpy(mfc_context->vp8_state.y_mode_probs, vp8_kf_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
3548 memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_kf_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
3550 mfc_context->vp8_state.prob_intra = 255;
3551 mfc_context->vp8_state.prob_last = 128;
3552 mfc_context->vp8_state.prob_gf = 128;
3554 memcpy(mfc_context->vp8_state.y_mode_probs, vp8_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
3555 memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
3557 mfc_context->vp8_state.prob_intra = 63;
3558 mfc_context->vp8_state.prob_last = 128;
3559 mfc_context->vp8_state.prob_gf = 128;
3562 mfc_context->vp8_state.prob_skip_false = vp8_base_skip_false_prob[q_matrix->quantization_index[0]];
3564 dri_bo_map(mfc_context->vp8_state.coeff_probs_stream_in_bo, 1);
3565 coeff_probs_stream_in_buffer = (unsigned char *)mfc_context->vp8_state.coeff_probs_stream_in_bo->virtual;
3566 assert(coeff_probs_stream_in_buffer);
3567 memcpy(coeff_probs_stream_in_buffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
3568 dri_bo_unmap(mfc_context->vp8_state.coeff_probs_stream_in_bo);
//Per-frame entropy state refresh hook; body is currently a stub.
3571 static void vp8_enc_state_update(struct gen6_mfc_context *mfc_context,
3572 VAQMatrixBufferVP8 *q_matrix)
3575 /*some other probabilities need to be updated*/
3578 extern void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param,
3579 VAEncPictureParameterBufferVP8 *pic_param,
3580 VAQMatrixBufferVP8 *q_matrix,
3581 struct gen6_mfc_context *mfc_context,
3582 struct intel_encoder_context *encoder_context);
//Binarize the VP8 uncompressed frame header into a heap buffer (via
//binarize_vp8_frame_header), copy it into the frame-header BO for the
//HW, then free the temporary buffer.
3584 static void vp8_enc_frame_header_binarize(struct encode_state *encode_state,
3585 struct intel_encoder_context *encoder_context,
3586 struct gen6_mfc_context *mfc_context)
3588 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3589 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3590 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
3591 unsigned char *frame_header_buffer;
3593 binarize_vp8_frame_header(seq_param, pic_param, q_matrix, mfc_context, encoder_context);
3595 dri_bo_map(mfc_context->vp8_state.frame_header_bo, 1);
3596 frame_header_buffer = (unsigned char *)mfc_context->vp8_state.frame_header_bo->virtual;
3597 assert(frame_header_buffer);
//Round the bit count up to whole bytes when copying.
3598 memcpy(frame_header_buffer, mfc_context->vp8_state.vp8_frame_header, (mfc_context->vp8_state.frame_header_bit_count + 7) / 8);
3599 free(mfc_context->vp8_state.vp8_frame_header);
3600 dri_bo_unmap(mfc_context->vp8_state.frame_header_bo);
3603 #define MAX_VP8_FRAME_HEADER_SIZE 0x2000
3604 #define VP8_TOKEN_STATISTICS_BUFFER_SIZE 0x2000
//Per-frame VP8 encoder initialisation: apply BRC quantizers in CBR
//mode, release/reallocate all MFC scratch and VP8-specific BOs sized
//for the current frame, and (re)build the entropy state and binarized
//frame header.
3606 static void gen8_mfc_vp8_init(VADriverContextP ctx,
3607 struct encode_state *encode_state,
3608 struct intel_encoder_context *encoder_context)
3610 struct i965_driver_data *i965 = i965_driver_data(ctx);
3611 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3614 int width_in_mbs = 0;
3615 int height_in_mbs = 0;
3616 int slice_batchbuffer_size;
3617 int is_key_frame, slice_type, rate_control_mode;
3619 VAEncSequenceParameterBufferVP8 *pSequenceParameter = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3620 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3621 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
//Fix: width_in_mbs must derive from frame_width, not frame_height
//(copy-paste bug; matches gen8_mfc_vp8_qindex_estimate and
//gen8_mfc_vp8_brc_init above, which both use frame_width).
3623 width_in_mbs = ALIGN(pSequenceParameter->frame_width, 16) / 16;
3624 height_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
3626 is_key_frame = !pic_param->pic_flags.bits.frame_type;
3627 slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
3628 rate_control_mode = encoder_context->rate_control_mode;
//In CBR mode the BRC-chosen quantizer overrides the app's qindex for
//all four segments, with zero deltas.
3630 if (rate_control_mode == VA_RC_CBR) {
3631 q_matrix->quantization_index[0] = mfc_context->brc.qp_prime_y[0][slice_type];
3632 for (i = 1; i < 4; i++)
3633 q_matrix->quantization_index[i] = q_matrix->quantization_index[0];
3634 for (i = 0; i < 5; i++)
3635 q_matrix->quantization_index_delta[i] = 0;
3638 slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
3639 (SLICE_HEADER + SLICE_TAIL);
3641 /*Encode common setup for MFC*/
3642 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
3643 mfc_context->post_deblocking_output.bo = NULL;
3645 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
3646 mfc_context->pre_deblocking_output.bo = NULL;
3648 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
3649 mfc_context->uncompressed_picture_source.bo = NULL;
3651 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
3652 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
3654 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
3655 if (mfc_context->direct_mv_buffers[i].bo != NULL)
3656 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
3657 mfc_context->direct_mv_buffers[i].bo = NULL;
3660 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
3661 if (mfc_context->reference_surfaces[i].bo != NULL)
3662 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
3663 mfc_context->reference_surfaces[i].bo = NULL;
3666 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
3667 bo = dri_bo_alloc(i965->intel.bufmgr,
3669 width_in_mbs * 64 * 16,
3672 mfc_context->intra_row_store_scratch_buffer.bo = bo;
3674 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
3675 bo = dri_bo_alloc(i965->intel.bufmgr,
3677 width_in_mbs * height_in_mbs * 16,
3680 mfc_context->macroblock_status_buffer.bo = bo;
3682 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
3683 bo = dri_bo_alloc(i965->intel.bufmgr,
3685 16 * width_in_mbs * 64, /* 16 * width_in_mbs * 64 */
3688 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
3690 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
3691 bo = dri_bo_alloc(i965->intel.bufmgr,
3693 16 * width_in_mbs * 64, /* 16 * width_in_mbs * 64 */
3696 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
3698 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
3699 mfc_context->mfc_batchbuffer_surface.bo = NULL;
3701 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
3702 mfc_context->aux_batchbuffer_surface.bo = NULL;
3704 if (mfc_context->aux_batchbuffer) {
3705 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
3706 mfc_context->aux_batchbuffer = NULL;
//The aux batchbuffer's BO doubles as a 16-byte-block surface.
3709 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
3710 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
3711 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
3712 mfc_context->aux_batchbuffer_surface.pitch = 16;
3713 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
3714 mfc_context->aux_batchbuffer_surface.size_block = 16;
3716 gen8_gpe_context_init(ctx, &mfc_context->gpe_context);
3718 /* alloc vp8 encoding buffers*/
3719 dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
3720 bo = dri_bo_alloc(i965->intel.bufmgr,
3722 MAX_VP8_FRAME_HEADER_SIZE,
3725 mfc_context->vp8_state.frame_header_bo = bo;
//Intermediate buffer holds up to 9 partitions of 384 bytes per MB.
3727 mfc_context->vp8_state.intermediate_buffer_max_size = width_in_mbs * height_in_mbs * 384 * 9;
3728 for (i = 0; i < 8; i++) {
3729 mfc_context->vp8_state.intermediate_partition_offset[i] = width_in_mbs * height_in_mbs * 384 * (i + 1);
3731 dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
3732 bo = dri_bo_alloc(i965->intel.bufmgr,
3734 mfc_context->vp8_state.intermediate_buffer_max_size,
3737 mfc_context->vp8_state.intermediate_bo = bo;
3739 dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
3740 bo = dri_bo_alloc(i965->intel.bufmgr,
3742 width_in_mbs * height_in_mbs * 16,
3745 mfc_context->vp8_state.stream_out_bo = bo;
3747 dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
3748 bo = dri_bo_alloc(i965->intel.bufmgr,
3750 sizeof(vp8_default_coef_probs),
3753 mfc_context->vp8_state.coeff_probs_stream_in_bo = bo;
3755 dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
3756 bo = dri_bo_alloc(i965->intel.bufmgr,
3758 VP8_TOKEN_STATISTICS_BUFFER_SIZE,
3761 mfc_context->vp8_state.token_statistics_bo = bo;
3763 dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
3764 bo = dri_bo_alloc(i965->intel.bufmgr,
3766 width_in_mbs * 16 * 64,
3769 mfc_context->vp8_state.mpc_row_store_bo = bo;
3771 vp8_enc_state_init(mfc_context, pic_param, q_matrix);
3772 vp8_enc_frame_header_binarize(encode_state, encoder_context, mfc_context);
//Bind all per-frame surfaces for VP8 encode: reconstructed output
//(pre- or post-deblocking depending on loop filter level), reference
//frames, input YUV, and the coded buffer that also serves as the final
//frame output.  Returns VA_STATUS_SUCCESS.
3776 intel_mfc_vp8_prepare(VADriverContextP ctx,
3777 struct encode_state *encode_state,
3778 struct intel_encoder_context *encoder_context)
3780 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3781 struct object_surface *obj_surface;
3782 struct object_buffer *obj_buffer;
3783 struct i965_coded_buffer_segment *coded_buffer_segment;
3784 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3785 VAStatus vaStatus = VA_STATUS_SUCCESS;
3789 /* reconstructed surface */
3790 obj_surface = encode_state->reconstructed_object;
3791 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
//Loop filter disabled -> HW writes the pre-deblocking output; enabled
//-> post-deblocking output.
3792 if (pic_param->loop_filter_level[0] == 0) {
3793 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
3794 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
3796 mfc_context->post_deblocking_output.bo = obj_surface->bo;
3797 dri_bo_reference(mfc_context->post_deblocking_output.bo);
3800 mfc_context->surface_state.width = obj_surface->orig_width;
3801 mfc_context->surface_state.height = obj_surface->orig_height;
3802 mfc_context->surface_state.w_pitch = obj_surface->width;
3803 mfc_context->surface_state.h_pitch = obj_surface->height;
3805 /* set vp8 reference frames */
3806 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
3807 obj_surface = encode_state->reference_objects[i];
3809 if (obj_surface && obj_surface->bo) {
3810 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
3811 dri_bo_reference(mfc_context->reference_surfaces[i].bo);
3813 mfc_context->reference_surfaces[i].bo = NULL;
3817 /* input YUV surface */
3818 obj_surface = encode_state->input_yuv_object;
3819 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
3820 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
//Coded buffer: PAK writes after the driver's header segment; the end
//offset leaves a 4K guard band at the tail.
3823 obj_buffer = encode_state->coded_buf_object;
3824 bo = obj_buffer->buffer_store->bo;
3825 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
3826 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
3827 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
3828 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
3830 dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
3831 mfc_context->vp8_state.final_frame_bo = mfc_context->mfc_indirect_pak_bse_object.bo;
3832 mfc_context->vp8_state.final_frame_byte_offset = I965_CODEDBUFFER_HEADER_SIZE;
3833 dri_bo_reference(mfc_context->vp8_state.final_frame_bo);
3835 /* set the internal flag to 0 to indicate the coded size is unknown */
3837 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
3838 coded_buffer_segment->mapped = 0;
3839 coded_buffer_segment->codec = encoder_context->codec;
/* Emit the MFX_VP8_ENCODER_CFG command (30 DWs): rate-control style
 * flags, per-MB bit-count limits, frame geometry, and the bin-buffer
 * update pointers produced by the software frame-header binarization. */
3846 gen8_mfc_vp8_encoder_cfg(VADriverContextP ctx,
3847 struct encode_state *encode_state,
3848 struct intel_encoder_context *encoder_context)
3850 struct intel_batchbuffer *batch = encoder_context->base.batch;
3851 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3852 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3853 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3855 BEGIN_BCS_BATCH(batch, 30);
3856 OUT_BCS_BATCH(batch, MFX_VP8_ENCODER_CFG | (30 - 2)); /* SKL should be 31-2 ? */
/* DW1: configuration flags for this single-pass encode. */
3858 OUT_BCS_BATCH(batch,
3859 0 << 9 | /* compressed bitstream output disable */
3860 1 << 7 | /* disable per-segment delta qindex and loop filter in RC */
3861 1 << 6 | /* RC initial pass */
3862 0 << 4 | /* update segment feature data flag */
3863 1 << 3 | /* bitstream statistics output enable */
3864 1 << 2 | /* token statistics output enable */
3865 0 << 1 | /* final bitstream output disable */
3868 OUT_BCS_BATCH(batch, 0); /*DW2*/
/* DW3: clamp per-MB bit spend to the hardware maximum (0xfff). */
3870 OUT_BCS_BATCH(batch,
3871 0xfff << 16 | /* max intra mb bit count limit */
3872 0xfff << 0 /* max inter mb bit count limit */
3875 OUT_BCS_BATCH(batch, 0); /*DW4*/
3876 OUT_BCS_BATCH(batch, 0); /*DW5*/
3877 OUT_BCS_BATCH(batch, 0); /*DW6*/
3878 OUT_BCS_BATCH(batch, 0); /*DW7*/
3879 OUT_BCS_BATCH(batch, 0); /*DW8*/
3880 OUT_BCS_BATCH(batch, 0); /*DW9*/
3881 OUT_BCS_BATCH(batch, 0); /*DW10*/
3882 OUT_BCS_BATCH(batch, 0); /*DW11*/
3883 OUT_BCS_BATCH(batch, 0); /*DW12*/
3884 OUT_BCS_BATCH(batch, 0); /*DW13*/
3885 OUT_BCS_BATCH(batch, 0); /*DW14*/
3886 OUT_BCS_BATCH(batch, 0); /*DW15*/
3887 OUT_BCS_BATCH(batch, 0); /*DW16*/
3888 OUT_BCS_BATCH(batch, 0); /*DW17*/
3889 OUT_BCS_BATCH(batch, 0); /*DW18*/
3890 OUT_BCS_BATCH(batch, 0); /*DW19*/
3891 OUT_BCS_BATCH(batch, 0); /*DW20*/
3892 OUT_BCS_BATCH(batch, 0); /*DW21*/
/* DW22: uncompressed frame-header bits (show_frame / version). */
3894 OUT_BCS_BATCH(batch,
3895 pic_param->pic_flags.bits.show_frame << 23 |
3896 pic_param->pic_flags.bits.version << 20
/* DW23: frame size; the 2-bit scale fields sit above bit 14 of each
 * 14-bit dimension, exactly as packed in the VP8 uncompressed header. */
3899 OUT_BCS_BATCH(batch,
3900 (seq_param->frame_height_scale << 14 | seq_param->frame_height) << 16 |
3901 (seq_param->frame_width_scale << 14 | seq_param->frame_width) << 0
/* DW24-DW28: sizes/offsets computed by vp8_enc_frame_header_binarize()
 * so the hardware can patch qindex/loop-filter/token/mv fields in the
 * binarized header buffer. */
3905 OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bit_count); /* frame header bit count */
3908 OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_qindex_update_pos); /* frame header bin buffer qindex update pointer */
3911 OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_lf_update_pos); /* frame header bin buffer loop filter update pointer*/
3914 OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_token_update_pos); /* frame header bin buffer token update pointer */
3917 OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bin_mv_upate_pos); /*frame header bin buffer mv update pointer */
3920 OUT_BCS_BATCH(batch, 0);
3922 ADVANCE_BCS_BATCH(batch);
/* Emit the MFX_VP8_PIC_STATE command (38 DWs): frame dimensions in MBs,
 * per-frame control flags, loop-filter levels, quantizer indices and
 * deltas, entropy-coder probabilities, and loop-filter reference/mode
 * deltas.  Also refreshes the driver-side probability state first. */
3926 gen8_mfc_vp8_pic_state(VADriverContextP ctx,
3927 struct encode_state *encode_state,
3928 struct intel_encoder_context *encoder_context)
3930 struct intel_batchbuffer *batch = encoder_context->base.batch;
3931 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3932 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3933 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3934 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
/* log2 of the number of token partitions requested by the app. */
3937 log2num = pic_param->pic_flags.bits.num_token_partitions;
3939 /*update mode and token probs*/
3940 vp8_enc_state_update(mfc_context, q_matrix);
3942 BEGIN_BCS_BATCH(batch, 38);
3943 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
/* DW1: frame size in macroblocks, minus one, height in the high half. */
3944 OUT_BCS_BATCH(batch,
3945 (ALIGN(seq_param->frame_height, 16) / 16 - 1) << 16 |
3946 (ALIGN(seq_param->frame_width, 16) / 16 - 1) << 0);
/* DW2: per-frame control flags straight from the picture parameters. */
3948 OUT_BCS_BATCH(batch,
3950 pic_param->sharpness_level << 16 |
3951 pic_param->pic_flags.bits.sign_bias_alternate << 13 |
3952 pic_param->pic_flags.bits.sign_bias_golden << 12 |
3953 pic_param->pic_flags.bits.loop_filter_adj_enable << 11 |
3954 pic_param->pic_flags.bits.mb_no_coeff_skip << 10 |
3955 pic_param->pic_flags.bits.update_mb_segmentation_map << 9 |
3956 pic_param->pic_flags.bits.segmentation_enabled << 8 |
3957 !pic_param->pic_flags.bits.frame_type << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
3958 (pic_param->pic_flags.bits.version / 2) << 4 |
3959 (pic_param->pic_flags.bits.version == 3) << 1 | /* full pixel mode for version 3 */
3960 !!pic_param->pic_flags.bits.version << 0); /* version 0: 6 tap */
/* DW3: loop filter level per segment (segment 3..0, MSB..LSB). */
3962 OUT_BCS_BATCH(batch,
3963 pic_param->loop_filter_level[3] << 24 |
3964 pic_param->loop_filter_level[2] << 16 |
3965 pic_param->loop_filter_level[1] << 8 |
3966 pic_param->loop_filter_level[0] << 0);
/* DW4: base quantizer index per segment. */
3968 OUT_BCS_BATCH(batch,
3969 q_matrix->quantization_index[3] << 24 |
3970 q_matrix->quantization_index[2] << 16 |
3971 q_matrix->quantization_index[1] << 8 |
3972 q_matrix->quantization_index[0] << 0);
/* DW5/DW6: signed qindex deltas encoded as sign-magnitude; the cast to
 * unsigned short followed by >>15 extracts the sign bit of the
 * (sign-extended) delta, abs() supplies the magnitude. */
3974 OUT_BCS_BATCH(batch,
3975 ((unsigned short)(q_matrix->quantization_index_delta[4]) >> 15) << 28 |
3976 abs(q_matrix->quantization_index_delta[4]) << 24 |
3977 ((unsigned short)(q_matrix->quantization_index_delta[3]) >> 15) << 20 |
3978 abs(q_matrix->quantization_index_delta[3]) << 16 |
3979 ((unsigned short)(q_matrix->quantization_index_delta[2]) >> 15) << 12 |
3980 abs(q_matrix->quantization_index_delta[2]) << 8 |
3981 ((unsigned short)(q_matrix->quantization_index_delta[1]) >> 15) << 4 |
3982 abs(q_matrix->quantization_index_delta[1]) << 0);
3984 OUT_BCS_BATCH(batch,
3985 ((unsigned short)(q_matrix->quantization_index_delta[0]) >> 15) << 4 |
3986 abs(q_matrix->quantization_index_delta[0]) << 0);
/* DW7: quantizer clamp range. */
3988 OUT_BCS_BATCH(batch,
3989 pic_param->clamp_qindex_high << 8 |
3990 pic_param->clamp_qindex_low << 0);
/* DW8-DW18: coefficient-probability related DWs, all-ones here. */
3992 for (i = 8; i < 19; i++) {
3993 OUT_BCS_BATCH(batch, 0xffffffff);
/* DW19: segment-ID tree probabilities. */
3996 OUT_BCS_BATCH(batch,
3997 mfc_context->vp8_state.mb_segment_tree_probs[2] << 16 |
3998 mfc_context->vp8_state.mb_segment_tree_probs[1] << 8 |
3999 mfc_context->vp8_state.mb_segment_tree_probs[0] << 0);
/* DW20: skip/intra/reference-selection probabilities. */
4001 OUT_BCS_BATCH(batch,
4002 mfc_context->vp8_state.prob_skip_false << 24 |
4003 mfc_context->vp8_state.prob_intra << 16 |
4004 mfc_context->vp8_state.prob_last << 8 |
4005 mfc_context->vp8_state.prob_gf << 0);
/* DW21/DW22: luma and chroma intra-mode probabilities. */
4007 OUT_BCS_BATCH(batch,
4008 mfc_context->vp8_state.y_mode_probs[3] << 24 |
4009 mfc_context->vp8_state.y_mode_probs[2] << 16 |
4010 mfc_context->vp8_state.y_mode_probs[1] << 8 |
4011 mfc_context->vp8_state.y_mode_probs[0] << 0);
4013 OUT_BCS_BATCH(batch,
4014 mfc_context->vp8_state.uv_mode_probs[2] << 16 |
4015 mfc_context->vp8_state.uv_mode_probs[1] << 8 |
4016 mfc_context->vp8_state.uv_mode_probs[0] << 0);
4018 /* MV update value, DW23-DW32 */
4019 for (i = 0; i < 2; i++) {
4020 for (j = 0; j < 20; j += 4) {
/* The 20th slot (j+3 == 19) does not exist in the 19-entry mv_probs
 * table, so pack a zero there instead of reading out of bounds. */
4021 OUT_BCS_BATCH(batch,
4022 (j + 3 == 19 ? 0 : mfc_context->vp8_state.mv_probs[i][j + 3]) << 24 |
4023 mfc_context->vp8_state.mv_probs[i][j + 2] << 16 |
4024 mfc_context->vp8_state.mv_probs[i][j + 1] << 8 |
4025 mfc_context->vp8_state.mv_probs[i][j + 0] << 0);
/* DW33/DW34: loop-filter reference and mode deltas, 7-bit fields. */
4029 OUT_BCS_BATCH(batch,
4030 (pic_param->ref_lf_delta[3] & 0x7f) << 24 |
4031 (pic_param->ref_lf_delta[2] & 0x7f) << 16 |
4032 (pic_param->ref_lf_delta[1] & 0x7f) << 8 |
4033 (pic_param->ref_lf_delta[0] & 0x7f) << 0);
4035 OUT_BCS_BATCH(batch,
4036 (pic_param->mode_lf_delta[3] & 0x7f) << 24 |
4037 (pic_param->mode_lf_delta[2] & 0x7f) << 16 |
4038 (pic_param->mode_lf_delta[1] & 0x7f) << 8 |
4039 (pic_param->mode_lf_delta[0] & 0x7f) << 0);
4041 OUT_BCS_BATCH(batch, 0);
4042 OUT_BCS_BATCH(batch, 0);
4043 OUT_BCS_BATCH(batch, 0);
4045 ADVANCE_BCS_BATCH(batch);
/* Emit one 64-bit VP8 buffer address slot into the BCS batch:
 * a relocated bo address (when present) followed by two zero DWs and
 * the platform MOCS value for the memory-attribute DW.
 * NOTE(review): multi-statement helper macro; each use occupies a fixed
 * number of DWs inside MFX_VP8_BSP_BUF_BASE_ADDR_STATE. */
4048 #define OUT_VP8_BUFFER(bo, offset) \
4050 OUT_BCS_RELOC64(batch, \
4052 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, \
4055 OUT_BCS_BATCH(batch, 0); \
4056 OUT_BCS_BATCH(batch, 0); \
4058 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/* Emit MFX_VP8_BSP_BUF_BASE_ADDR_STATE (32 DWs): base addresses for the
 * binarized frame header, the intermediate token buffer and its eight
 * partition offsets, the final frame buffer (after the coded-buffer
 * header), and the stream-out / coeff-prob / statistics / MPC row
 * buffers.  Relies on the OUT_VP8_BUFFER macro above, which references
 * the local `i965` and `batch` variables. */
4061 gen8_mfc_vp8_bsp_buf_base_addr_state(VADriverContextP ctx,
4062 struct encode_state *encode_state,
4063 struct intel_encoder_context *encoder_context)
4065 struct i965_driver_data *i965 = i965_driver_data(ctx);
4066 struct intel_batchbuffer *batch = encoder_context->base.batch;
4067 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4069 BEGIN_BCS_BATCH(batch, 32);
4070 OUT_BCS_BATCH(batch, MFX_VP8_BSP_BUF_BASE_ADDR_STATE | (32 - 2));
4072 OUT_VP8_BUFFER(mfc_context->vp8_state.frame_header_bo, 0);
4074 OUT_VP8_BUFFER(mfc_context->vp8_state.intermediate_bo, 0);
4075 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[0]);
4076 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[1]);
4077 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[2]);
4078 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[3]);
4079 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[4]);
4080 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[5]);
4081 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[6]);
4082 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[7]);
4083 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_buffer_max_size);
/* Final frame lands past the driver's coded-buffer header segment. */
4085 OUT_VP8_BUFFER(mfc_context->vp8_state.final_frame_bo, I965_CODEDBUFFER_HEADER_SIZE);
4086 OUT_BCS_BATCH(batch, 0);
4088 OUT_VP8_BUFFER(mfc_context->vp8_state.stream_out_bo, 0);
4089 OUT_VP8_BUFFER(mfc_context->vp8_state.coeff_probs_stream_in_bo, 0);
4090 OUT_VP8_BUFFER(mfc_context->vp8_state.token_statistics_bo, 0);
4091 OUT_VP8_BUFFER(mfc_context->vp8_state.mpc_row_store_bo, 0);
4093 ADVANCE_BCS_BATCH(batch);
/* Program all picture-level MFX state for a VP8 encode, in the order
 * the hardware expects: pipe mode, surface, indirect-object and buffer
 * base addresses, then the VP8-specific BSP addresses, picture state,
 * and encoder configuration. */
4097 gen8_mfc_vp8_pipeline_picture_programing(VADriverContextP ctx,
4098 struct encode_state *encode_state,
4099 struct intel_encoder_context *encoder_context)
4101 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4103 mfc_context->pipe_mode_select(ctx, MFX_FORMAT_VP8, encoder_context);
4104 mfc_context->set_surface_state(ctx, encoder_context);
4105 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
4106 gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
4107 gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
4108 gen8_mfc_vp8_bsp_buf_base_addr_state(ctx, encode_state, encoder_context);
4109 gen8_mfc_vp8_pic_state(ctx, encode_state, encoder_context);
4110 gen8_mfc_vp8_encoder_cfg(ctx, encode_state, encoder_context);
/* Lookup tables translating VME-reported intra prediction modes into
 * the VP8 PAK encoding: one table for whole-MB (16x16) luma modes,
 * one for 4x4 sub-block modes.  (Table contents defined elsewhere.) */
4113 static const unsigned char
4114 vp8_intra_mb_mode_map[VME_MB_INTRA_MODE_COUNT] = {
4121 static const unsigned char
4122 vp8_intra_block_mode_map[VME_B_INTRA_MODE_COUNT] = {
/* Convert a packed VME intra prediction word into the PAK encoding.
 * For 16x16 the low 2 bits select the MB mode; for 4x4 each of the
 * eight 4-bit nibbles in vme_pred_mode is remapped through
 * vp8_intra_block_mode_map and repacked at the same nibble position.
 * Returns the remapped, packed prediction word. */
4134 static int inline gen8_mfc_vp8_intra_mb_mode_map(unsigned int vme_pred_mode, int is_luma_4x4)
4136 unsigned int i, pak_pred_mode = 0;
4137 unsigned int vme_sub_blocks_pred_mode[8], pak_sub_blocks_pred_mode[8]; /* 8 blocks's intra mode */
4140 pak_pred_mode = vp8_intra_mb_mode_map[vme_pred_mode & 0x3];
4142 for (i = 0; i < 8; i++) {
4143 vme_sub_blocks_pred_mode[i] = ((vme_pred_mode >> (4 * i)) & 0xf);
4144 assert(vme_sub_blocks_pred_mode[i] < VME_B_INTRA_MODE_COUNT);
4145 pak_sub_blocks_pred_mode[i] = vp8_intra_block_mode_map[vme_sub_blocks_pred_mode[i]];
4146 pak_pred_mode |= (pak_sub_blocks_pred_mode[i] << (4 * i));
4150 return pak_pred_mode;
/* Emit one MFX_VP8_PAK_OBJECT (7 DWs) for an intra macroblock, after
 * translating the VME output message's MB mode, luma prediction modes
 * and chroma prediction mode into the PAK encoding. */
4153 gen8_mfc_vp8_pak_object_intra(VADriverContextP ctx,
4154 struct intel_encoder_context *encoder_context,
4157 struct intel_batchbuffer *batch)
4159 unsigned int vme_intra_mb_mode, vme_chroma_pred_mode;
4160 unsigned int pak_intra_mb_mode, pak_chroma_pred_mode;
4161 unsigned int vme_luma_pred_mode[2], pak_luma_pred_mode[2];
4164 batch = encoder_context->base.batch;
/* Bits 5:4 of msg[0] carry the VME intra MB mode. */
4166 vme_intra_mb_mode = ((msg[0] & 0x30) >> 4);
4167 assert((vme_intra_mb_mode == 0) || (vme_intra_mb_mode == 2)); //vp8 only support intra_16x16 and intra_4x4
4168 pak_intra_mb_mode = (vme_intra_mb_mode >> 1);
4170 vme_luma_pred_mode[0] = msg[1];
4171 vme_luma_pred_mode[1] = msg[2];
4172 vme_chroma_pred_mode = msg[3] & 0x3;
/* Remap VME mode encodings to PAK encodings (see mode-map helpers). */
4174 pak_luma_pred_mode[0] = gen8_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[0], pak_intra_mb_mode);
4175 pak_luma_pred_mode[1] = gen8_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[1], pak_intra_mb_mode);
4176 pak_chroma_pred_mode = gen8_mfc_vp8_intra_mb_mode_map(vme_chroma_pred_mode, 0);
4178 BEGIN_BCS_BATCH(batch, 7);
4180 OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
4181 OUT_BCS_BATCH(batch, 0);
4182 OUT_BCS_BATCH(batch, 0);
4183 OUT_BCS_BATCH(batch,
4184 (0 << 20) | /* mv format: intra mb */
4185 (0 << 18) | /* Segment ID */
4186 (0 << 17) | /* disable coeff clamp */
4187 (1 << 13) | /* intra mb flag */
4188 (0 << 11) | /* refer picture select: last frame */
4189 (pak_intra_mb_mode << 8) | /* mb type */
4190 (pak_chroma_pred_mode << 4) | /* mb uv mode */
4191 (0 << 2) | /* skip mb flag: disable */
/* MB position in macroblock units, y in the high half. */
4194 OUT_BCS_BATCH(batch, (y << 16) | x);
4195 OUT_BCS_BATCH(batch, pak_luma_pred_mode[0]);
4196 OUT_BCS_BATCH(batch, pak_luma_pred_mode[1]);
4198 ADVANCE_BCS_BATCH(batch);
/* Emit one MFX_VP8_PAK_OBJECT (7 DWs) for an inter macroblock.
 * Only the 16x16 inter partition is supported; the single motion
 * vector is doubled (VP8 stores luma MVs at double precision) and
 * replicated over the message's MV slots. */
4202 gen8_mfc_vp8_pak_object_inter(VADriverContextP ctx,
4203 struct intel_encoder_context *encoder_context,
4207 struct intel_batchbuffer *batch)
4212 batch = encoder_context->base.batch;
4214 /* only support inter_16x16 now */
4215 assert((msg[AVC_INTER_MSG_OFFSET] & INTER_MODE_MASK) == INTER_16X16);
4216 /* for inter_16x16, all 16 MVs should be same,
4217 * and move mv to the vme mb start address to make sure offset is 64 bytes aligned
4218 * as vp8 spec, all vp8 luma motion vectors are doubled when stored
/* Double each 16-bit MV component in place (shift-left-by-one on each
 * half of the packed x/y pair). */
4220 msg[0] = (((msg[AVC_INTER_MV_OFFSET / 4] & 0xffff0000) << 1) | ((msg[AVC_INTER_MV_OFFSET / 4] << 1) & 0xffff));
4222 for (i = 1; i < 16; i++) {
4226 BEGIN_BCS_BATCH(batch, 7);
4228 OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
4229 OUT_BCS_BATCH(batch,
4230 (0 << 29) | /* enable inline mv data: disable */
4232 OUT_BCS_BATCH(batch,
4234 OUT_BCS_BATCH(batch,
4235 (4 << 20) | /* mv format: inter */
4236 (0 << 18) | /* Segment ID */
4237 (0 << 17) | /* coeff clamp: disable */
4238 (0 << 13) | /* intra mb flag: inter mb */
4239 (0 << 11) | /* refer picture select: last frame */
4240 (0 << 8) | /* mb type: 16x16 */
4241 (0 << 4) | /* mb uv mode: dc_pred */
4242 (0 << 2) | /* skip mb flag: disable */
/* MB position in macroblock units, y in the high half. */
4245 OUT_BCS_BATCH(batch, (y << 16) | x);
4248 OUT_BCS_BATCH(batch, 0x8);
4249 OUT_BCS_BATCH(batch, 0x8);
4251 ADVANCE_BCS_BATCH(batch);
/* Walk every macroblock of the frame, read its VME output message, and
 * emit the matching PAK object into slice_batch.  Intra frames emit
 * intra objects unconditionally; inter frames pick intra vs. inter per
 * MB by comparing the VME rate-distortion costs. */
4255 gen8_mfc_vp8_pak_pipeline(VADriverContextP ctx,
4256 struct encode_state *encode_state,
4257 struct intel_encoder_context *encoder_context,
4258 struct intel_batchbuffer *slice_batch)
4260 struct gen6_vme_context *vme_context = encoder_context->vme_context;
4261 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
4262 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
4263 int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
4264 int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
4265 unsigned int *msg = NULL;
4266 unsigned char *msg_ptr = NULL;
4267 unsigned int i, offset, is_intra_frame;
/* VP8 frame_type 0 means key (intra) frame ($9.1 of the spec). */
4269 is_intra_frame = !pic_param->pic_flags.bits.frame_type;
/* Map the VME output writable: the inter path mutates the MV words. */
4271 dri_bo_map(vme_context->vme_output.bo, 1);
4272 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
4274 for (i = 0; i < width_in_mbs * height_in_mbs; i++) {
4275 int h_pos = i % width_in_mbs;
4276 int v_pos = i / width_in_mbs;
4277 msg = (unsigned int *)(msg_ptr + i * vme_context->vme_output.size_block);
4279 if (is_intra_frame) {
4280 gen8_mfc_vp8_pak_object_intra(ctx,
4286 int inter_rdo, intra_rdo;
4287 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
4288 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
/* Choose the cheaper coding mode for this macroblock. */
4290 if (intra_rdo < inter_rdo) {
4291 gen8_mfc_vp8_pak_object_intra(ctx,
4297 offset = i * vme_context->vme_output.size_block;
4298 gen8_mfc_vp8_pak_object_inter(ctx,
4308 dri_bo_unmap(vme_context->vme_output.bo);
4312 * A batch buffer for vp8 pak object commands
/* Build the PAK-object command stream on the CPU into the auxiliary
 * batchbuffer, terminate it with MI_BATCH_BUFFER_END, and hand back its
 * bo (with a reference held for the caller) after freeing the wrapper.
 * The returned bo is later chained into the main batch with
 * MI_BATCH_BUFFER_START. */
4315 gen8_mfc_vp8_software_batchbuffer(VADriverContextP ctx,
4316 struct encode_state *encode_state,
4317 struct intel_encoder_context *encoder_context)
4319 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4320 struct intel_batchbuffer *batch;
4323 batch = mfc_context->aux_batchbuffer;
4324 batch_bo = batch->buffer;
4326 gen8_mfc_vp8_pak_pipeline(ctx, encode_state, encoder_context, batch);
/* Pad to a QWord boundary before the end-of-batch marker. */
4328 intel_batchbuffer_align(batch, 8);
4330 BEGIN_BCS_BATCH(batch, 2);
4331 OUT_BCS_BATCH(batch, 0);
4332 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
4333 ADVANCE_BCS_BATCH(batch);
/* Keep the bo alive for the caller, then drop the batch wrapper. */
4335 dri_bo_reference(batch_bo);
4336 intel_batchbuffer_free(batch);
4337 mfc_context->aux_batchbuffer = NULL;
/* Top-level BCS programming for one VP8 frame: build the MB-level PAK
 * command buffer in software, emit the picture-level state into the
 * main batch, then chain into the PAK buffer via MI_BATCH_BUFFER_START. */
4343 gen8_mfc_vp8_pipeline_programing(VADriverContextP ctx,
4344 struct encode_state *encode_state,
4345 struct intel_encoder_context *encoder_context)
4347 struct intel_batchbuffer *batch = encoder_context->base.batch;
4348 dri_bo *slice_batch_bo;
4350 slice_batch_bo = gen8_mfc_vp8_software_batchbuffer(ctx, encode_state, encoder_context);
4353 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
4354 intel_batchbuffer_emit_mi_flush(batch);
4356 // picture level programing
4357 gen8_mfc_vp8_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* Second-level batch start pointing at the PAK-object buffer. */
4359 BEGIN_BCS_BATCH(batch, 4);
4360 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
4361 OUT_BCS_RELOC64(batch,
4363 I915_GEM_DOMAIN_COMMAND, 0,
4365 OUT_BCS_BATCH(batch, 0);
4366 ADVANCE_BCS_BATCH(batch);
4369 intel_batchbuffer_end_atomic(batch);
/* The batch now owns its own reference via the relocation. */
4371 dri_bo_unreference(slice_batch_bo);
/* Compute the coded VP8 frame size in bytes from the hardware token
 * statistics buffer (partition bit counts), write it into the coded
 * buffer segment header, and return it. */
4374 static int gen8_mfc_calc_vp8_coded_buffer_size(VADriverContextP ctx,
4375 struct encode_state *encode_state,
4376 struct intel_encoder_context *encoder_context)
4378 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4379 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
4380 unsigned char is_intra_frame = !pic_param->pic_flags.bits.frame_type;
4381 unsigned int *vp8_encoding_status, i, first_partition_bytes, token_partition_bytes, vp8_coded_bytes;
/* num_token_partitions is the log2 of the partition count. */
4383 int partition_num = 1 << pic_param->pic_flags.bits.num_token_partitions;
4385 first_partition_bytes = token_partition_bytes = vp8_coded_bytes = 0;
4387 dri_bo_map(mfc_context->vp8_state.token_statistics_bo, 0);
/* Entry 0 holds the first-partition bit count; entries 1..N the token
 * partitions.  Round each bit count up to whole bytes. */
4389 vp8_encoding_status = (unsigned int *)mfc_context->vp8_state.token_statistics_bo->virtual;
4390 first_partition_bytes = (vp8_encoding_status[0] + 7) / 8;
4392 for (i = 1; i <= partition_num; i++)
4393 token_partition_bytes += (vp8_encoding_status[i] + 7) / 8;
4395 /*coded_bytes includes P0~P8 partitions bytes + uncompressed data bytes + partition_size bytes in bitstream + 3 extra bytes */
4396 /*it seems the last partition size in vp8 status buffer is smaller than reality. so add 3 extra bytes */
4397 vp8_coded_bytes = first_partition_bytes + token_partition_bytes + (3 + 7 * !!is_intra_frame) + (partition_num - 1) * 3 + 3;
4399 dri_bo_unmap(mfc_context->vp8_state.token_statistics_bo);
/* Publish the size in the coded-buffer segment header for vaMapBuffer. */
4401 dri_bo_map(mfc_context->vp8_state.final_frame_bo, 0);
4402 struct i965_coded_buffer_segment *coded_buffer_segment = (struct i965_coded_buffer_segment *)(mfc_context->vp8_state.final_frame_bo->virtual);
4403 coded_buffer_segment->base.size = vp8_coded_bytes;
4404 dri_bo_unmap(mfc_context->vp8_state.final_frame_bo);
4406 return vp8_coded_bytes;
/* Encode one VP8 picture: init per-frame state, bind buffers, program
 * and run the BCS pipeline, then compute the coded size.  Under CBR the
 * BRC post-pack step may report an unrecoverable HRD violation, which
 * is logged once per context. */
4410 gen8_mfc_vp8_encode_picture(VADriverContextP ctx,
4411 struct encode_state *encode_state,
4412 struct intel_encoder_context *encoder_context)
4414 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4415 unsigned int rate_control_mode = encoder_context->rate_control_mode;
4416 int current_frame_bits_size;
4419 gen8_mfc_vp8_init(ctx, encode_state, encoder_context);
4420 intel_mfc_vp8_prepare(ctx, encode_state, encoder_context);
4421 /*Programing bcs pipeline*/
4422 gen8_mfc_vp8_pipeline_programing(ctx, encode_state, encoder_context);
4423 gen8_mfc_run(ctx, encode_state, encoder_context);
/* Coded size is in bytes; BRC wants bits. */
4424 current_frame_bits_size = 8 * gen8_mfc_calc_vp8_coded_buffer_size(ctx, encode_state, encoder_context);
4426 if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
4427 sts = gen8_mfc_vp8_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
4428 if (sts == BRC_NO_HRD_VIOLATION) {
4429 gen8_mfc_vp8_hrd_context_update(encode_state, mfc_context);
4430 } else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
/* QP is already pinned at its limit; warn once and give up on
 * repairing the HRD violation for this stream. */
4431 if (!mfc_context->hrd.violation_noted) {
4432 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP) ? "overflow" : "underflow");
4433 mfc_context->hrd.violation_noted = 1;
4435 return VA_STATUS_SUCCESS;
4439 return VA_STATUS_SUCCESS;
/* Release every resource owned by the MFC context: deblocking and
 * source surfaces, PAK/BSE object, DMV and scratch buffers, reference
 * surfaces, the GPE context, batchbuffer surfaces, the aux batch, and
 * all VP8-specific buffer objects.  Each pointer is NULLed after
 * unreference to guard against double-free. */
4443 gen8_mfc_context_destroy(void *context)
4445 struct gen6_mfc_context *mfc_context = context;
4448 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
4449 mfc_context->post_deblocking_output.bo = NULL;
4451 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
4452 mfc_context->pre_deblocking_output.bo = NULL;
4454 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
4455 mfc_context->uncompressed_picture_source.bo = NULL;
4457 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
4458 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
4460 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
4461 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
4462 mfc_context->direct_mv_buffers[i].bo = NULL;
4465 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
4466 mfc_context->intra_row_store_scratch_buffer.bo = NULL;
4468 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
4469 mfc_context->macroblock_status_buffer.bo = NULL;
4471 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
4472 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
4474 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
4475 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
4478 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
4479 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
4480 mfc_context->reference_surfaces[i].bo = NULL;
4483 gen8_gpe_context_destroy(&mfc_context->gpe_context);
4485 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
4486 mfc_context->mfc_batchbuffer_surface.bo = NULL;
4488 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
4489 mfc_context->aux_batchbuffer_surface.bo = NULL;
4491 if (mfc_context->aux_batchbuffer)
4492 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
4494 mfc_context->aux_batchbuffer = NULL;
/* VP8-specific buffer objects. */
4496 dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
4497 mfc_context->vp8_state.coeff_probs_stream_in_bo = NULL;
4499 dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
4500 mfc_context->vp8_state.final_frame_bo = NULL;
4502 dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
4503 mfc_context->vp8_state.frame_header_bo = NULL;
4505 dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
4506 mfc_context->vp8_state.intermediate_bo = NULL;
4508 dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
4509 mfc_context->vp8_state.mpc_row_store_bo = NULL;
4511 dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
4512 mfc_context->vp8_state.stream_out_bo = NULL;
4514 dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
4515 mfc_context->vp8_state.token_statistics_bo = NULL;
/* Dispatch an encode request to the codec-specific encode_picture
 * implementation based on the VA profile; unsupported profiles return
 * VA_STATUS_ERROR_UNSUPPORTED_PROFILE. */
4520 static VAStatus gen8_mfc_pipeline(VADriverContextP ctx,
4522 struct encode_state *encode_state,
4523 struct intel_encoder_context *encoder_context)
4528 case VAProfileH264ConstrainedBaseline:
4529 case VAProfileH264Main:
4530 case VAProfileH264High:
4531 case VAProfileH264MultiviewHigh:
4532 case VAProfileH264StereoHigh:
4533 vaStatus = gen8_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
4536 case VAProfileMPEG2Simple:
4537 case VAProfileMPEG2Main:
4538 vaStatus = gen8_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
/* JPEG needs default quantization tables installed before encoding. */
4541 case VAProfileJPEGBaseline:
4542 jpeg_init_default_qmatrix(ctx, encoder_context);
4543 vaStatus = gen8_mfc_jpeg_encode_picture(ctx, encode_state, encoder_context);
4546 case VAProfileVP8Version0_3:
4547 vaStatus = gen8_mfc_vp8_encode_picture(ctx, encode_state, encoder_context);
4551 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
4558 extern Bool i965_encoder_vp8_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
4560 Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
4562 struct i965_driver_data *i965 = i965_driver_data(ctx);
4563 struct gen6_mfc_context *mfc_context;
4565 if (IS_CHERRYVIEW(i965->intel.device_info) && encoder_context->codec == CODEC_VP8)
4566 return i965_encoder_vp8_pak_context_init(ctx, encoder_context);
4568 mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
4569 assert(mfc_context);
4570 mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
4572 mfc_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
4573 mfc_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
4574 mfc_context->gpe_context.curbe.length = 32 * 4;
4575 mfc_context->gpe_context.sampler.entry_size = 0;
4576 mfc_context->gpe_context.sampler.max_entries = 0;
4578 if (i965->intel.eu_total > 0)
4579 mfc_context->gpe_context.vfe_state.max_num_threads = 6 * i965->intel.eu_total;
4581 mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
4583 mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
4584 mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
4585 mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
4586 mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
4588 if (IS_GEN9(i965->intel.device_info)) {
4589 gen8_gpe_load_kernels(ctx,
4590 &mfc_context->gpe_context,
4594 gen8_gpe_load_kernels(ctx,
4595 &mfc_context->gpe_context,
4600 mfc_context->pipe_mode_select = gen8_mfc_pipe_mode_select;
4601 mfc_context->set_surface_state = gen8_mfc_surface_state;
4602 mfc_context->ind_obj_base_addr_state = gen8_mfc_ind_obj_base_addr_state;
4603 mfc_context->avc_img_state = gen8_mfc_avc_img_state;
4604 mfc_context->avc_qm_state = gen8_mfc_avc_qm_state;
4605 mfc_context->avc_fqm_state = gen8_mfc_avc_fqm_state;
4606 mfc_context->insert_object = gen8_mfc_avc_insert_object;
4607 mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
4609 encoder_context->mfc_context = mfc_context;
4610 encoder_context->mfc_context_destroy = gen8_mfc_context_destroy;
4611 encoder_context->mfc_pipeline = gen8_mfc_pipeline;
4613 if (encoder_context->codec == CODEC_VP8)
4614 encoder_context->mfc_brc_prepare = gen8_mfc_vp8_brc_prepare;
4616 encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;