2 * Copyright © 2017 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Zhao, Yakui <yakui.zhao@intel.com>
26 * Chen, Peng <peng.c.chen@intel.com>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40 #include "intel_media.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_encoder.h"
44 #include "i965_encoder_api.h"
45 #include "i965_encoder_utils.h"
46 #include "gen10_hcp_common.h"
47 #include "gen10_hevc_enc_common.h"
48 #include "gen10_hevc_enc_kernel.h"
49 #include "gen10_hevc_enc_const_def.h"
50 #include "gen10_hevc_enc_kernels_binary.h"
51 #include "gen10_hevc_encoder.h"
/*
 * Locate one kernel inside the packed gen10 HEVC kernel blob.
 *
 * The blob ('pvbinary', 'binary_size' bytes) begins with a
 * gen10_hevc_kernel_header table; 'operation' (plus 'krnstate_idx' for the
 * multi-kernel BRC and MBENC operations) selects the table entry.  On
 * success, ret_kernel->bin and ret_kernel->size describe the kernel's
 * location and length within the blob.
 *
 * NOTE(review): this excerpt is missing interleaved lines (case labels,
 * breaks, returns); comments below describe only the visible code.
 */
54 gen10_hevc_get_kernel_header_and_size(void *pvbinary,
56                                      GEN10_HEVC_ENC_OPERATION operation,
58                                      struct i965_kernel *ret_kernel)
60     typedef uint32_t BIN_PTR[4];
62     gen10_hevc_kernel_header *pkh_table;
63     gen10_intel_kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;
    /* Reject a NULL blob or output descriptor. */
68     if (!pvbinary || !ret_kernel)
71     bin_start = (char *)pvbinary;
72     pkh_table = (gen10_hevc_kernel_header *)pvbinary;
    /* One entry past the last header: the upper bound of the table. */
73     pinvalid_entry = &(pkh_table->hevc_last) + 1;
    /* Default end offset is the whole blob (used for the last kernel). */
74     next_krnoffset = binary_size;
77     case GEN10_HEVC_ENC_SCALING_CONVERSION:
78         pcurr_header = &pkh_table->hevc_ds_convert;
80     case GEN10_HEVC_ENC_ME:
81         pcurr_header = &pkh_table->hevc_hme;
83     case GEN10_HEVC_ENC_BRC:
84         switch (krnstate_idx) {
86             pcurr_header = &pkh_table->hevc_brc_init;
    /* BRC "reset" reuses the same binary as BRC "init". */
89             pcurr_header = &pkh_table->hevc_brc_init;
92             pcurr_header = &pkh_table->hevc_brc_update;
95             pcurr_header = &pkh_table->hevc_brc_lcuqp;
103     case GEN10_HEVC_ENC_MBENC:
104         switch (krnstate_idx) {
106             pcurr_header = &pkh_table->hevc_intra;
109             pcurr_header = &pkh_table->hevc_enc;
112             pcurr_header = &pkh_table->hevc_enc_lcu64;
    /* kernel_start_pointer is stored in 64-byte units, hence the "<< 6". */
129     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
    /* Size = distance from this kernel's start to the next kernel's start,
     * or to the end of the blob when this is the last table entry. */
131     pnext_header = (pcurr_header + 1);
132     if (pnext_header < pinvalid_entry)
133         next_krnoffset = pnext_header->kernel_start_pointer << 6;
135     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
140 #define MAX_HEVC_ENCODER_SURFACES 64
141 #define MAX_URB_SIZE 4096
142 #define NUM_KERNELS_PER_GPE_CONTEXT 1
/*
 * Fill in the common GPE (media pipeline) context parameters from a
 * per-kernel parameter block: CURBE length, sampler and interface
 * descriptor (IDRT) sizing, binding-table layout, and VFE state.
 */
145 gen10_hevc_init_gpe_context(VADriverContextP ctx,
146                             struct i965_gpe_context *gpe_context,
147                             struct gen10_hevc_enc_kernel_parameter *kernel_param)
149     struct i965_driver_data *i965 = i965_driver_data(ctx);
151     gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
    /* At most one sampler entry, and only if the kernel asks for one. */
153     gpe_context->sampler.entry_size = 0;
154     gpe_context->sampler.max_entries = 0;
155     if (kernel_param->sampler_size) {
156         gpe_context->sampler.entry_size = kernel_param->sampler_size;
157         gpe_context->sampler.max_entries = 1;
160     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
161     gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
    /* Binding table (4 bytes per entry) followed by the 64-byte-aligned
     * surface-state area. */
163     gpe_context->surface_state_binding_table.max_entries = MAX_HEVC_ENCODER_SURFACES;
164     gpe_context->surface_state_binding_table.binding_table_offset = 0;
165     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_HEVC_ENCODER_SURFACES * 4, 64);
166     gpe_context->surface_state_binding_table.length = ALIGN(MAX_HEVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_HEVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
    /* Scale the thread count with the number of EUs when known;
     * 112 is the fallback value. */
168     if (i965->intel.eu_total > 0)
169         gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
171         gpe_context->vfe_state.max_num_threads = 112;
    /* CURBE and URB entry sizes are expressed in 32-byte registers (>> 5). */
173     gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
174     gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
    /* URB entries fill whatever space remains after CURBE and the IDRT. */
175     gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
176                                               gpe_context->vfe_state.curbe_allocation_size -
177                                               ((gpe_context->idrt.entry_size >> 5) *
178                                                gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
    /* Driver CLAMP macro takes (min, max, value): keep within [1, 127]. */
179     gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
180     gpe_context->vfe_state.gpgpu_mode = 0;
/*
 * Program the VFE hardware scoreboard (inter-thread dependency) registers
 * in the GPE context.  When no_dependency is set the scoreboard is fully
 * disabled; otherwise a fixed mask and up to 8 relative (delta_x, delta_y)
 * dependency offsets are written (unused slots stay 0).
 *
 * NOTE(review): the else-branch lines between the two settings are not
 * visible in this excerpt.
 */
184 gen10_hevc_init_vfe_scoreboard(struct i965_gpe_context *gpe_context,
185                                struct gen10_hevc_enc_scoreboard_parameter *scoreboard_param)
    /* Nothing to do without both a context and parameters. */
187     if (!gpe_context || !scoreboard_param)
190     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
191     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
192     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
    /* No inter-thread dependencies: clear mask/enable/type and all deltas. */
194     if (scoreboard_param->no_dependency) {
195         gpe_context->vfe_desc5.scoreboard0.mask = 0x0;
196         gpe_context->vfe_desc5.scoreboard0.enable = 0;
197         gpe_context->vfe_desc5.scoreboard0.type = 0;
199         gpe_context->vfe_desc6.dword = 0;
200         gpe_context->vfe_desc7.dword = 0;
    /* Dependency pattern: deltas are 4-bit two's-complement fields,
     * so 0xF encodes -1 (e.g. x0 = -1, y0 = 0 is the left neighbour). */
202         gpe_context->vfe_desc5.scoreboard0.mask = 0x7F;
203         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
204         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
206         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0xF;
207         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
209         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x0;
210         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
212         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 1;
213         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
    /* Remaining four dependency slots are unused. */
215         gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0;
216         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0;
217         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0;
218         gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0;
219         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0;
220         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0;
221         gpe_context->vfe_desc7.scoreboard2.delta_x7 = 0;
222         gpe_context->vfe_desc7.scoreboard2.delta_y7 = 0;
/*
 * Set up the GPE context for the scaling / format-conversion kernel:
 * size the context from the scaling CURBE layout, disable scoreboard
 * dependencies (scaling threads are independent), then look up and load
 * the kernel binary from the packed gen10 HEVC kernel blob.
 */
227 gen10_hevc_vme_init_scaling_context(VADriverContextP ctx,
228                                     struct gen10_hevc_enc_context *vme_context,
229                                     struct gen10_scaling_context *scaling_context)
231     struct gen10_hevc_enc_state *hevc_state;
232     struct i965_gpe_context *gpe_context = NULL;
233     struct gen10_hevc_enc_kernel_parameter kernel_param;
234     struct gen10_hevc_enc_scoreboard_parameter scoreboard_param;
235     struct i965_kernel scale_kernel;
237     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
    /* CURBE and inline data share the scaling CURBE layout; no sampler. */
239     kernel_param.curbe_size = sizeof(gen10_hevc_scaling_curbe_data);
240     kernel_param.inline_data_size = sizeof(gen10_hevc_scaling_curbe_data);
241     kernel_param.sampler_size = 0;
243     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
244     scoreboard_param.mask = 0xFF;
245     scoreboard_param.enable = hevc_state->use_hw_scoreboard;
246     scoreboard_param.type = hevc_state->use_hw_non_stalling_scoreboard;
    /* Scaling has no inter-thread dependencies. */
247     scoreboard_param.no_dependency = true;
249     gpe_context = &scaling_context->gpe_context;
250     gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
251     gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
253     memset(&scale_kernel, 0, sizeof(scale_kernel));
    /* Resolve the scaling/conversion kernel inside the packed blob,
     * then load it into this GPE context. */
255     gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
256                                           sizeof(gen10_media_hevc_kernels),
257                                           GEN10_HEVC_ENC_SCALING_CONVERSION,
261     gen8_gpe_load_kernels(ctx,
/*
 * Set up the GPE context for the HME (hierarchical motion estimation)
 * kernel: size it from the ME CURBE layout, disable scoreboard
 * dependencies, then look up and load the HME kernel binary.
 */
268 gen10_hevc_vme_init_me_context(VADriverContextP ctx,
269                                struct gen10_hevc_enc_context *vme_context,
270                                struct gen10_me_context *me_context)
272     struct gen10_hevc_enc_state *hevc_state;
273     struct i965_gpe_context *gpe_context = NULL;
274     struct gen10_hevc_enc_kernel_parameter kernel_param;
275     struct gen10_hevc_enc_scoreboard_parameter scoreboard_param;
276     struct i965_kernel me_kernel;
278     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
    /* CURBE and inline data share the ME CURBE layout; no sampler. */
280     kernel_param.curbe_size = sizeof(gen10_hevc_me_curbe_data);
281     kernel_param.inline_data_size = sizeof(gen10_hevc_me_curbe_data);
282     kernel_param.sampler_size = 0;
284     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
285     scoreboard_param.mask = 0xFF;
286     scoreboard_param.enable = hevc_state->use_hw_scoreboard;
287     scoreboard_param.type = hevc_state->use_hw_non_stalling_scoreboard;
    /* ME threads run independently. */
288     scoreboard_param.no_dependency = true;
290     gpe_context = &me_context->gpe_context;
291     gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
292     gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
294     memset(&me_kernel, 0, sizeof(me_kernel));
    /* Resolve and load the HME kernel from the packed blob. */
296     gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
297                                           sizeof(gen10_media_hevc_kernels),
302     gen8_gpe_load_kernels(ctx,
/*
 * Set up the three MBENC GPE contexts -- intra, inter LCU32 and inter
 * LCU64 -- each sized from its CURBE layout and loaded with the matching
 * kernel from the packed blob.  Unlike scaling/ME, MBENC keeps scoreboard
 * dependencies enabled (no_dependency = false): neighbouring blocks must
 * finish before a thread may run.
 */
309 gen10_hevc_vme_init_mbenc_context(VADriverContextP ctx,
310                                   struct gen10_hevc_enc_context *vme_context,
311                                   struct gen10_mbenc_context *mbenc_context)
313     struct gen10_hevc_enc_state *hevc_state;
314     struct i965_gpe_context *gpe_context = NULL;
315     struct gen10_hevc_enc_kernel_parameter kernel_param;
316     struct gen10_hevc_enc_scoreboard_parameter scoreboard_param;
317     struct i965_kernel mbenc_kernel;
319     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
    /* Common scoreboard settings, shared by all three contexts below. */
321     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
322     scoreboard_param.mask = 0xFF;
323     scoreboard_param.enable = hevc_state->use_hw_scoreboard;
324     scoreboard_param.type = hevc_state->use_hw_non_stalling_scoreboard;
    /* 1) Intra MBENC kernel. */
326     gpe_context = &mbenc_context->gpe_contexts[GEN10_HEVC_MBENC_I_KRNIDX_G10];
327     kernel_param.curbe_size = sizeof(gen10_hevc_mbenc_intra_curbe_data);
328     kernel_param.inline_data_size = sizeof(gen10_hevc_mbenc_intra_curbe_data);
329     kernel_param.sampler_size = 0;
330     scoreboard_param.no_dependency = false;
331     gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
333     gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
335     memset(&mbenc_kernel, 0, sizeof(mbenc_kernel));
337     gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
338                                           sizeof(gen10_media_hevc_kernels),
339                                           GEN10_HEVC_ENC_MBENC,
340                                           GEN10_HEVC_MBENC_I_KRNIDX_G10,
343     gen8_gpe_load_kernels(ctx,
    /* 2) Inter MBENC kernel for 32x32 LCUs. */
348     gpe_context = &mbenc_context->gpe_contexts[GEN10_HEVC_MBENC_INTER_LCU32_KRNIDX_G10];
349     kernel_param.curbe_size = sizeof(gen10_hevc_mbenc_inter_curbe_data);
350     kernel_param.inline_data_size = sizeof(gen10_hevc_mbenc_inter_curbe_data);
351     kernel_param.sampler_size = 0;
352     scoreboard_param.no_dependency = false;
353     gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
354     gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
356     memset(&mbenc_kernel, 0, sizeof(mbenc_kernel));
358     gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
359                                           sizeof(gen10_media_hevc_kernels),
360                                           GEN10_HEVC_ENC_MBENC,
361                                           GEN10_HEVC_MBENC_INTER_LCU32_KRNIDX_G10,
363     gen8_gpe_load_kernels(ctx,
    /* 3) Inter MBENC kernel for 64x64 LCUs (shares the inter CURBE layout). */
368     gpe_context = &mbenc_context->gpe_contexts[GEN10_HEVC_MBENC_INTER_LCU64_KRNIDX_G10];
369     kernel_param.curbe_size = sizeof(gen10_hevc_mbenc_inter_curbe_data);
370     kernel_param.inline_data_size = sizeof(gen10_hevc_mbenc_inter_curbe_data);
371     kernel_param.sampler_size = 0;
372     scoreboard_param.no_dependency = false;
373     gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
374     gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
376     memset(&mbenc_kernel, 0, sizeof(mbenc_kernel));
378     gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
379                                           sizeof(gen10_media_hevc_kernels),
380                                           GEN10_HEVC_ENC_MBENC,
381                                           GEN10_HEVC_MBENC_INTER_LCU64_KRNIDX_G10,
384     gen8_gpe_load_kernels(ctx,
/*
 * Set up the four BRC (bit-rate control) GPE contexts -- init, reset,
 * frame update and LCU update.  Init/reset share the brc_init CURBE
 * layout (and, per the kernel lookup table, the same binary); the two
 * update contexts share the brc_update CURBE layout.  All BRC kernels
 * run without scoreboard dependencies.
 */
391 gen10_hevc_vme_init_brc_context(VADriverContextP ctx,
392                                 struct gen10_hevc_enc_context *vme_context,
393                                 struct gen10_brc_context *brc_context)
395     struct gen10_hevc_enc_state *hevc_state;
396     struct i965_gpe_context *gpe_context = NULL;
397     struct gen10_hevc_enc_kernel_parameter kernel_param;
398     struct gen10_hevc_enc_scoreboard_parameter scoreboard_param;
399     struct i965_kernel brc_kernel;
401     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
    /* Common scoreboard settings for all four contexts. */
403     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
404     scoreboard_param.mask = 0xFF;
405     scoreboard_param.enable = hevc_state->use_hw_scoreboard;
406     scoreboard_param.type = hevc_state->use_hw_non_stalling_scoreboard;
    /* 1) BRC init. */
408     gpe_context = &brc_context->gpe_contexts[GEN10_HEVC_BRC_INIT];
409     kernel_param.curbe_size = sizeof(gen10_hevc_brc_init_curbe_data);
410     kernel_param.inline_data_size = sizeof(gen10_hevc_brc_init_curbe_data);
411     kernel_param.sampler_size = 0;
412     scoreboard_param.no_dependency = true;
413     gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
414     gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
416     memset(&brc_kernel, 0, sizeof(brc_kernel));
418     gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
419                                           sizeof(gen10_media_hevc_kernels),
424     gen8_gpe_load_kernels(ctx,
    /* 2) BRC reset (same CURBE layout as init). */
429     gpe_context = &brc_context->gpe_contexts[GEN10_HEVC_BRC_RESET];
430     kernel_param.curbe_size = sizeof(gen10_hevc_brc_init_curbe_data);
431     kernel_param.inline_data_size = sizeof(gen10_hevc_brc_init_curbe_data);
432     kernel_param.sampler_size = 0;
433     scoreboard_param.no_dependency = true;
434     gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
435     gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
437     memset(&brc_kernel, 0, sizeof(brc_kernel));
439     gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
440                                           sizeof(gen10_media_hevc_kernels),
442                                           GEN10_HEVC_BRC_RESET,
445     gen8_gpe_load_kernels(ctx,
    /* 3) BRC frame-level update. */
450     gpe_context = &brc_context->gpe_contexts[GEN10_HEVC_BRC_FRAME_UPDATE];
451     kernel_param.curbe_size = sizeof(gen10_hevc_brc_update_curbe_data);
452     kernel_param.inline_data_size = sizeof(gen10_hevc_brc_update_curbe_data);
453     kernel_param.sampler_size = 0;
454     scoreboard_param.no_dependency = true;
455     gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
456     gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
458     memset(&brc_kernel, 0, sizeof(brc_kernel));
460     gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
461                                           sizeof(gen10_media_hevc_kernels),
463                                           GEN10_HEVC_BRC_FRAME_UPDATE,
466     gen8_gpe_load_kernels(ctx,
    /* 4) BRC LCU-level update (same CURBE layout as the frame update). */
471     gpe_context = &brc_context->gpe_contexts[GEN10_HEVC_BRC_LCU_UPDATE];
472     kernel_param.curbe_size = sizeof(gen10_hevc_brc_update_curbe_data);
473     kernel_param.inline_data_size = sizeof(gen10_hevc_brc_update_curbe_data);
474     kernel_param.sampler_size = 0;
475     scoreboard_param.no_dependency = true;
476     gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
477     gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
479     memset(&brc_kernel, 0, sizeof(brc_kernel));
481     gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
482                                           sizeof(gen10_media_hevc_kernels),
484                                           GEN10_HEVC_BRC_LCU_UPDATE,
487     gen8_gpe_load_kernels(ctx,
/*
 * Initialize every VME kernel context owned by the encoder context:
 * scaling, HME, MBENC and BRC, in that order.
 */
494 gen10_hevc_vme_init_kernels_context(VADriverContextP ctx,
495                                     struct intel_encoder_context *encoder_context,
496                                     struct gen10_hevc_enc_context *vme_context)
498     gen10_hevc_vme_init_scaling_context(ctx, vme_context, &vme_context->scaling_context);
499     gen10_hevc_vme_init_me_context(ctx, vme_context, &vme_context->me_context);
500     gen10_hevc_vme_init_mbenc_context(ctx, vme_context, &vme_context->mbenc_context);
501     gen10_hevc_vme_init_brc_context(ctx, vme_context, &vme_context->brc_context);
/*
 * Destructor for the per-surface private data (installed as
 * obj_surface->free_private_data).  Releases the GPE resource wrapping
 * each auxiliary surface, destroys the auxiliary VA surfaces themselves
 * (4x/16x/2x downscaled and the 10-bit->NV12 converted copy), and frees
 * the motion-vector temporal buffer.
 */
505 gen10_hevc_free_surface(void **data)
507     struct gen10_hevc_surface_priv *surface_priv;
512     surface_priv = *data;
    /* 4x downscaled surface (HME). */
514     if (surface_priv->scaled_4x_surface) {
515         i965_free_gpe_resource(&surface_priv->gpe_scaled_4x_surface);
517         i965_DestroySurfaces(surface_priv->ctx, &surface_priv->scaled_4x_surface_id, 1);
518         surface_priv->scaled_4x_surface_id = VA_INVALID_SURFACE;
519         surface_priv->scaled_4x_surface = NULL;
    /* 16x downscaled surface (16x HME). */
522     if (surface_priv->scaled_16x_surface) {
523         i965_free_gpe_resource(&surface_priv->gpe_scaled_16x_surface);
525         i965_DestroySurfaces(surface_priv->ctx, &surface_priv->scaled_16x_surface_id, 1);
526         surface_priv->scaled_16x_surface_id = VA_INVALID_SURFACE;
527         surface_priv->scaled_16x_surface = NULL;
    /* 2x downscaled surface (64x64-LCU path). */
530     if (surface_priv->scaled_2x_surface) {
531         i965_free_gpe_resource(&surface_priv->gpe_scaled_2x_surface);
533         i965_DestroySurfaces(surface_priv->ctx, &surface_priv->scaled_2x_surface_id, 1);
534         surface_priv->scaled_2x_surface_id = VA_INVALID_SURFACE;
535         surface_priv->scaled_2x_surface = NULL;
    /* NV12 conversion of a 10-bit source. */
538     if (surface_priv->converted_surface) {
539         i965_free_gpe_resource(&surface_priv->gpe_converted_surface);
541         i965_DestroySurfaces(surface_priv->ctx, &surface_priv->converted_surface_id, 1);
542         surface_priv->converted_surface_id = VA_INVALID_SURFACE;
543         surface_priv->converted_surface = NULL;
546     i965_free_gpe_resource(&surface_priv->motion_vector_temporal);
/*
 * Attach (or refresh) the encoder's private data on a source surface.
 *
 * If existing private data matches the current frame geometry, LCU size
 * and bit depth it is reused as-is; otherwise it is destroyed and rebuilt:
 *   - 2x downscaled surface when 64x64 LCUs are used,
 *   - NV12-converted copy when the source is 10-bit,
 *   - 4x (and optionally 16x) downscaled surfaces when HME is enabled,
 *   - a motion-vector temporal buffer sized for the frame.
 * Returns VA_STATUS_SUCCESS or an allocation/invalid-surface error.
 *
 * NOTE(review): on allocation failure this returns with a partially
 * initialized surface_priv still attached; cleanup is deferred to
 * gen10_hevc_free_surface via obj_surface->free_private_data.
 */
556 gen10_hevc_init_surface_priv(VADriverContextP ctx,
557                              struct encode_state *encode_state,
558                              struct intel_encoder_context *encoder_context,
559                              struct object_surface *obj_surface)
561     struct i965_driver_data *i965 = i965_driver_data(ctx);
562     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
563     struct gen10_hevc_enc_frame_info *frame_info;
564     struct gen10_hevc_enc_state *hevc_state;
565     struct gen10_hevc_surface_priv *surface_priv;
566     int downscaled_width_4x = 0, downscaled_height_4x = 0;
567     int downscaled_width_16x = 0, downscaled_height_16x = 0;
568     int frame_width = 0, frame_height = 0, size;
570     if (!obj_surface || !obj_surface->bo)
571         return VA_STATUS_ERROR_INVALID_SURFACE;
    /* Private data owned by some other component: release it first. */
573     if (obj_surface->private_data &&
574         obj_surface->free_private_data != gen10_hevc_free_surface) {
575         obj_surface->free_private_data(&obj_surface->private_data);
576         obj_surface->private_data = NULL;
579     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
580     frame_info = &vme_context->frame_info;
    /* Our own private data: reuse it if the configuration is unchanged,
     * otherwise free it and rebuild below. */
582     if (obj_surface->private_data) {
583         surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
585         if ((surface_priv->frame_width == frame_info->frame_width) &&
586             (surface_priv->frame_height == frame_info->frame_height) &&
587             (surface_priv->width_ctb == frame_info->width_in_lcu) &&
588             (surface_priv->height_ctb == frame_info->height_in_lcu) &&
589             (surface_priv->is_10bit == hevc_state->is_10bit) &&
590             (surface_priv->is_64lcu == hevc_state->is_64lcu))
591             return VA_STATUS_SUCCESS;
593         obj_surface->free_private_data(&obj_surface->private_data);
594         obj_surface->private_data = NULL;
598     surface_priv = calloc(1, sizeof(struct gen10_hevc_surface_priv));
601         return VA_STATUS_ERROR_ALLOCATION_FAILED;
603     surface_priv->ctx = ctx;
    /* Install the destructor now so partially built state is reclaimable. */
605     obj_surface->private_data = surface_priv;
606     obj_surface->free_private_data = gen10_hevc_free_surface;
    /* 64x64-LCU mode needs a half-resolution (2x downscaled) surface,
     * dimensions rounded up to whole LCUs first. */
608     if (hevc_state->is_64lcu) {
609         frame_width = ALIGN(frame_info->frame_width, 64) >> 1;
610         frame_height = ALIGN(frame_info->frame_height, 64) >> 1;
612         if (i965_CreateSurfaces(ctx,
617                                 &surface_priv->scaled_2x_surface_id) != VA_STATUS_SUCCESS)
618             return VA_STATUS_ERROR_ALLOCATION_FAILED;
620         surface_priv->scaled_2x_surface = SURFACE(surface_priv->scaled_2x_surface_id);
622         if (!surface_priv->scaled_2x_surface)
623             return VA_STATUS_ERROR_ALLOCATION_FAILED;
625         i965_check_alloc_surface_bo(ctx, surface_priv->scaled_2x_surface, 1,
626                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
628         i965_object_surface_to_2d_gpe_resource(&surface_priv->gpe_scaled_2x_surface,
629                                                surface_priv->scaled_2x_surface);
    /* 10-bit sources additionally get a full-resolution NV12 copy. */
632     if (hevc_state->is_10bit) {
633         if (i965_CreateSurfaces(ctx,
634                                 frame_info->frame_width,
635                                 frame_info->frame_height,
638                                 &surface_priv->converted_surface_id) != VA_STATUS_SUCCESS)
639             return VA_STATUS_ERROR_ALLOCATION_FAILED;
641         surface_priv->converted_surface = SURFACE(surface_priv->converted_surface_id);
643         if (!surface_priv->converted_surface)
644             return VA_STATUS_ERROR_ALLOCATION_FAILED;
646         i965_check_alloc_surface_bo(ctx, surface_priv->converted_surface, 1,
647                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
649         i965_object_surface_to_2d_gpe_resource(&surface_priv->gpe_converted_surface,
650                                                surface_priv->converted_surface);
    /* HME needs a 4x downscaled surface (dimensions aligned to 32). */
653     if (hevc_state->hme_supported) {
654         downscaled_width_4x = ALIGN(frame_info->frame_width / 4, 32);
655         downscaled_height_4x = ALIGN(frame_info->frame_height / 4, 32);
657         if (i965_CreateSurfaces(ctx,
659                                 downscaled_height_4x,
662                                 &surface_priv->scaled_4x_surface_id) != VA_STATUS_SUCCESS)
663             return VA_STATUS_ERROR_ALLOCATION_FAILED;
665         surface_priv->scaled_4x_surface = SURFACE(surface_priv->scaled_4x_surface_id);
667         if (!surface_priv->scaled_4x_surface)
668             return VA_STATUS_ERROR_ALLOCATION_FAILED;
670         i965_check_alloc_surface_bo(ctx, surface_priv->scaled_4x_surface, 1,
671                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
673         i965_object_surface_to_2d_gpe_resource(&surface_priv->gpe_scaled_4x_surface,
674                                                surface_priv->scaled_4x_surface);
    /* 16x HME downsamples the 4x surface by another factor of 4. */
677     if (hevc_state->hme_supported &&
678         hevc_state->b16xme_supported) {
679         downscaled_width_16x = ALIGN(downscaled_width_4x / 4, 32);
680         downscaled_height_16x = ALIGN(downscaled_height_4x / 4, 32);
682         if (i965_CreateSurfaces(ctx,
683                                 downscaled_width_16x,
684                                 downscaled_height_16x,
687                                 &surface_priv->scaled_16x_surface_id) != VA_STATUS_SUCCESS)
688             return VA_STATUS_ERROR_ALLOCATION_FAILED;
690         surface_priv->scaled_16x_surface = SURFACE(surface_priv->scaled_16x_surface_id);
692         if (!surface_priv->scaled_16x_surface)
693             return VA_STATUS_ERROR_ALLOCATION_FAILED;
695         i965_check_alloc_surface_bo(ctx, surface_priv->scaled_16x_surface, 1,
696                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
698         i965_object_surface_to_2d_gpe_resource(&surface_priv->gpe_scaled_16x_surface,
699                                                surface_priv->scaled_16x_surface);
    /* MV temporal buffer: sized for the larger of the 64x16 and 32x32
     * block-count layouts, 64 bytes per unit, padded to an even count. */
702     frame_width = frame_info->frame_width;
703     frame_height = frame_info->frame_height;
705     size = MAX(((frame_width + 63) >> 6) * ((frame_height + 15) >> 4),
706                ((frame_width + 31) >> 5) * ((frame_height + 31) >> 5));
707     size = ALIGN(size, 2) * 64;
708     if (!i965_allocate_gpe_resource(i965->intel.bufmgr,
709                                     &surface_priv->motion_vector_temporal,
711                                     "Motion vector temporal buffer"))
712         return VA_STATUS_ERROR_ALLOCATION_FAILED;
    /* Record the configuration so the reuse check above can compare it. */
714     surface_priv->is_10bit = hevc_state->is_10bit;
715     surface_priv->is_64lcu = hevc_state->is_64lcu;
716     surface_priv->frame_width = frame_info->frame_width;
717     surface_priv->frame_height = frame_info->frame_height;
718     surface_priv->width_ctb = frame_info->width_in_lcu;
719     surface_priv->height_ctb = frame_info->height_in_lcu;
721     return VA_STATUS_SUCCESS;
/*
 * Release every GPE buffer/surface owned by the encoder's VME context.
 * i965_free_gpe_resource() is safe on resources that were never
 * allocated, so everything is freed unconditionally.
 */
725 gen10_hevc_free_enc_resources(void *context)
727     struct gen10_hevc_enc_context *vme_context = context;
    /* MBENC work buffers and constant tables. */
732     i965_free_gpe_resource(&vme_context->res_mb_code_surface);
734     i965_free_gpe_resource(&vme_context->res_temp_curecord_lcu32_surface);
735     i965_free_gpe_resource(&vme_context->res_16x16_qp_data_surface);
736     i965_free_gpe_resource(&vme_context->res_lculevel_input_data_buffer);
737     i965_free_gpe_resource(&vme_context->res_concurrent_tg_data);
738     i965_free_gpe_resource(&vme_context->res_cu_split_surface);
739     i965_free_gpe_resource(&vme_context->res_kernel_trace_data);
740     i965_free_gpe_resource(&vme_context->res_enc_const_table_intra);
741     i965_free_gpe_resource(&vme_context->res_enc_const_table_inter);
742     i965_free_gpe_resource(&vme_context->res_enc_const_table_inter_lcu64);
743     i965_free_gpe_resource(&vme_context->res_scratch_surface);
745     i965_free_gpe_resource(&vme_context->res_temp2_curecord_lcu32_surface);
746     i965_free_gpe_resource(&vme_context->res_temp_curecord_surface_lcu64);
747     i965_free_gpe_resource(&vme_context->res_enc_scratch_buffer);
748     i965_free_gpe_resource(&vme_context->res_enc_scratch_lcu64_buffer);
749     i965_free_gpe_resource(&vme_context->res_64x64_dist_buffer);
    /* Job-queue and residual scratch buffers (LCU32 and LCU64 variants). */
751     i965_free_gpe_resource(&vme_context->res_jbq_header_buffer);
752     i965_free_gpe_resource(&vme_context->res_jbq_header_lcu64_buffer);
753     i965_free_gpe_resource(&vme_context->res_jbq_data_lcu32_surface);
754     i965_free_gpe_resource(&vme_context->res_jbq_data_lcu64_surface);
755     i965_free_gpe_resource(&vme_context->res_residual_scratch_lcu32_surface);
757     i965_free_gpe_resource(&vme_context->res_residual_scratch_lcu64_surface);
758     i965_free_gpe_resource(&vme_context->res_mb_stat_surface);
759     i965_free_gpe_resource(&vme_context->res_mb_split_surface);
    /* HME motion-vector/distortion outputs (4x and 16x). */
761     i965_free_gpe_resource(&vme_context->res_s4x_memv_data_surface);
762     i965_free_gpe_resource(&vme_context->res_s4x_me_dist_surface);
764     i965_free_gpe_resource(&vme_context->res_s16x_memv_data_surface);
765     i965_free_gpe_resource(&vme_context->res_mv_dist_sum_buffer);
    /* BRC state, statistics and constant data. */
767     i965_free_gpe_resource(&vme_context->res_brc_me_dist_surface);
768     i965_free_gpe_resource(&vme_context->res_brc_input_enc_kernel_buffer);
769     i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
770     i965_free_gpe_resource(&vme_context->res_brc_intra_dist_surface);
771     i965_free_gpe_resource(&vme_context->res_brc_pak_statistics_buffer[0]);
772     i965_free_gpe_resource(&vme_context->res_brc_pak_statistics_buffer[1]);
773     i965_free_gpe_resource(&vme_context->res_brc_pic_image_state_write_buffer);
774     i965_free_gpe_resource(&vme_context->res_brc_pic_image_state_read_buffer);
775     i965_free_gpe_resource(&vme_context->res_brc_const_data_surface);
776     i965_free_gpe_resource(&vme_context->res_brc_lcu_const_data_buffer);
777     i965_free_gpe_resource(&vme_context->res_brc_mb_qp_surface);
781 gen10_hevc_allocate_enc_resources(VADriverContextP ctx,
782 struct encode_state *encode_state,
783 struct intel_encoder_context *encoder_context)
786 struct i965_driver_data *i965 = i965_driver_data(ctx);
787 struct gen10_hevc_enc_context *vme_context;
788 struct gen10_hevc_enc_state *hevc_state;
789 struct gen10_hevc_enc_frame_info *frame_info;
790 int dw_width, dw_height;
795 vme_context = (struct gen10_hevc_enc_context *)encoder_context->vme_context;
796 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
797 frame_info = &vme_context->frame_info;
799 i965_free_gpe_resource(&vme_context->res_mb_code_surface);
800 res_size = vme_context->frame_info.width_in_lcu * vme_context->frame_info.height_in_lcu;
801 if (hevc_state->is_64lcu)
802 res_size = res_size * 64 * 32;
804 res_size = res_size * 16 * 32;
806 res_size = res_size + hevc_state->cu_records_offset;
807 res_size = ALIGN(res_size, 4096);
808 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
809 &vme_context->res_mb_code_surface,
815 i965_free_gpe_resource(&vme_context->res_temp_curecord_lcu32_surface);
816 dw_width = ALIGN(hevc_state->frame_width, 64);
817 dw_height = ALIGN(hevc_state->frame_height, 64);
818 dw_width = ALIGN(dw_width, 64);
819 res_size = dw_width * dw_height * 64 + 1024;
820 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
821 &vme_context->res_temp_curecord_lcu32_surface,
822 dw_width, dw_height, dw_width,
823 "Temp CURecord surfaces");
827 i965_free_gpe_resource(&vme_context->res_16x16_qp_data_surface);
828 dw_width = ALIGN(hevc_state->frame_width, 64) >> 4;
829 dw_height = ALIGN(hevc_state->frame_height, 64) >> 4;
830 dw_width = ALIGN(dw_width, 64);
831 dw_height = ALIGN(dw_height, 64);
832 dw_width = ALIGN(dw_width, 64);
833 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
834 &vme_context->res_16x16_qp_data_surface,
835 dw_width, dw_height, dw_width,
836 "CU 16x16 input surface");
840 i965_free_gpe_resource(&vme_context->res_lculevel_input_data_buffer);
841 res_size = vme_context->frame_info.width_in_lcu * vme_context->frame_info.height_in_lcu * 16;
842 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
843 &vme_context->res_lculevel_input_data_buffer,
845 "LCU Input data buffer");
849 i965_free_gpe_resource(&vme_context->res_concurrent_tg_data);
851 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
852 &vme_context->res_concurrent_tg_data,
854 "Concurrent Thread_group data");
858 i965_free_gpe_resource(&vme_context->res_cu_split_surface);
859 dw_width = ALIGN(hevc_state->frame_width, 64) >> 4;
860 dw_height = ALIGN(hevc_state->frame_height, 64) >> 4;
861 dw_width = ALIGN(dw_width, 64);
862 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
863 &vme_context->res_cu_split_surface,
864 dw_width, dw_height, dw_width,
869 i965_free_gpe_resource(&vme_context->res_kernel_trace_data);
871 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
872 &vme_context->res_kernel_trace_data,
878 i965_free_gpe_resource(&vme_context->res_enc_const_table_intra);
879 res_size = GEN10_HEVC_ENC_INTRA_CONST_LUT_SIZE ;
880 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
881 &vme_context->res_enc_const_table_intra,
883 "Constant data for Intra");
887 i965_free_gpe_resource(&vme_context->res_enc_const_table_inter);
888 res_size = GEN10_HEVC_ENC_INTER_CONST_LUT32_SIZE ;
890 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
891 &vme_context->res_enc_const_table_inter,
893 "Constant data for Inter");
897 i965_free_gpe_resource(&vme_context->res_enc_const_table_inter_lcu64);
898 if (hevc_state->is_64lcu) {
899 res_size = GEN10_HEVC_ENC_INTER_CONST_LUT64_SIZE ;
901 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
902 &vme_context->res_enc_const_table_inter_lcu64,
904 "Constant data for LCU64_Inter");
909 i965_free_gpe_resource(&vme_context->res_scratch_surface);
910 dw_width = ALIGN(hevc_state->frame_width, 64) >> 3;
911 dw_height = ALIGN(hevc_state->frame_height, 64) >> 5;
912 dw_width = ALIGN(dw_width, 64);
913 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
914 &vme_context->res_scratch_surface,
915 dw_width, dw_height, dw_width,
916 "CU scratch surface");
920 i965_free_gpe_resource(&vme_context->res_temp2_curecord_lcu32_surface);
921 dw_width = ALIGN(hevc_state->frame_width, 64);
922 dw_height = ALIGN(hevc_state->frame_height, 64);
923 dw_width = ALIGN(dw_width, 64);
924 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
925 &vme_context->res_temp2_curecord_lcu32_surface,
926 dw_width, dw_height, dw_width,
927 "second temp CURecord surfaces");
931 if (hevc_state->is_64lcu) {
932 i965_free_gpe_resource(&vme_context->res_temp_curecord_surface_lcu64);
933 /* the max number of CU based on 8x8. */
934 dw_width = ALIGN(hevc_state->frame_width, 64);
935 dw_height = ALIGN(hevc_state->frame_height, 64) / 2;
936 dw_width = ALIGN(dw_width, 64);
937 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
938 &vme_context->res_temp_curecord_surface_lcu64,
939 dw_width, dw_height, dw_width,
940 "temp CURecord LCU64 surfaces");
945 i965_free_gpe_resource(&vme_context->res_enc_scratch_buffer);
946 dw_width = ALIGN(hevc_state->frame_width, 64) >> 5;
947 dw_height = ALIGN(hevc_state->frame_height, 64) >> 5;
948 res_size = dw_width * dw_height * 13312 + 4096;
949 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
950 &vme_context->res_enc_scratch_buffer,
956 i965_free_gpe_resource(&vme_context->res_enc_scratch_lcu64_buffer);
957 dw_width = vme_context->frame_info.width_in_lcu;
958 dw_height = vme_context->frame_info.height_in_lcu;
959 res_size = dw_width * dw_height * 13312;
960 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
961 &vme_context->res_enc_scratch_lcu64_buffer,
967 i965_free_gpe_resource(&vme_context->res_64x64_dist_buffer);
968 dw_width = ALIGN(hevc_state->frame_width, 64) >> 6;
969 dw_height = ALIGN(hevc_state->frame_height, 64) >> 6;
970 res_size = dw_width * dw_height * 32;
971 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
972 &vme_context->res_64x64_dist_buffer,
974 "Res 64x64 Distortion");
978 i965_free_gpe_resource(&vme_context->res_jbq_header_buffer);
979 dw_width = ALIGN(hevc_state->frame_width, 64) >> 5;
980 dw_height = ALIGN(hevc_state->frame_height, 64) >> 5;
981 res_size = dw_width * dw_height * 2656;
982 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
983 &vme_context->res_jbq_header_buffer,
989 i965_free_gpe_resource(&vme_context->res_jbq_header_lcu64_buffer);
990 dw_width = ALIGN(hevc_state->frame_width, 64) >> 5;
991 dw_height = ALIGN(hevc_state->frame_height, 64) >> 5;
992 res_size = dw_width * dw_height * 32;
993 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
994 &vme_context->res_jbq_header_lcu64_buffer,
996 "Job queue_header for Multi-thread LCU");
1000 i965_free_gpe_resource(&vme_context->res_jbq_data_lcu32_surface);
1001 dw_width = ALIGN(hevc_state->frame_width, 64);
1002 dw_height = (ALIGN(hevc_state->frame_height, 64) >> 5) * 58;
1003 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1004 &vme_context->res_jbq_data_lcu32_surface,
1005 dw_width, dw_height, dw_width,
1006 "Job queue data surface for Multi-thread LCU32");
1010 i965_free_gpe_resource(&vme_context->res_jbq_data_lcu64_surface);
1011 dw_width = ALIGN(hevc_state->frame_width, 64) >> 1;
1012 dw_height = (ALIGN(hevc_state->frame_height, 64) >> 6) * 66;
1013 dw_width = ALIGN(dw_width, 64);
1014 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1015 &vme_context->res_jbq_data_lcu64_surface,
1016 dw_width, dw_height, dw_width,
1017 "Job queue data surface for Multi-thread LCU64");
1021 i965_free_gpe_resource(&vme_context->res_residual_scratch_lcu32_surface);
1022 dw_width = ALIGN(hevc_state->frame_width, 64) << 1;
1023 dw_height = ALIGN(hevc_state->frame_height, 64) << 2;
1024 dw_width = ALIGN(dw_width, 64);
1025 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1026 &vme_context->res_residual_scratch_lcu32_surface,
1027 dw_width, dw_height, dw_width,
1028 "Resiudal scratch for LCU32");
1032 i965_free_gpe_resource(&vme_context->res_residual_scratch_lcu64_surface);
1033 dw_width = ALIGN(hevc_state->frame_width, 64) << 1;
1034 dw_height = ALIGN(hevc_state->frame_height, 64) << 2;
1035 dw_width = ALIGN(dw_width, 64);
1036 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1037 &vme_context->res_residual_scratch_lcu64_surface,
1038 dw_width, dw_height, dw_width,
1039 "Resiudal scratch for LCU64");
1043 i965_free_gpe_resource(&vme_context->res_mb_stat_surface);
1044 dw_width = ALIGN(frame_info->width_in_mb * 4, 64);
1045 dw_height = ALIGN(frame_info->height_in_mb, 8) * 2;
1046 dw_width = ALIGN(dw_width, 64);
1047 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1048 &vme_context->res_mb_stat_surface,
1049 dw_width, dw_height, dw_width,
1054 i965_free_gpe_resource(&vme_context->res_mb_split_surface);
1055 dw_width = ALIGN(hevc_state->frame_width, 64) >> 2;
1056 dw_height = ALIGN(hevc_state->frame_height, 64) >> 4;
1057 dw_width = ALIGN(dw_width, 64);
1058 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1059 &vme_context->res_mb_split_surface,
1060 dw_width, dw_height, dw_width,
1061 "MB split surface");
1065 if (hevc_state->hme_supported) {
1066 i965_free_gpe_resource(&vme_context->res_s4x_memv_data_surface);
1067 dw_width = hevc_state->frame_width_4x * 4;
1068 dw_height = hevc_state->frame_height_4x >> 3;
1069 dw_width = ALIGN(dw_width, 64);
1070 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1071 &vme_context->res_s4x_memv_data_surface,
1072 dw_width, dw_height, dw_width,
1077 i965_free_gpe_resource(&vme_context->res_s4x_me_dist_surface);
1078 dw_width = hevc_state->frame_width_4x;
1079 dw_height = hevc_state->frame_height_4x >> 1;
1080 dw_width = ALIGN(dw_width, 64);
1081 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1082 &vme_context->res_s4x_me_dist_surface,
1083 dw_width, dw_height, dw_width,
1089 if (hevc_state->hme_supported &&
1090 hevc_state->b16xme_supported) {
1091 i965_free_gpe_resource(&vme_context->res_s16x_memv_data_surface);
1092 dw_width = hevc_state->frame_width_16x * 4;
1093 dw_height = hevc_state->frame_height_16x >> 3;
1094 dw_width = ALIGN(dw_width, 64);
1095 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1096 &vme_context->res_s16x_memv_data_surface,
1097 dw_width, dw_height, dw_width,
1103 i965_free_gpe_resource(&vme_context->res_mv_dist_sum_buffer);
1105 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1106 &vme_context->res_mv_dist_sum_buffer,
1112 i965_free_gpe_resource(&vme_context->res_brc_me_dist_surface);
1113 dw_width = ALIGN(hevc_state->frame_width, 64) >> 4;
1114 dw_width = ALIGN(dw_width, 64);
1115 dw_height = ALIGN(hevc_state->frame_height, 64) >> 4;
1116 dw_height = ALIGN(dw_height, 64);
1117 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1118 &vme_context->res_brc_me_dist_surface,
1119 dw_width, dw_height, dw_width,
1120 "ME BRC distortion");
1124 i965_free_gpe_resource(&vme_context->res_brc_input_enc_kernel_buffer);
1126 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1127 &vme_context->res_brc_input_enc_kernel_buffer,
1129 "Brc Input for Enc Kernel");
1133 i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
1134 res_size = GEN10_HEVC_BRC_HISTORY_BUFFER_SIZE;
1135 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1136 &vme_context->res_brc_history_buffer,
1138 "Brc History buffer");
1142 i965_zero_gpe_resource(&vme_context->res_brc_history_buffer);
1144 i965_free_gpe_resource(&vme_context->res_brc_intra_dist_surface);
1145 dw_width = ALIGN(hevc_state->frame_width_4x / 2, 64);
1146 dw_height = ALIGN(hevc_state->frame_height_4x / 4, 8) * 2;
1147 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1148 &vme_context->res_brc_intra_dist_surface,
1149 dw_width, dw_height, dw_width,
1150 "Brc Intra distortion buffer");
1154 i965_zero_gpe_resource(&vme_context->res_brc_intra_dist_surface);
1156 for (i = 0; i < 2; i++) {
1157 i965_free_gpe_resource(&vme_context->res_brc_pak_statistics_buffer[i]);
1159 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1160 &vme_context->res_brc_pak_statistics_buffer[i],
1162 "Brc Pak statistics buffer");
1167 i965_free_gpe_resource(&vme_context->res_brc_pic_image_state_write_buffer);
1168 res_size = GEN10_HEVC_BRC_IMG_STATE_SIZE_PER_PASS * 8;
1169 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1170 &vme_context->res_brc_pic_image_state_write_buffer,
1172 "Brc Pic State Write buffer");
1176 i965_free_gpe_resource(&vme_context->res_brc_pic_image_state_read_buffer);
1177 res_size = GEN10_HEVC_BRC_IMG_STATE_SIZE_PER_PASS * 8;
1178 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1179 &vme_context->res_brc_pic_image_state_read_buffer,
1181 "Brc Pic State Read buffer");
1185 i965_free_gpe_resource(&vme_context->res_brc_const_data_surface);
1186 dw_width = ALIGN(GEN10_HEVC_BRC_CONST_SURFACE_WIDTH, 64);
1187 dw_height = ALIGN(GEN10_HEVC_BRC_CONST_SURFACE_HEIGHT, 32);
1188 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1189 &vme_context->res_brc_const_data_surface,
1190 dw_width, dw_height, dw_width,
1191 "Brc Const data buffer");
1195 i965_free_gpe_resource(&vme_context->res_brc_lcu_const_data_buffer);
1197 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1198 &vme_context->res_brc_lcu_const_data_buffer,
1200 "BRC LCU Const_data buffer");
1204 i965_zero_gpe_resource(&vme_context->res_brc_lcu_const_data_buffer);
1206 i965_free_gpe_resource(&vme_context->res_brc_mb_qp_surface);
1207 dw_width = ALIGN(hevc_state->frame_width_4x * 4, 64) >> 4;
1208 dw_height = ALIGN(hevc_state->frame_height_4x * 4, 64) >> 5;
1210 dw_width = ALIGN(dw_width, 64);
1211 dw_height = ALIGN(dw_height, 8);
1212 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1213 &vme_context->res_brc_mb_qp_surface,
1214 dw_width, dw_height, dw_width,
1215 "Brc LCU qp data buffer");
1219 i965_zero_gpe_resource(&vme_context->res_brc_mb_qp_surface);
1221 return VA_STATUS_SUCCESS;
1224 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/*
 * Fill the GPU constant-table resources with the driver's built-in lookup
 * data: the intra/inter mode-decision LUTs and the BRC constant surface
 * (QP-adjust table followed by the per-LCU-size lambda cost table).
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_OPERATION_FAILED when a
 * resource cannot be mapped.
 *
 * NOTE(review): this listing has elided lines (original numbering jumps,
 * e.g. 1239 between 1238 and 1240); the missing lines presumably hold the
 * `if (!buffer_ptr)` guards, braces and the `buffer_ptr` declaration —
 * confirm against the full file.
 */
1228 gen10_hevc_enc_init_const_resources(VADriverContextP ctx,
1229 struct encode_state *encode_state,
1230 struct intel_encoder_context *encoder_context)
1232 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
1233 struct gen10_hevc_enc_state *hevc_state;
1236 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
/* Intra mode-decision constant LUT. */
1238 buffer_ptr = i965_map_gpe_resource(&vme_context->res_enc_const_table_intra);
1240 return VA_STATUS_ERROR_OPERATION_FAILED;
1242 memcpy(buffer_ptr, gen10_hevc_enc_intra_const_lut,
1243 GEN10_HEVC_ENC_INTRA_CONST_LUT_SIZE);
1245 i965_unmap_gpe_resource(&vme_context->res_enc_const_table_intra);
/* Inter (LCU32) constant LUT. */
1247 buffer_ptr = i965_map_gpe_resource(&vme_context->res_enc_const_table_inter);
1249 return VA_STATUS_ERROR_OPERATION_FAILED;
1251 memcpy(buffer_ptr, gen10_hevc_enc_inter_const_lut32,
1252 GEN10_HEVC_ENC_INTER_CONST_LUT32_SIZE);
1254 i965_unmap_gpe_resource(&vme_context->res_enc_const_table_inter);
/* The LCU64 inter LUT is only needed when the stream uses 64x64 LCUs. */
1256 if (hevc_state->is_64lcu) {
1257 buffer_ptr = i965_map_gpe_resource(&vme_context->res_enc_const_table_inter_lcu64);
1259 return VA_STATUS_ERROR_OPERATION_FAILED;
1261 memcpy(buffer_ptr, gen10_hevc_enc_inter_const_lut64,
1262 GEN10_HEVC_ENC_INTER_CONST_LUT64_SIZE);
1264 i965_unmap_gpe_resource(&vme_context->res_enc_const_table_inter_lcu64);
/* BRC constant surface: QP-adjust data first, lambda-cost table after it. */
1267 buffer_ptr = i965_map_gpe_resource(&vme_context->res_brc_const_data_surface);
1269 return VA_STATUS_ERROR_OPERATION_FAILED;
1271 memcpy(buffer_ptr, gen10_hevc_brc_qp_adjust_data, GEN10_HEVC_BRC_QP_ADJUST_SIZE);
1273 buffer_ptr += GEN10_HEVC_BRC_QP_ADJUST_SIZE;
/* Lambda-cost table is LCU-size specific; both variants share one size. */
1275 if (hevc_state->is_64lcu)
1276 memcpy(buffer_ptr, gen10_hevc_brc_lcu64_lambda_cost, GEN10_HEVC_BRC_LCU_LAMBDA_COST);
1278 memcpy(buffer_ptr, gen10_hevc_brc_lcu32_lambda_cost, GEN10_HEVC_BRC_LCU_LAMBDA_COST);
1280 i965_unmap_gpe_resource(&vme_context->res_brc_const_data_surface);
1282 return VA_STATUS_SUCCESS;
/*
 * Validate the incoming VA-API sequence/picture/slice parameters against
 * what this GEN10 HEVC encoder supports. Returns VA_STATUS_SUCCESS, or
 * VA_STATUS_ERROR_ATTR_NOT_SUPPORTED / VA_STATUS_ERROR_INVALID_PARAMETER
 * for out-of-range or unsupported settings. Note it also silently
 * *sanitizes* one field: temporal-MVP is turned off per-slice when the
 * collocated reference index is invalid.
 *
 * NOTE(review): listing has elided lines (loop closing brace, a comparison
 * around original lines 1326-1327, declarations of i/j) — confirm against
 * the full file.
 */
1286 gen10_hevc_enc_check_parameters(VADriverContextP ctx,
1287 struct encode_state *encode_state,
1288 struct intel_encoder_context *encoder_context)
1290 VAEncSequenceParameterBufferHEVC *seq_param;
1291 VAEncPictureParameterBufferHEVC *pic_param;
1292 VAEncSliceParameterBufferHEVC *slice_param;
1295 seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1296 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1297 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
/* Per-slice checks. */
1299 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1300 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[i]->buffer;
/* Disable temporal MVP when the collocated ref index can't be honoured
 * (0xff sentinel or beyond the supported reference surface count). */
1302 if (slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag &&
1303 slice_param->slice_fields.bits.collocated_from_l0_flag &&
1304 (pic_param->collocated_ref_pic_index == 0xff ||
1305 pic_param->collocated_ref_pic_index > GEN10_MAX_REF_SURFACES))
1306 slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag = 0;
/* Reference list sizes are capped per direction. */
1308 if (slice_param->num_ref_idx_l0_active_minus1 > GEN10_HEVC_NUM_MAX_REF_L0 - 1 ||
1309 slice_param->num_ref_idx_l1_active_minus1 > GEN10_HEVC_NUM_MAX_REF_L1 - 1)
1310 return VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
/* P slices are rejected here; presumably only I/B are handled — confirm. */
1312 if (slice_param->slice_type == HEVC_SLICE_P)
1313 return VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
/* LCU size (log2) must lie within the supported HW range. */
1316 i = seq_param->log2_diff_max_min_luma_coding_block_size +
1317 seq_param->log2_min_luma_coding_block_size_minus3 + 3;
1318 if (i < GEN10_HEVC_LOG2_MIN_HEVC_LCU ||
1319 i > GEN10_HEVC_LOG2_MAX_HEVC_LCU)
1320 return VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
/* i = log2 max transform size, j = log2 max coding block size. */
1322 i = seq_param->log2_min_transform_block_size_minus2 +
1323 seq_param->log2_diff_max_min_transform_block_size + 2;
1324 j = seq_param->log2_min_luma_coding_block_size_minus3 +
1325 seq_param->log2_diff_max_min_luma_coding_block_size + 3;
1328 return VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
/* Slice QP (pic_init_qp + delta) must stay within [-6*bit_depth_offset, 51]
 * as per the H.265 QP range. */
1330 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1331 i = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1332 j = -seq_param->seq_fields.bits.bit_depth_luma_minus8 * 6;
1333 if (i < j || i > 51)
1334 return VA_STATUS_ERROR_INVALID_PARAMETER;
/* Only 4:2:0 chroma (chroma_format_idc == 1) is supported. */
1336 if (seq_param->seq_fields.bits.chroma_format_idc != 1)
1337 return VA_STATUS_ERROR_INVALID_PARAMETER;
1339 return VA_STATUS_SUCCESS;
/*
 * Derive the per-frame encoder state (scaled resolutions, HME/16xME
 * enablement, LCU size, bit depth, BRC configuration, SAO pass count,
 * target-usage thread count) from the frame info and the VA-API buffers.
 * Always returns VA_STATUS_SUCCESS.
 *
 * NOTE(review): listing has elided lines (closing braces, an `else`
 * around original 1396, the divisor around 1440-1441, the condition tail
 * at 1423) — confirm against the full file. Function name keeps the
 * original's "paramers" spelling; it is part of the interface here.
 */
1343 gen10_hevc_enc_init_misc_paramers(VADriverContextP ctx,
1344 struct encode_state *encode_state,
1345 struct intel_encoder_context *encoder_context)
1347 struct gen10_hevc_enc_context *vme_context = NULL;
1348 struct gen10_hevc_enc_state *hevc_state;
1349 struct gen10_hevc_enc_frame_info *frame_info;
1350 VAEncSequenceParameterBufferHEVC *seq_param;
1351 VAEncSliceParameterBufferHEVC *slice_param;
1352 uint32_t brc_method, brc_reset;
1354 vme_context = (struct gen10_hevc_enc_context *) encoder_context->vme_context;
1355 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
1356 frame_info = &vme_context->frame_info;
1357 seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1358 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1360 hevc_state->low_delay = frame_info->low_delay;
1362 hevc_state->frame_width = frame_info->frame_width;
1363 hevc_state->frame_height = frame_info->frame_height;
/* Downscaled resolutions for the multi-level HME search (2x, 4x, 16x),
 * each rounded up to a multiple of 32. */
1365 hevc_state->frame_width_2x = ALIGN(frame_info->frame_width / 2, 32);
1366 hevc_state->frame_height_2x = ALIGN(frame_info->frame_height / 2, 32);
1368 hevc_state->frame_width_4x = ALIGN(frame_info->frame_width / 4, 32);
1369 hevc_state->frame_height_4x = ALIGN(frame_info->frame_height / 4, 32);
1371 hevc_state->frame_width_16x = ALIGN(hevc_state->frame_width_4x / 4, 32);
1372 hevc_state->frame_height_16x = ALIGN(hevc_state->frame_height_4x / 4, 32);
1374 hevc_state->cu_records_offset = ALIGN(frame_info->width_in_lcu *
1375 frame_info->height_in_lcu *
1378 hevc_state->hme_supported = 1;
1379 hevc_state->b16xme_supported = 1;
/* Clamp the scaled sizes to the VME reference window; if even the 4x
 * surface is too small, 16x ME is pointless and gets disabled. */
1381 if (hevc_state->frame_width_4x <= GEN10_HEVC_VME_REF_WIN ||
1382 hevc_state->frame_height_4x <= GEN10_HEVC_VME_REF_WIN) {
1383 hevc_state->b16xme_supported = 0;
1385 hevc_state->frame_width_4x = GEN10_HEVC_VME_REF_WIN;
1386 hevc_state->frame_height_4x = GEN10_HEVC_VME_REF_WIN;
1387 } else if (hevc_state->frame_width_16x <= GEN10_HEVC_VME_REF_WIN ||
1388 hevc_state->frame_height_16x <= GEN10_HEVC_VME_REF_WIN) {
1389 hevc_state->frame_width_16x = GEN10_HEVC_VME_REF_WIN;
1390 hevc_state->frame_height_16x = GEN10_HEVC_VME_REF_WIN;
/* HME is meaningless for intra-only frames. */
1393 if (slice_param->slice_type == HEVC_SLICE_I) {
1394 hevc_state->hme_enabled = 0;
1395 hevc_state->b16xme_enabled = 0;
1397 hevc_state->hme_enabled = hevc_state->hme_supported;
1398 hevc_state->b16xme_enabled = hevc_state->b16xme_supported;
1401 if (frame_info->lcu_size == 64)
1402 hevc_state->is_64lcu = 1;
1404 hevc_state->is_64lcu = 0;
/* Any non-zero bit-depth offset means a 10-bit pipeline. */
1406 if (frame_info->bit_depth_luma_minus8 ||
1407 frame_info->bit_depth_chroma_minus8)
1408 hevc_state->is_10bit = 1;
1410 hevc_state->is_10bit = 0;
/* Map the VA rate-control mode onto the internal BRC method; CQP is the
 * default when neither CBR nor VBR is requested. */
1412 brc_method = GEN10_HEVC_BRC_CQP;
1413 if (encoder_context->rate_control_mode & VA_RC_CBR)
1414 brc_method = GEN10_HEVC_BRC_CBR;
1415 else if (encoder_context->rate_control_mode & VA_RC_VBR)
1416 brc_method = GEN10_HEVC_BRC_VBR;
/* BRC must be re-initialized when the method changed or the surfaces
 * were reallocated. */
1418 brc_reset = hevc_state->brc.brc_method != brc_method ||
1419 frame_info->reallocate_flag;
1421 if (!hevc_state->brc.brc_inited ||
1422 encoder_context->brc.need_reset ||
1424 if (brc_method == GEN10_HEVC_BRC_CQP) {
1425 hevc_state->brc.brc_enabled = 0;
1426 hevc_state->num_pak_passes = 1;
1428 hevc_state->brc.brc_enabled = 1;
/* Single PAK pass for now; multi-pass ("2") is left disabled. */
1429 hevc_state->num_pak_passes = 1;//2;
1431 if (brc_method == GEN10_HEVC_BRC_CBR) {
/* CBR: target == max == min bitrate. */
1432 hevc_state->brc.target_bit_rate = encoder_context->brc.bits_per_second[0];
1433 hevc_state->brc.max_bit_rate = encoder_context->brc.bits_per_second[0];
1434 hevc_state->brc.min_bit_rate = encoder_context->brc.bits_per_second[0];
1435 hevc_state->brc.window_size = encoder_context->brc.window_size;
/* VBR: target derived from max via target_percentage; min chosen so the
 * target sits midway between min and max (floored at 0). */
1437 hevc_state->brc.max_bit_rate = encoder_context->brc.bits_per_second[0];
1438 hevc_state->brc.target_bit_rate = encoder_context->brc.bits_per_second[0] *
1439 encoder_context->brc.target_percentage[0] /
1442 if (2 * hevc_state->brc.target_bit_rate < hevc_state->brc.max_bit_rate)
1443 hevc_state->brc.min_bit_rate = 0;
1445 hevc_state->brc.min_bit_rate = 2 * hevc_state->brc.target_bit_rate -
1446 hevc_state->brc.max_bit_rate;
/* HRD buffer size: explicit value wins, else derived from the BRC
 * window, else one second's worth of max bitrate. */
1450 if (encoder_context->brc.hrd_buffer_size)
1451 hevc_state->brc.vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
1452 else if (encoder_context->brc.window_size)
1453 hevc_state->brc.vbv_buffer_size_in_bit = hevc_state->brc.max_bit_rate *
1454 encoder_context->brc.window_size /
1457 hevc_state->brc.vbv_buffer_size_in_bit = hevc_state->brc.max_bit_rate;
1459 if (encoder_context->brc.hrd_initial_buffer_fullness)
1460 hevc_state->brc.init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
1462 hevc_state->brc.init_vbv_buffer_fullness_in_bit = hevc_state->brc.vbv_buffer_size_in_bit / 2;
1464 hevc_state->brc.gop_size = encoder_context->brc.gop_size;
1465 hevc_state->brc.gop_p = encoder_context->brc.num_pframes_in_gop;
1466 hevc_state->brc.gop_b = encoder_context->brc.num_bframes_in_gop;
1468 hevc_state->brc.frame_rate_m = encoder_context->brc.framerate[0].num;
1469 hevc_state->brc.frame_rate_d = encoder_context->brc.framerate[0].den;
1471 hevc_state->brc.brc_method = brc_method;
1472 hevc_state->brc.brc_reset = brc_reset || encoder_context->brc.need_reset;
/* First-time CQP: synthesize plausible BRC defaults (30 fps, bitrate
 * scaled from the macroblock count, 1.5 s window) so downstream kernels
 * always see consistent values. */
1474 if (brc_method == GEN10_HEVC_BRC_CQP && !hevc_state->brc.brc_inited) {
1475 hevc_state->brc.frame_rate_m = 30;
1476 hevc_state->brc.frame_rate_d = 1;
1478 hevc_state->brc.target_bit_rate = (hevc_state->frame_width >> 4) * (hevc_state->frame_height >> 4)
1479 * 30 * 384 / 10 * 8;
1480 hevc_state->brc.max_bit_rate = hevc_state->brc.target_bit_rate;
1481 hevc_state->brc.min_bit_rate = hevc_state->brc.target_bit_rate;
1482 hevc_state->brc.window_size = 1500;
1483 hevc_state->brc.vbv_buffer_size_in_bit = (hevc_state->brc.target_bit_rate / 1000) * 1500;
1484 hevc_state->brc.init_vbv_buffer_fullness_in_bit = hevc_state->brc.vbv_buffer_size_in_bit / 2;
1486 hevc_state->brc.gop_size = seq_param->intra_period < 2 ? 30 : seq_param->intra_period;
1487 hevc_state->brc.gop_p = (hevc_state->brc.gop_size - 1) /
1488 (!seq_param->ip_period ? 1 : seq_param->ip_period);
1489 hevc_state->brc.gop_b = hevc_state->brc.gop_size - 1 - hevc_state->brc.gop_p;
1492 hevc_state->profile_level_max_frame =
1493 gen10_hevc_enc_get_profile_level_max_frame(seq_param, 0,
1494 hevc_state->brc.frame_rate_m /
1495 hevc_state->brc.frame_rate_d);
/* SAO needs one extra pass when enabled for luma or chroma. */
1498 hevc_state->sao_2nd_needed = 0;
1499 hevc_state->sao_first_pass_flag = 0;
1500 hevc_state->num_sao_passes = hevc_state->num_pak_passes;
1501 if (seq_param->seq_fields.bits.sample_adaptive_offset_enabled_flag &&
1502 (slice_param->slice_fields.bits.slice_sao_luma_flag ||
1503 slice_param->slice_fields.bits.slice_sao_chroma_flag)) {
1504 hevc_state->sao_2nd_needed = 1;
1505 hevc_state->sao_first_pass_flag = 1;
1506 hevc_state->num_sao_passes = hevc_state->num_pak_passes + 1;
/* Thread count per CTB is looked up by target usage bucket ((tu+1)>>2). */
1509 hevc_state->brc.target_usage = encoder_context->quality_level;
1510 hevc_state->thread_num_per_ctb = gen10_hevc_tu_settings[GEN10_TOTAL_THREAD_NUM_PER_LCU_TU_PARAM]
1511 [(hevc_state->brc.target_usage + 1) >> 2];
1513 hevc_state->is_same_ref_list = frame_info->is_same_ref_list;
1515 return VA_STATUS_SUCCESS;
/*
 * Top-level per-frame initialization: validates parameters, initializes
 * frame info / status buffer / lambda tables / common resources, derives
 * the misc encoder state, verifies the input and reconstructed surfaces,
 * and (re)allocates encoder resources when the frame layout changed.
 * Returns the first failing VAStatus, VA_STATUS_SUCCESS otherwise.
 *
 * NOTE(review): listing has elided lines (goto/return targets after each
 * failure check, some argument lines, closing braces) — confirm against
 * the full file.
 */
1519 gen10_hevc_enc_init_parameters(VADriverContextP ctx,
1520 struct encode_state *encode_state,
1521 struct intel_encoder_context *encoder_context)
1523 struct gen10_hevc_enc_context *vme_context;
1524 struct gen10_hevc_enc_state *hevc_state;
1525 struct gen10_hevc_enc_frame_info *frame_info;
1526 struct gen10_hevc_enc_common_res *common_res;
1527 VAStatus va_status = VA_STATUS_SUCCESS;
/* Reject unsupported parameter combinations up front. */
1529 va_status = gen10_hevc_enc_check_parameters(ctx, encode_state, encoder_context);
1530 if (va_status != VA_STATUS_SUCCESS)
1533 vme_context = (struct gen10_hevc_enc_context *) encoder_context->vme_context;
1534 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
1535 frame_info = &vme_context->frame_info;
1536 common_res = &vme_context->common_res;
1538 gen10_hevc_enc_init_frame_info(ctx, encode_state, encoder_context, frame_info);
1539 gen10_hevc_enc_init_status_buffer(ctx, encode_state, encoder_context,
1540 &vme_context->status_buffer);
/* Lambda tables depend on bit depth; rebuild only on first use or after
 * a reallocation. */
1542 if (!hevc_state->lambda_init ||
1543 frame_info->reallocate_flag) {
1544 gen10_hevc_enc_init_lambda_param(&vme_context->lambda_param, frame_info->bit_depth_luma_minus8,
1545 frame_info->bit_depth_chroma_minus8);
1547 hevc_state->lambda_init = 1;
1550 if (gen10_hevc_enc_init_common_resource(ctx, encode_state, encoder_context,
1553 frame_info->picture_coding_type != HEVC_SLICE_I,
1555 va_status = VA_STATUS_ERROR_ALLOCATION_FAILED;
1559 va_status = gen10_hevc_enc_init_misc_paramers(ctx, encode_state, encoder_context);
1560 if (va_status != VA_STATUS_SUCCESS)
/* Verify both the source and the reconstructed surface match the stream's
 * bit depth / format requirements. */
1563 va_status = gen10_hevc_enc_ensure_surface(ctx,
1564 common_res->uncompressed_pic.obj_surface,
1565 frame_info->bit_depth_luma_minus8,
1567 if (va_status != VA_STATUS_SUCCESS)
1570 va_status = gen10_hevc_enc_ensure_surface(ctx,
1571 common_res->reconstructed_pic.obj_surface,
1572 frame_info->bit_depth_luma_minus8,
1574 if (va_status != VA_STATUS_SUCCESS)
1577 va_status = gen10_hevc_init_surface_priv(ctx, encode_state, encoder_context,
1578 common_res->reconstructed_pic.obj_surface);
1579 if (va_status != VA_STATUS_SUCCESS)
/* Frame geometry changed: reallocate encoder scratch resources and
 * restart the frame counter. */
1582 if (frame_info->reallocate_flag) {
1583 va_status = gen10_hevc_allocate_enc_resources(ctx, encode_state,
1585 if (va_status != VA_STATUS_SUCCESS)
1588 hevc_state->frame_number = 0;
1591 va_status = gen10_hevc_enc_init_const_resources(ctx, encode_state, encoder_context);
1592 if (va_status != VA_STATUS_SUCCESS)
/* Media-object-walker dependency patterns (wavefront scan orders) used by
 * the MBenc/LCU kernels. */
1599 #define GEN10_WALKER_26_DEGREE 0
1600 #define GEN10_WALKER_26Z_DEGREE 1
1601 #define GEN10_WALKER_26X_DEGREE 2
1602 #define GEN10_WALKER_26ZX_DEGREE 3
/*
 * Translate the encoder's simplified walker description (resolution,
 * scoreboard usage, dependency/scan flags) into a full GPE
 * media-object-walker parameter block. Three shapes are produced:
 * no-dependency (free horizontal scan), vertical scan (column order),
 * and the default fall-through case.
 *
 * NOTE(review): listing has elided lines (the final `else {` and closing
 * braces) — confirm against the full file.
 */
1605 gen10_init_media_object_walker_parameter(struct gen10_hevc_enc_kernel_walker_parameter *kernel_walker_param,
1606 struct gpe_media_object_walker_parameter *walker_param)
1608 memset(walker_param, 0, sizeof(*walker_param));
1610 walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
/* Block, global resolutions and loop strides all cover the full grid. */
1612 walker_param->block_resolution.x = kernel_walker_param->resolution_x;
1613 walker_param->block_resolution.y = kernel_walker_param->resolution_y;
1615 walker_param->global_resolution.x = kernel_walker_param->resolution_x;
1616 walker_param->global_resolution.y = kernel_walker_param->resolution_y;
1618 walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
1619 walker_param->global_outer_loop_stride.y = 0;
1621 walker_param->global_inner_loop_unit.x = 0;
1622 walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
/* 0xFFFF = effectively unlimited loop execution counts. */
1624 walker_param->local_loop_exec_count = 0xFFFF;
1625 walker_param->global_loop_exec_count = 0xFFFF;
1627 if (kernel_walker_param->no_dependency) {
/* No inter-thread dependency: disable the scoreboard and walk rows. */
1628 walker_param->scoreboard_mask = 0;
1629 walker_param->use_scoreboard = 0;
1630 walker_param->local_outer_loop_stride.x = 0;
1631 walker_param->local_outer_loop_stride.y = 1;
1632 walker_param->local_inner_loop_unit.x = 1;
1633 walker_param->local_inner_loop_unit.y = 0;
1634 walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
1635 walker_param->local_end.y = 0;
1636 } else if (kernel_walker_param->use_vertical_scan) {
/* Vertical scan: single-bit scoreboard mask, walk columns. */
1637 walker_param->scoreboard_mask = 0x1;
1638 walker_param->local_outer_loop_stride.x = 1;
1639 walker_param->local_outer_loop_stride.y = 0;
1640 walker_param->local_inner_loop_unit.x = 0;
1641 walker_param->local_inner_loop_unit.y = 1;
1642 walker_param->local_end.x = 0;
1643 walker_param->local_end.y = kernel_walker_param->resolution_y - 1;
1645 walker_param->local_end.x = 0;
1646 walker_param->local_end.y = 0;
1651 gen10_run_kernel_media_object(VADriverContextP ctx,
1652 struct intel_encoder_context *encoder_context,
1653 struct i965_gpe_context *gpe_context,
1655 struct gpe_media_object_parameter *param)
1657 struct intel_batchbuffer *batch = encoder_context->base.batch;
1658 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
1659 struct gen10_hevc_enc_status_buffer *status_buffer;
1660 struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1662 status_buffer = &vme_context->status_buffer;
1664 intel_batchbuffer_start_atomic(batch, 0x1000);
1666 memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1667 mi_store_data_imm.bo = status_buffer->gpe_res.bo;
1668 mi_store_data_imm.offset = status_buffer->status_media_state_offset;
1669 mi_store_data_imm.dw0 = media_function;
1670 gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1672 intel_batchbuffer_emit_mi_flush(batch);
1674 gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
1675 gen8_gpe_media_object(ctx, gpe_context, batch, param);
1676 gen8_gpe_media_state_flush(ctx, gpe_context, batch);
1678 gen9_gpe_pipeline_end(ctx, gpe_context, batch);
1680 intel_batchbuffer_end_atomic(batch);
1682 intel_batchbuffer_flush(batch);
/*
 * Walker variant of gen10_run_kernel_media_object(): same status-buffer
 * tagging and pipeline setup, but dispatches via MEDIA_OBJECT_WALKER for
 * kernels that process the whole frame grid. Note the MI flush is emitted
 * before the store here, whereas the non-walker path flushes after —
 * presumably intentional, but worth confirming.
 *
 * NOTE(review): listing has elided lines (the `media_function` parameter
 * line and closing brace) — confirm against the full file.
 */
1686 gen10_run_kernel_media_object_walker(VADriverContextP ctx,
1687 struct intel_encoder_context *encoder_context,
1688 struct i965_gpe_context *gpe_context,
1690 struct gpe_media_object_walker_parameter *param)
1692 struct intel_batchbuffer *batch = encoder_context->base.batch;
1693 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
1694 struct gen10_hevc_enc_status_buffer *status_buffer;
1695 struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1697 status_buffer = &vme_context->status_buffer;
1699 intel_batchbuffer_start_atomic(batch, 0x1000);
1701 intel_batchbuffer_emit_mi_flush(batch);
/* Tag the status buffer with the media function id for status readback. */
1703 memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1704 mi_store_data_imm.bo = status_buffer->gpe_res.bo;
1705 mi_store_data_imm.offset = status_buffer->status_media_state_offset;
1706 mi_store_data_imm.dw0 = media_function;
1707 gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1709 gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
1710 gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
1711 gen8_gpe_media_state_flush(ctx, gpe_context, batch);
1713 gen9_gpe_pipeline_end(ctx, gpe_context, batch);
1715 intel_batchbuffer_end_atomic(batch);
1717 intel_batchbuffer_flush(batch);
/* Clamp x into [min, max] in place. NOTE(review): the listing elides the
 * macro's do{...}while(0) wrapper lines around this continuation —
 * confirm against the full file; no comments are inserted between the
 * continuation lines to avoid altering the macro body. */
#define BRC_CLIP(x, min, max) \
1722 x = ((x > (max)) ? (max) : ((x < (min)) ? (min) : x)); \
1725 #define GEN10_HEVC_MAX_BRC_PASSES 4
/* Flags packed into brc_curbe->dw8.brc_flag for the BRC init kernel. */
1727 #define GEN10_HEVC_BRCINIT_ISCBR 0x0010
1728 #define GEN10_HEVC_BRCINIT_ISVBR 0x0020
1729 #define GEN10_HEVC_BRCINIT_ISCQP 0x4000
1730 #define GEN10_HEVC_BRCINIT_DISABLE_MBBRC 0x8000
/*
 * Populate the CURBE (constant buffer) for the BRC init/reset kernel from
 * the accumulated BRC state: bitrates, HRD buffer sizes, frame rate, GOP
 * structure, QP limits, and the instant-rate / deviation threshold tables
 * (the latter scaled by the bits-per-frame-to-buffer ratio). Also caches
 * the derived buffer-fullness values back into hevc_state for later BRC
 * update passes.
 *
 * NOTE(review): listing has elided lines (the NULL check after the curbe
 * map around original 1746-1749, closing braces) — confirm against the
 * full file.
 */
1733 gen10_hevc_enc_brc_init_set_curbe(VADriverContextP ctx,
1734 struct encode_state *encode_state,
1735 struct intel_encoder_context *encoder_context,
1736 struct i965_gpe_context *gpe_context)
1738 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
1739 struct gen10_hevc_enc_state *hevc_state;
1740 gen10_hevc_brc_init_curbe_data *brc_curbe;
1741 double input_bits_per_frame, bps_ratio;
1743 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
1745 brc_curbe = i965_gpe_context_map_curbe(gpe_context);
1750 memset(brc_curbe, 0, sizeof(gen10_hevc_brc_init_curbe_data));
1752 brc_curbe->dw0.profile_level_max_frame = hevc_state->profile_level_max_frame;
1753 brc_curbe->dw1.init_buf_full = hevc_state->brc.init_vbv_buffer_fullness_in_bit;
1754 brc_curbe->dw2.buf_size = hevc_state->brc.vbv_buffer_size_in_bit;
1755 brc_curbe->dw3.target_bit_rate = hevc_state->brc.target_bit_rate;
1756 brc_curbe->dw4.maximum_bit_rate = hevc_state->brc.max_bit_rate;
/* Min bitrate is forced to 0 in the curbe regardless of
 * hevc_state->brc.min_bit_rate — presumably intentional; confirm. */
1757 brc_curbe->dw5.minimum_bit_rate = 0;
1758 brc_curbe->dw6.frame_ratem = hevc_state->brc.frame_rate_m;
1759 brc_curbe->dw7.frame_rated = hevc_state->brc.frame_rate_d;
/* Per-LCU BRC disabled => tell the kernel to skip MB-level BRC. */
1760 if (hevc_state->brc.lcu_brc_enabled)
1761 brc_curbe->dw8.brc_flag = 0;
1763 brc_curbe->dw8.brc_flag = GEN10_HEVC_BRCINIT_DISABLE_MBBRC;
1765 brc_curbe->dw25.ac_qp_buffer = 1;
1766 brc_curbe->dw25.log2_max_cu_size = hevc_state->is_64lcu ? 6 : 5;
1767 brc_curbe->dw25.sliding_wind_size = 30;
/* Rate-control method flag: CQP overwrites, CBR/VBR OR into the flags. */
1769 if (hevc_state->brc.brc_method == GEN10_HEVC_BRC_CQP) {
1770 brc_curbe->dw8.brc_flag = GEN10_HEVC_BRCINIT_ISCQP;
1771 } else if (hevc_state->brc.brc_method == GEN10_HEVC_BRC_CBR) {
1772 brc_curbe->dw8.brc_flag |= GEN10_HEVC_BRCINIT_ISCBR;
1773 } else if (hevc_state->brc.brc_method == GEN10_HEVC_BRC_VBR) {
1774 brc_curbe->dw8.brc_flag |= GEN10_HEVC_BRCINIT_ISVBR;
1777 brc_curbe->dw9.frame_width = hevc_state->frame_width;
1778 brc_curbe->dw10.frame_height = hevc_state->frame_height;
1779 brc_curbe->dw10.avbr_accuracy = 30;
1780 brc_curbe->dw11.avbr_convergence = 150;
1782 brc_curbe->dw14.max_brc_level = 1;
1783 brc_curbe->dw8.brc_gopp = hevc_state->brc.gop_p;
1784 brc_curbe->dw9.brc_gopb = hevc_state->brc.gop_b;
/* QP clamp range handed to the kernel. */
1786 brc_curbe->dw11.minimum_qp = 1;
1787 brc_curbe->dw12.maximum_qp = 51;
/* Instant-rate thresholds (percent of target) per frame type. */
1789 brc_curbe->dw16.instant_rate_thr0_pframe = 40;
1790 brc_curbe->dw16.instant_rate_thr1_pframe = 60;
1791 brc_curbe->dw16.instant_rate_thr2_pframe = 80;
1792 brc_curbe->dw16.instant_rate_thr3_pframe = 120;
1793 brc_curbe->dw17.instant_rate_thr0_bframe = 35;
1794 brc_curbe->dw17.instant_rate_thr1_bframe = 60;
1795 brc_curbe->dw17.instant_rate_thr2_bframe = 80;
1796 brc_curbe->dw17.instant_rate_thr3_bframe = 120;
1797 brc_curbe->dw18.instant_rate_thr0_iframe = 40;
1798 brc_curbe->dw18.instant_rate_thr1_iframe = 60;
1799 brc_curbe->dw18.instant_rate_thr2_iframe = 90;
1800 brc_curbe->dw18.instant_rate_thr3_iframe = 115;
/* Average bits per frame = max_bitrate / fps. */
1802 input_bits_per_frame = (double)(brc_curbe->dw4.maximum_bit_rate) * ((double)(hevc_state->brc.frame_rate_d)) /
1803 ((double)(hevc_state->brc.frame_rate_m));
/* Buffer must hold at least four average frames. */
1805 if (brc_curbe->dw2.buf_size < (uint32_t)input_bits_per_frame * 4)
1806 brc_curbe->dw2.buf_size = (uint32_t)input_bits_per_frame * 4;
/* Clamp initial fullness into [2 frames, buf_size]; default 7/8 full. */
1808 if (!brc_curbe->dw1.init_buf_full)
1809 brc_curbe->dw1.init_buf_full = 7 * brc_curbe->dw2.buf_size / 8;
1810 else if (brc_curbe->dw1.init_buf_full < (uint32_t)input_bits_per_frame * 2)
1811 brc_curbe->dw1.init_buf_full = (uint32_t)input_bits_per_frame * 2;
1812 else if (brc_curbe->dw1.init_buf_full > brc_curbe->dw2.buf_size)
1813 brc_curbe->dw1.init_buf_full = brc_curbe->dw2.buf_size;
/* Ratio of a frame's bits to 1/30th of the VBV buffer, clipped to
 * [0.1, 3.5]; used to scale the deviation thresholds below. */
1815 bps_ratio = input_bits_per_frame / ((double)(hevc_state->brc.vbv_buffer_size_in_bit) / 30);
1817 BRC_CLIP(bps_ratio, 0.1, 3.5);
/* Deviation thresholds. The negative products are cast to uint32_t, so
 * they are stored as two's-complement values — presumably the kernel
 * reinterprets them as signed; confirm against the kernel ABI. */
1819 brc_curbe->dw19.deviation_thr0_pbframe = (uint32_t)(-50 * pow(0.90, bps_ratio));
1820 brc_curbe->dw19.deviation_thr1_pbframe = (uint32_t)(-50 * pow(0.66, bps_ratio));
1821 brc_curbe->dw19.deviation_thr2_pbframe = (uint32_t)(-50 * pow(0.46, bps_ratio));
1822 brc_curbe->dw19.deviation_thr3_pbframe = (uint32_t)(-50 * pow(0.3, bps_ratio));
1824 brc_curbe->dw20.deviation_thr4_pbframe = (uint32_t)(50 * pow(0.3, bps_ratio));
1825 brc_curbe->dw20.deviation_thr5_pbframe = (uint32_t)(50 * pow(0.46, bps_ratio));
1826 brc_curbe->dw20.deviation_thr6_pbframe = (uint32_t)(50 * pow(0.7, bps_ratio));
1827 brc_curbe->dw20.deviation_thr7_pbframe = (uint32_t)(50 * pow(0.9, bps_ratio));
1829 brc_curbe->dw21.deviation_thr0_vbrctrl = (uint32_t)(-50 * pow(0.9, bps_ratio));
1830 brc_curbe->dw21.deviation_thr1_vbrctrl = (uint32_t)(-50 * pow(0.7, bps_ratio));
1831 brc_curbe->dw21.deviation_thr2_vbrctrl = (uint32_t)(-50 * pow(0.5, bps_ratio));
1832 brc_curbe->dw21.deviation_thr3_vbrctrl = (uint32_t)(-50 * pow(0.3, bps_ratio));
1834 brc_curbe->dw22.deviation_thr4_vbrctrl = (uint32_t)(100 * pow(0.4, bps_ratio));
1835 brc_curbe->dw22.deviation_thr5_vbrctrl = (uint32_t)(100 * pow(0.5, bps_ratio));
1836 brc_curbe->dw22.deviation_thr6_vbrctrl = (uint32_t)(100 * pow(0.75, bps_ratio));
1837 brc_curbe->dw22.deviation_thr7_vbrctrl = (uint32_t)(100 * pow(0.9, bps_ratio));
1839 brc_curbe->dw23.deviation_thr0_iframe = (uint32_t)(-50 * pow(0.8, bps_ratio));
1840 brc_curbe->dw23.deviation_thr1_iframe = (uint32_t)(-50 * pow(0.6, bps_ratio));
1841 brc_curbe->dw23.deviation_thr2_iframe = (uint32_t)(-50 * pow(0.34, bps_ratio));
1842 brc_curbe->dw23.deviation_thr3_iframe = (uint32_t)(-50 * pow(0.2, bps_ratio));
1844 brc_curbe->dw24.deviation_thr4_iframe = (uint32_t)(50 * pow(0.2, bps_ratio));
1845 brc_curbe->dw24.deviation_thr5_iframe = (uint32_t)(50 * pow(0.4, bps_ratio));
1846 brc_curbe->dw24.deviation_thr6_iframe = (uint32_t)(50 * pow(0.66, bps_ratio));
1847 brc_curbe->dw24.deviation_thr7_iframe = (uint32_t)(50 * pow(0.9, bps_ratio));
/* Cache the derived values for the per-frame BRC update kernel. */
1849 if (!hevc_state->brc.brc_inited)
1850 hevc_state->brc.brc_init_current_target_buf_full_in_bits = brc_curbe->dw1.init_buf_full;
1852 hevc_state->brc.brc_init_reset_buf_size_in_bits = (double)brc_curbe->dw2.buf_size;
1853 hevc_state->brc.brc_init_reset_input_bits_per_frame = input_bits_per_frame;
1855 i965_gpe_context_unmap_curbe(gpe_context);
/* Bind the surfaces consumed by the BRC INIT/RESET kernel: the BRC
 * history buffer (as a raw buffer surface) and the ME distortion
 * surface (as an R8_UNORM 2D surface).
 * NOTE(review): this extract is incomplete -- several argument lines of
 * both i965_add_* calls (gpe_context, offsets, binding-table indices)
 * are missing from this view; verify against the full file.
 */
1859 gen10_hevc_enc_brc_init_add_surfaces(VADriverContextP ctx,
1860                                      struct encode_state *encode_state,
1861                                      struct intel_encoder_context *encoder_context,
1862                                      struct i965_gpe_context *gpe_context)
1864     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
     /* BRC history buffer: persistent rate-control state across frames. */
1866     i965_add_buffer_gpe_surface(ctx,
1868                                 &vme_context->res_brc_history_buffer,
1870                                 BYTES2UINT32(vme_context->res_brc_history_buffer.size),
     /* HME distortion surface consumed by the BRC init kernel. */
1874     i965_add_buffer_2d_gpe_surface(ctx,
1876                                 &vme_context->res_brc_me_dist_surface,
1878                                 I965_SURFACEFORMAT_R8_UNORM,
/* Run the BRC INIT (first frame) or BRC RESET (subsequent re-init)
 * kernel: pick the GPE context by whether BRC was already initialized,
 * program CURBE + surfaces, then dispatch a single media object.
 */
1883 gen10_hevc_enc_brc_init_reset(VADriverContextP ctx,
1884                               struct encode_state *encode_state,
1885                               struct intel_encoder_context *encoder_context)
1887     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
1888     struct gen10_hevc_enc_state *hevc_state;
1889     struct gpe_media_object_parameter media_object_param;
1890     struct i965_gpe_context *gpe_context;
1891     int gpe_index = GEN10_HEVC_BRC_INIT;
1892     int media_function = GEN10_HEVC_MEDIA_STATE_BRC_INIT_RESET;
1894     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
     /* After the first INIT, later invocations use the RESET kernel. */
1896     if (hevc_state->brc.brc_inited)
1897         gpe_index = GEN10_HEVC_BRC_RESET;
1899     gpe_context = &(vme_context->brc_context.gpe_contexts[gpe_index]);
1901     gen8_gpe_context_init(ctx, gpe_context);
1902     gen9_gpe_reset_binding_table(ctx, gpe_context);
1904     gen10_hevc_enc_brc_init_set_curbe(ctx, encode_state, encoder_context, gpe_context);
1905     gen10_hevc_enc_brc_init_add_surfaces(ctx, encode_state, encoder_context, gpe_context);
1907     gen8_gpe_setup_interface_data(ctx, gpe_context);
     /* Single-threaded kernel launch: an empty media-object parameter. */
1909     memset(&media_object_param, 0, sizeof(media_object_param));
1910     gen10_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
/* Pre-build four HCP_PIC_STATE command images (one per possible BRC
 * PAK pass) into the BRC picture-image-state read buffer. The BRC
 * update kernel later patches/selects these per pass. Each image is
 * 32 DWORDs (batch_ptr = buffer_ptr + 32 * i) and is terminated with
 * MI_BATCH_BUFFER_END.
 * NOTE(review): extract is incomplete -- DW7..DW9 (frame-rate max/min
 * and slice-delta) and several intermediate DWords are missing from
 * this view.
 */
1914 gen10_hevc_brc_add_pic_img_state(VADriverContextP ctx,
1915                                  struct encode_state *encode_state,
1916                                  struct intel_encoder_context *encoder_context)
1918     struct gen10_hevc_enc_context *pak_context = encoder_context->mfc_context;
1919     struct gen10_hevc_enc_state *hevc_state;
1920     VAEncPictureParameterBufferHEVC *pic_param;
1921     VAEncSequenceParameterBufferHEVC *seq_param;
1922     VAEncSliceParameterBufferHEVC *slice_param;
1923     unsigned int batch_value = 0, tmp_value, i;
1924     uint32_t *batch_ptr, *buffer_ptr;
1926     hevc_state = (struct gen10_hevc_enc_state *) pak_context->enc_priv_state;
1928     buffer_ptr = (uint32_t *)i965_map_gpe_resource(&pak_context->res_brc_pic_image_state_read_buffer);
1933     pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1934     seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1935     slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
     /* One 32-DWord HCP_PIC_STATE image per BRC pass (4 passes max). */
1937     for (i = 0; i < 4; i++) {
1938         batch_ptr = buffer_ptr + 32 * i;
         /* DW0: command opcode; payload length is (31 - 2) DWords. */
1942         *(batch_ptr++) = HCP_PIC_STATE | (31 - 2);
         /* DW1: frame size in CTB units (minus one), plus the
          * transform-skip enable bit at bit 15. */
1945         batch_value = (pak_context->frame_info.width_in_cu - 1) |
1946                       ((pak_context->frame_info.height_in_cu - 1) << 16);
1947         batch_value |= pic_param->pic_fields.bits.transform_skip_enabled_flag << 15;
1948         *(batch_ptr++) = batch_value;
         /* DW2: min/max PCM, transform-block and coding-block log2
          * sizes, packed per the HCP_PIC_STATE layout. */
1950         batch_value = (seq_param->log2_min_pcm_luma_coding_block_size_minus3 << 8) |
1951                       (seq_param->log2_max_pcm_luma_coding_block_size_minus3 << 10) |
1952                       (seq_param->log2_min_transform_block_size_minus2 << 4) |
1953                       ((seq_param->log2_min_transform_block_size_minus2 +
1954                         seq_param->log2_diff_max_min_transform_block_size) << 6) |
1955                       ((seq_param->log2_min_luma_coding_block_size_minus3 +
1956                         seq_param->log2_diff_max_min_luma_coding_block_size) << 2) |
1957                       (seq_param->log2_min_luma_coding_block_size_minus3 << 0);
1960         *(batch_ptr++) = batch_value;
         /* SAO is enabled only for 8-bit content here; the 10-bit
          * path leaves bit 3 clear. */
1967         if ((slice_param->slice_fields.bits.slice_sao_luma_flag ||
1968              slice_param->slice_fields.bits.slice_sao_chroma_flag) &&
1969             !hevc_state->is_10bit)
1970             batch_value |= (1 << 3);
1972         if (pic_param->pic_fields.bits.cu_qp_delta_enabled_flag) {
1973             tmp_value = pic_param->diff_cu_qp_delta_depth;
1974             batch_value |= (1 << 5) | (tmp_value << 6);
1976         batch_value |= (0 << 4) |
1977                        (seq_param->seq_fields.bits.pcm_loop_filter_disabled_flag << 8) |
1979                        (0 << 10) | //(pic_param->log2_parallel_merge_level_minus2
1982                        (0 << 17) | //tile is disabled.
1983                        (pic_param->pic_fields.bits.weighted_bipred_flag << 18) |
1984                        (pic_param->pic_fields.bits.weighted_pred_flag << 19) |
1985                        (0 << 20) | //20/21 is reserved.
1986                        (pic_param->pic_fields.bits.transform_skip_enabled_flag << 22) |
1987                        (seq_param->seq_fields.bits.amp_enabled_flag << 23) |
1988                        (pic_param->pic_fields.bits.transquant_bypass_enabled_flag << 25) |
1989                        (seq_param->seq_fields.bits.strong_intra_smoothing_enabled_flag << 26) |
1990                        (0 << 27); // VME CU packet
1992         *(batch_ptr++) = batch_value;
         /* Chroma QP offsets (5-bit two's complement fields), transform
          * hierarchy depths, PCM sample bit depths and source bit
          * depths, packed into one DWord. */
1995         batch_value = (pic_param->pps_cr_qp_offset & 0x1f) << 5 |
1996                       (pic_param->pps_cb_qp_offset & 0x1f);
1997         batch_value |= (seq_param->max_transform_hierarchy_depth_inter << 13) |
1998                        (seq_param->max_transform_hierarchy_depth_intra << 10) |
1999                        (seq_param->pcm_sample_bit_depth_luma_minus1 << 20) |
2000                        (seq_param->pcm_sample_bit_depth_chroma_minus1 << 16) |
2001                        (seq_param->seq_fields.bits.bit_depth_luma_minus8 << 27) |
2002                        (seq_param->seq_fields.bits.bit_depth_chroma_minus8 << 24);
2003         *(batch_ptr++) = batch_value;
         /* CTU max-bitsize budget; pass index 0 is the initial BRC
          * pass, all later images are marked as subsequent passes. */
2006         batch_value = pic_param->ctu_max_bitsize_allowed;
2007         batch_value |= (0 << 24 |
2010                         0 << 29); // bit 29 reload slice_pointer_flag.
2013             batch_value |= (0 << 16); // Initial pass
2015             batch_value |= (1 << 16); // subsequent pass
2016         *(batch_ptr++) = batch_value;
2018         /* DW 7. Frame_rate Max */
2021         /* Dw 8. Frame_rate Min */
2024         /* DW 9. Frame_rate Min/MAX slice_delta */
         /* Terminate this per-pass command image. */
2057         *(batch_ptr++) = MI_BATCH_BUFFER_END;
2060     i965_unmap_gpe_resource(&pak_context->res_brc_pic_image_state_read_buffer);
/* Bind all surfaces for the BRC frame-update kernel: history buffer,
 * the PAK statistics buffer from the *previous* pass (double-buffered,
 * hence the index flip), the pic-image-state read/write buffers, the
 * combined ENC input buffer, and the ME-distortion / BRC-constant /
 * MB-stat / MV-distortion-sum surfaces.
 * NOTE(review): extract is incomplete -- binding-table index and offset
 * argument lines of each i965_add_* call are missing from this view.
 */
2064 gen10_hevc_enc_brc_frame_update_add_surfaces(VADriverContextP ctx,
2065                                              struct encode_state *encode_state,
2066                                              struct intel_encoder_context *encoder_context,
2067                                              struct i965_gpe_context *gpe_context)
2069     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2070     struct gen10_hevc_enc_state *hevc_state;
2073     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2075     i965_add_buffer_gpe_surface(ctx,
2077                                 &vme_context->res_brc_history_buffer,
2079                                 BYTES2UINT32(vme_context->res_brc_history_buffer.size),
     /* Read the PAK statistics written by the other (previous) pass of
      * the double-buffered pair. */
2083     pak_read_idx = !hevc_state->curr_pak_stat_index;
2084     i965_add_buffer_gpe_surface(ctx,
2086                                 &vme_context->res_brc_pak_statistics_buffer[pak_read_idx],
2088                                 BYTES2UINT32(vme_context->res_brc_pak_statistics_buffer[pak_read_idx].size),
2092     i965_add_buffer_gpe_surface(ctx,
2094                                 &vme_context->res_brc_pic_image_state_read_buffer,
2096                                 BYTES2UINT32(vme_context->res_brc_pic_image_state_read_buffer.size),
2100     i965_add_buffer_gpe_surface(ctx,
2102                                 &vme_context->res_brc_pic_image_state_write_buffer,
2104                                 BYTES2UINT32(vme_context->res_brc_pic_image_state_write_buffer.size),
2108     i965_add_buffer_gpe_surface(ctx,
2110                                 &vme_context->res_brc_input_enc_kernel_buffer,
2112                                 BYTES2UINT32(vme_context->res_brc_input_enc_kernel_buffer.size),
2116     i965_add_buffer_2d_gpe_surface(ctx,
2118                                    &vme_context->res_brc_me_dist_surface,
2120                                    I965_SURFACEFORMAT_R8_UNORM,
2123     i965_add_buffer_2d_gpe_surface(ctx,
2125                                    &vme_context->res_brc_const_data_surface,
2127                                    I965_SURFACEFORMAT_R8_UNORM,
2130     i965_add_buffer_2d_gpe_surface(ctx,
2132                                    &vme_context->res_mb_stat_surface,
2134                                    I965_SURFACEFORMAT_R8_UNORM,
2137     i965_add_buffer_gpe_surface(ctx,
2139                                 &vme_context->res_mv_dist_sum_buffer,
2141                                 BYTES2UINT32(vme_context->res_mv_dist_sum_buffer.size),
2145     return VA_STATUS_SUCCESS;
/* Fill the CURBE for the BRC update kernels (shared by the frame- and
 * LCU-level update; the final parameter distinguishes the LCU case).
 * Programs the running target-buffer fullness, per-frame BRC level,
 * CQP fallback value, and the fixed gain-adjust / QP-threshold /
 * grate-ratio tables.
 * NOTE(review): extract is incomplete -- the final parameter's
 * declaration line is missing; the body references `lcu_update` and
 * `qp_value`, whose declarations are also outside this view.
 */
2149 gen10_hevc_enc_brc_update_set_curbe(VADriverContextP ctx,
2150                                     struct encode_state *encode_state,
2151                                     struct intel_encoder_context *encoder_context,
2152                                     struct i965_gpe_context *gpe_context,
2155     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2156     struct gen10_hevc_enc_state *hevc_state;
2157     gen10_hevc_brc_update_curbe_data *brc_update;
2158     VAEncSliceParameterBufferHEVC *slice_param;
2159     VAEncPictureParameterBufferHEVC *pic_param;
2161     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2163     slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2164     pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2166     brc_update = i965_gpe_context_map_curbe(gpe_context);
2169         return VA_STATUS_ERROR_OPERATION_FAILED;
2171     memset(brc_update, 0, sizeof(gen10_hevc_brc_update_curbe_data));
     /* Wrap the running target fullness at the VBV buffer size and
      * flag the wrap to the kernel. */
2173     if (hevc_state->brc.brc_init_current_target_buf_full_in_bits >
2174         (double)hevc_state->brc.brc_init_reset_buf_size_in_bits) {
2175         hevc_state->brc.brc_init_current_target_buf_full_in_bits -=
2176             (double)hevc_state->brc.brc_init_reset_buf_size_in_bits;
2177         brc_update->dw5.target_size_flag = 1;
2180     brc_update->dw0.target_size = (uint32_t)(hevc_state->brc.brc_init_current_target_buf_full_in_bits);
2181     brc_update->dw1.frame_num = hevc_state->frame_number;
2183     brc_update->dw2.picture_header_size = gen10_hevc_enc_get_pic_header_size(encode_state);
     /* BRC level: 2 = I frame, 0 = P (or low-delay B), 1 = B. */
2185     if (slice_param->slice_type == HEVC_SLICE_I)
2186         brc_update->dw5.curr_frame_brclevel = 2;
2187     else if (slice_param->slice_type == HEVC_SLICE_P ||
2188              hevc_state->low_delay)
2189         brc_update->dw5.curr_frame_brclevel = 0;
2191         brc_update->dw5.curr_frame_brclevel = 1;
2193     brc_update->dw5.max_num_paks = GEN10_HEVC_MAX_BRC_PASSES;
     /* CQP mode: clamp the effective slice QP into [1, 51]. */
2195     if (hevc_state->brc.brc_method == GEN10_HEVC_BRC_CQP) {
2198         qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2199         BRC_CLIP(qp_value, 1, 51);
2200         brc_update->dw6.cqp_value = qp_value;
2203     brc_update->dw14.parallel_mode = 0;
     /* The LCU-level update also advances the running target fullness
      * by one frame's bit budget. */
2205     if (lcu_update == 1)
2206         hevc_state->brc.brc_init_current_target_buf_full_in_bits +=
2207             hevc_state->brc.brc_init_reset_input_bits_per_frame;
     /* Fixed gain-adjust schedule (frame thresholds, multipliers and
      * divisors) used by the BRC kernel's QP adaptation. */
2209     brc_update->dw3.start_gadj_frame0 = 10;
2210     brc_update->dw3.start_gadj_frame1 = 50;
2211     brc_update->dw4.start_gadj_frame2 = 100;
2212     brc_update->dw4.start_gadj_frame3 = 150;
2214     brc_update->dw8.start_gadj_mult0 = 1;
2215     brc_update->dw8.start_gadj_mult1 = 1;
2216     brc_update->dw8.start_gadj_mult2 = 3;
2217     brc_update->dw8.start_gadj_mult3 = 2;
2218     brc_update->dw9.start_gadj_mult4 = 1;
2220     brc_update->dw9.start_gadj_divd0 = 40;
2221     brc_update->dw9.start_gadj_divd1 = 5;
2222     brc_update->dw9.start_gadj_divd2 = 5;
2223     brc_update->dw10.start_gadj_divd3 = 3;
2224     brc_update->dw10.start_gadj_divd4 = 1;
     /* QP thresholds and rate-ratio bucket boundaries (fixed table). */
2226     brc_update->dw10.qp_threshold0 = 7;
2227     brc_update->dw10.qp_threshold1 = 18;
2228     brc_update->dw11.qp_threshold2 = 25;
2229     brc_update->dw11.qp_threshold3 = 37;
2231     brc_update->dw11.grate_ratio_thr0 = 40;
2232     brc_update->dw11.grate_ratio_thr1 = 75;
2233     brc_update->dw12.grate_ratio_thr2 = 97;
2234     brc_update->dw12.grate_ratio_thr3 = 103;
2235     brc_update->dw12.grate_ratio_thr4 = 125;
2236     brc_update->dw12.grate_ratio_thr5 = 160;
2238     brc_update->dw13.grate_ratio_thr6 = -3;
2239     brc_update->dw13.grate_ratio_thr7 = -2;
2240     brc_update->dw13.grate_ratio_thr8 = -1;
2241     brc_update->dw13.grate_ratio_thr9 = 0;
2243     brc_update->dw14.grate_ratio_thr10 = 1;
2244     brc_update->dw14.grate_ratio_thr11 = 2;
2245     brc_update->dw14.grate_ratio_thr12 = 3;
2247     i965_gpe_context_unmap_curbe(gpe_context);
2248     return VA_STATUS_SUCCESS;
/* Dispatch the BRC frame-update kernel: rebuild the per-pass
 * HCP_PIC_STATE images, program CURBE (lcu_update = 0) and surfaces,
 * then launch a single media object.
 */
2252 gen10_hevc_enc_brc_frame_update_kernel(VADriverContextP ctx,
2253                                        struct encode_state *encode_state,
2254                                        struct intel_encoder_context *encoder_context)
2256     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2257     struct i965_gpe_context *gpe_context;
2258     int gpe_index = GEN10_HEVC_BRC_FRAME_UPDATE;
2259     int media_function = GEN10_HEVC_MEDIA_STATE_BRC_UPDATE;
2260     struct gpe_media_object_parameter media_object_param;
2262     gpe_context = &(vme_context->brc_context.gpe_contexts[gpe_index]);
2264     gen8_gpe_context_init(ctx, gpe_context);
2265     gen9_gpe_reset_binding_table(ctx, gpe_context);
     /* Prepare the pic-image-state templates the kernel will patch. */
2267     gen10_hevc_brc_add_pic_img_state(ctx, encode_state, encoder_context);
2268     gen10_hevc_enc_brc_update_set_curbe(ctx, encode_state, encoder_context, gpe_context, 0);
2269     gen10_hevc_enc_brc_frame_update_add_surfaces(ctx, encode_state, encoder_context, gpe_context);
2270     gen8_gpe_setup_interface_data(ctx, gpe_context);
2272     memset(&media_object_param, 0, sizeof(media_object_param));
2273     gen10_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
/* Bind the surfaces for the LCU-level BRC update kernel: history
 * buffer plus the ME-distortion, MB-stat, MB-QP, MB-split, intra-
 * distortion and CU-split 2D surfaces.
 * NOTE(review): extract is incomplete -- binding-table index argument
 * lines of each i965_add_* call are missing from this view.
 */
2277 gen10_hevc_enc_brc_lcu_update_add_surfaces(VADriverContextP ctx,
2278                                            struct encode_state *encode_state,
2279                                            struct intel_encoder_context *encoder_context,
2280                                            struct i965_gpe_context *gpe_context)
2282     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2284     i965_add_buffer_gpe_surface(ctx,
2286                                 &vme_context->res_brc_history_buffer,
2288                                 BYTES2UINT32(vme_context->res_brc_history_buffer.size),
2292     i965_add_buffer_2d_gpe_surface(ctx,
2294                                    &vme_context->res_brc_me_dist_surface,
2296                                    I965_SURFACEFORMAT_R8_UNORM,
2299     i965_add_buffer_2d_gpe_surface(ctx,
2301                                    &vme_context->res_mb_stat_surface,
2303                                    I965_SURFACEFORMAT_R8_UNORM,
2306     i965_add_buffer_2d_gpe_surface(ctx,
2308                                    &vme_context->res_brc_mb_qp_surface,
2310                                    I965_SURFACEFORMAT_R8_UNORM,
2313     i965_add_buffer_2d_gpe_surface(ctx,
2315                                    &vme_context->res_mb_split_surface,
2317                                    I965_SURFACEFORMAT_R8_UNORM,
2320     i965_add_buffer_2d_gpe_surface(ctx,
2322                                    &vme_context->res_brc_intra_dist_surface,
2324                                    I965_SURFACEFORMAT_R8_UNORM,
2327     i965_add_buffer_2d_gpe_surface(ctx,
2329                                    &vme_context->res_cu_split_surface,
2331                                    I965_SURFACEFORMAT_R8_UNORM,
/* Dispatch the LCU-level BRC update kernel with a dependency-free
 * media-object walker. The walker grid is derived from the frame
 * dimensions: width is rounded to 16-pixel units twice (a 256-pixel
 * granularity overall), height to 16 then to 8-row groups.
 */
2336 gen10_hevc_enc_brc_lcu_update_kernel(VADriverContextP ctx,
2337                                      struct encode_state *encode_state,
2338                                      struct intel_encoder_context *encoder_context)
2340     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2341     struct gen10_hevc_enc_state *hevc_state;
2342     struct i965_gpe_context *gpe_context;
2343     int gpe_index = GEN10_HEVC_BRC_LCU_UPDATE;
2344     int media_function = GEN10_HEVC_MEDIA_STATE_BRC_LCU_UPDATE;
2345     uint32_t resolution_x, resolution_y;
2346     struct gpe_media_object_walker_parameter media_object_walker_param;
2347     struct gen10_hevc_enc_kernel_walker_parameter kernel_walker_param;
2349     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2351     gpe_context = &(vme_context->brc_context.gpe_contexts[gpe_index]);
2353     gen8_gpe_context_init(ctx, gpe_context);
2354     gen9_gpe_reset_binding_table(ctx, gpe_context);
     /* Same CURBE setter as the frame update, but lcu_update = 1. */
2356     gen10_hevc_enc_brc_update_set_curbe(ctx, encode_state, encoder_context, gpe_context, 1);
2357     gen10_hevc_enc_brc_lcu_update_add_surfaces(ctx, encode_state, encoder_context, gpe_context);
2359     gen8_gpe_setup_interface_data(ctx, gpe_context);
2361     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
     /* Two-stage rounding: pixels -> 16px blocks -> 16-block groups
      * horizontally; pixels -> 16px blocks -> 8-block groups vertically. */
2363     resolution_x = ALIGN(hevc_state->frame_width, 16) >> 4;
2364     resolution_x = ALIGN(resolution_x, 16) >> 4;
2365     resolution_y = ALIGN(hevc_state->frame_height, 16) >> 4;
2366     resolution_y = ALIGN(resolution_y, 8) >> 3;
2367     kernel_walker_param.resolution_x = resolution_x;
2368     kernel_walker_param.resolution_y = resolution_y;
2369     kernel_walker_param.no_dependency = 1;
2371     gen10_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
2373     gen10_run_kernel_media_object_walker(ctx, encoder_context,
2376                                          &media_object_walker_param);
/* Fill the CURBE for the combined 10->8-bit conversion / downscaling
 * kernel: bit depths, conversion/downscale flags from scale_param,
 * LCU-size-dependent job-queue size, frame dimensions, and all
 * binding-table indices.
 */
2380 gen10_hevc_enc_scaling_curbe(VADriverContextP ctx,
2381                              struct encode_state *encode_state,
2382                              struct intel_encoder_context *encoder_context,
2383                              struct i965_gpe_context *gpe_context,
2384                              struct gen10_hevc_scaling_conversion_param *scale_param)
2386     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2387     struct gen10_hevc_enc_state *hevc_state;
2388     gen10_hevc_scaling_curbe_data *scaling_curbe;
2390     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2391     scaling_curbe = i965_gpe_context_map_curbe(gpe_context);
2396     memset(scaling_curbe, 0, sizeof(gen10_hevc_scaling_curbe_data));
     /* Conversion path is fixed 10-bit input -> 8-bit output, with
      * rounding enabled. */
2398     scaling_curbe->dw0.input_bit_depth_for_chroma = 10;
2399     scaling_curbe->dw0.input_bit_depth_for_luma = 10;
2400     scaling_curbe->dw0.output_bit_depth_for_chroma = 8;
2401     scaling_curbe->dw0.output_bit_depth_for_luma = 8;
2402     scaling_curbe->dw0.rounding_enabled = 1;
2404     scaling_curbe->dw1.convert_flag = scale_param->scale_flag.conv_enable;
2405     scaling_curbe->dw1.downscale_stage = scale_param->scale_flag.ds_type;
2406     scaling_curbe->dw1.mb_statistics_dump_flag = scale_param->scale_flag.dump_enable;
     /* Job-queue size depends on LCU size: lcu_size 0 = 64x64 LCU,
      * 1 = 32x32 LCU. */
2407     if (scale_param->scale_flag.is_64lcu) {
2408         scaling_curbe->dw1.lcu_size = 0;
2409         scaling_curbe->dw1.job_queue_size = 32;
2411         scaling_curbe->dw1.lcu_size = 1;
2412         scaling_curbe->dw1.job_queue_size = 2656;
2415     scaling_curbe->dw2.orig_pic_width_in_pixel = hevc_state->frame_width;
2416     scaling_curbe->dw2.orig_pic_height_in_pixel = hevc_state->frame_height;
     /* Binding-table indices for every input/output surface slot. */
2418     scaling_curbe->dw3.bti_input_conversion_surface = GEN10_HEVC_SCALING_10BIT_Y;
2419     scaling_curbe->dw4.bti_input_ds_surface = GEN10_HEVC_SCALING_8BIT_Y;
2420     scaling_curbe->dw5.bti_4x_ds_surface = GEN10_HEVC_SCALING_4xDS;
2421     scaling_curbe->dw6.bti_mbstat_surface = GEN10_HEVC_SCALING_MB_STATS;
2422     scaling_curbe->dw7.bti_2x_ds_surface = GEN10_HEVC_SCALING_2xDS;
2423     scaling_curbe->dw8.bti_mb_split_surface = GEN10_HEVC_SCALING_MB_SPLIT_SURFACE;
2424     scaling_curbe->dw9.bti_lcu32_jobqueue_buffer_surface = GEN10_HEVC_SCALING_LCU32_JOB_QUEUE_SCRATCH_SURFACE;
2425     scaling_curbe->dw10.bti_lcu64_lcu32_jobqueue_buffer_surface = GEN10_HEVC_SCALING_LCU64_JOB_QUEUE_SCRATCH_SURFACE;
2426     scaling_curbe->dw11.bti_lcu64_cu32_distortion_surface = GEN10_HEVC_SCALING_LCU64_64x64_DISTORTION_SURFACE;
2428     i965_gpe_context_unmap_curbe(gpe_context);
/* Bind the input/output surfaces for the conversion/scaling kernel,
 * selecting between the 10-bit conversion path (input + converted
 * output) and the plain 8-bit path, then adding the 4x/2x downscale
 * outputs, MB-stat/MB-split dumps, the LCU job-queue scratch buffers
 * and the 64x64 distortion buffer.
 * NOTE(review): extract is incomplete -- gpe_context, obj_surface and
 * binding-table argument lines of most i965_add_* calls are missing
 * from this view.
 */
2432 gen10_hevc_enc_scaling_surfaces(VADriverContextP ctx,
2433                                 struct encode_state *encode_state,
2434                                 struct intel_encoder_context *encoder_context,
2435                                 struct i965_gpe_context *gpe_context,
2436                                 struct gen10_hevc_scaling_conversion_param *scale_param)
2438     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2440     struct object_surface *obj_surface;
     /* 10-bit path: bind the 10-bit source (R32/R16G16 views) and the
      * 8-bit converted output (R8/R16 views). */
2442     if (scale_param->scale_flag.conv_enable) {
2443         obj_surface = scale_param->input_surface;
2444         i965_add_2d_gpe_surface(ctx,
2449                                 I965_SURFACEFORMAT_R32_UNORM,
2453         i965_add_2d_gpe_surface(ctx,
2458                                 I965_SURFACEFORMAT_R16G16_UNORM,
2462         obj_surface = scale_param->converted_output_surface;
2463         i965_add_2d_gpe_surface(ctx,
2468                                 I965_SURFACEFORMAT_R8_UNORM,
2471         i965_add_2d_gpe_surface(ctx,
2476                                 I965_SURFACEFORMAT_R16_UINT,
     /* 8-bit path: bind the source directly as downscale input. */
2481         obj_surface = scale_param->input_surface;
2482         i965_add_2d_gpe_surface(ctx,
2487                                 I965_SURFACEFORMAT_R32_UNORM,
2491         i965_add_2d_gpe_surface(ctx,
2496                                 I965_SURFACEFORMAT_R16_UINT,
     /* 4x downscale output (also used by the 16x chain's first stage). */
2501     if (scale_param->scale_flag.ds_type == GEN10_4X_DS ||
2502         scale_param->scale_flag.ds_type == GEN10_16X_DS ||
2503         scale_param->scale_flag.ds_type == GEN10_2X_4X_DS) {
2504         obj_surface = scale_param->scaled_4x_surface;
2506         i965_add_2d_gpe_surface(ctx,
2511                                 I965_SURFACEFORMAT_R32_UNORM,
2517         i965_add_buffer_2d_gpe_surface(ctx,
2519                                        &vme_context->res_mb_stat_surface,
2521                                        I965_SURFACEFORMAT_R8_UNORM,
     /* 2x downscale output (64-LCU path). */
2525     if (scale_param->scale_flag.ds_type == GEN10_2X_DS ||
2526         scale_param->scale_flag.ds_type == GEN10_2X_4X_DS) {
2527         obj_surface = scale_param->scaled_2x_surface;
2529         i965_add_2d_gpe_surface(ctx,
2534                                 I965_SURFACEFORMAT_R32_UNORM,
2540     i965_add_buffer_2d_gpe_surface(ctx,
2542                                    &vme_context->res_mb_split_surface,
2544                                    I965_SURFACEFORMAT_R8_UNORM,
     /* Job-queue scratch buffers and the 64x64 distortion buffer. */
2548     i965_add_buffer_gpe_surface(ctx,
2550                                 &vme_context->res_jbq_header_buffer,
2552                                 BYTES2UINT32(vme_context->res_jbq_header_buffer.size),
2557     i965_add_buffer_gpe_surface(ctx,
2559                                 &vme_context->res_jbq_header_lcu64_buffer,
2561                                 BYTES2UINT32(vme_context->res_jbq_header_lcu64_buffer.size),
2566     i965_add_buffer_gpe_surface(ctx,
2568                                 &vme_context->res_64x64_dist_buffer,
2570                                 BYTES2UINT32(vme_context->res_64x64_dist_buffer.size),
/* Dispatch the conversion/scaling kernel: program CURBE + surfaces,
 * then pick the walker grid and media-function ID by downscale type
 * (none / 2x / 4x / 2x+4x / 16x). Walker units are 8x8 blocks of the
 * respective output resolution.
 * NOTE(review): extract is incomplete -- the media_function variable
 * declaration and some walker-call argument lines are missing from
 * this view.
 */
2577 gen10_hevc_enc_scaling_kernel(VADriverContextP ctx,
2578                               struct encode_state *encode_state,
2579                               struct intel_encoder_context *encoder_context,
2580                               struct gen10_hevc_scaling_conversion_param *scale_param)
2582     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2583     struct gen10_hevc_enc_state *hevc_state;
2584     struct i965_gpe_context *gpe_context;
2586     struct gpe_media_object_walker_parameter media_object_walker_param;
2587     struct gen10_hevc_enc_kernel_walker_parameter kernel_walker_param;
2589     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2591     gpe_context = &(vme_context->scaling_context.gpe_context);
2593     gen8_gpe_context_init(ctx, gpe_context);
2594     gen9_gpe_reset_binding_table(ctx, gpe_context);
2596     gen10_hevc_enc_scaling_curbe(ctx, encode_state, encoder_context, gpe_context, scale_param);
2597     gen10_hevc_enc_scaling_surfaces(ctx, encode_state, encoder_context, gpe_context, scale_param);
2599     gen8_gpe_setup_interface_data(ctx, gpe_context);
2601     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
     /* Grid = output resolution in 8x8 blocks; 2x output is first
      * aligned to 64 pixels before the divide. */
2602     if (scale_param->scale_flag.ds_type == GEN10_NONE_DS) {
2603         kernel_walker_param.resolution_x = hevc_state->frame_width >> 3;
2604         kernel_walker_param.resolution_y = hevc_state->frame_height >> 3;
2605         media_function = GEN10_HEVC_MEDIA_STATE_NO_SCALING;
2606     } else if (scale_param->scale_flag.ds_type == GEN10_2X_DS) {
2607         kernel_walker_param.resolution_x = ALIGN(hevc_state->frame_width >> 1, 64) >> 3;
2608         kernel_walker_param.resolution_y = ALIGN(hevc_state->frame_height >> 1, 64) >> 3;
2609         media_function = GEN10_HEVC_MEDIA_STATE_2X_SCALING;
2610     } else if (scale_param->scale_flag.ds_type == GEN10_4X_DS ||
2611                scale_param->scale_flag.ds_type == GEN10_2X_4X_DS) {
2612         kernel_walker_param.resolution_x = hevc_state->frame_width_4x >> 3;
2613         kernel_walker_param.resolution_y = hevc_state->frame_height_4x >> 3;
2615         if (scale_param->scale_flag.ds_type == GEN10_4X_DS)
2616             media_function = GEN10_HEVC_MEDIA_STATE_4X_SCALING;
2618             media_function = GEN10_HEVC_MEDIA_STATE_2X_4X_SCALING;
2620         kernel_walker_param.resolution_x = hevc_state->frame_width_16x >> 3;
2621         kernel_walker_param.resolution_y = hevc_state->frame_height_16x >> 3;
2623         media_function = GEN10_HEVC_MEDIA_STATE_16X_SCALING;
2625     kernel_walker_param.no_dependency = 1;
2627     gen10_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
2629     gen10_run_kernel_media_object_walker(ctx, encoder_context,
2632                                          &media_object_walker_param);
/* Run format conversion and downscaling for one surface: choose the
 * downscale chain from the 64-LCU / HME configuration, invoke the
 * scaling kernel once for the 4x/2x stage, and a second time for the
 * 16x stage when 16x ME is supported. Skips entirely when no
 * conversion or scaling is needed (8-bit, 32-LCU, no HME).
 * NOTE(review): extract is incomplete -- early-return bodies, the
 * second condition of the 16x guard, and the trailing arguments of the
 * gen10_hevc_enc_scaling_kernel calls are missing from this view.
 */
2636 gen10_hevc_enc_conv_scaling_surface(VADriverContextP ctx,
2637                                     struct encode_state *encode_state,
2638                                     struct intel_encoder_context *encoder_context,
2639                                     struct object_surface *input_surface,
2640                                     struct object_surface *obj_surface,
2641                                     int only_for_reference)
2643     struct gen10_hevc_enc_context *vme_context;
2644     struct gen10_hevc_enc_state *hevc_state;
2645     struct gen10_hevc_scaling_conversion_param scale_param;
2646     struct gen10_hevc_surface_priv *surface_priv;
2648     vme_context = encoder_context->vme_context;
2649     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2650     surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
     /* Nothing to do: no 10-bit conversion, no 2x (64-LCU) scaling,
      * no HME 4x scaling. */
2652     if (!hevc_state->is_10bit &&
2653         !hevc_state->is_64lcu &&
2654         !hevc_state->hme_supported)
2657     memset(&scale_param, 0, sizeof(scale_param));
     /* input_surface may override the source (e.g. raw input vs the
      * reconstructed object). */
2659     scale_param.input_surface = input_surface ? input_surface : obj_surface;
2660     scale_param.input_width = hevc_state->frame_width;
2661     scale_param.input_height = hevc_state->frame_height;
2662     scale_param.output_4x_width = hevc_state->frame_width_4x;
2663     scale_param.output_4x_height = hevc_state->frame_height_4x;
2664     scale_param.scaled_2x_surface = surface_priv->scaled_2x_surface;
2665     scale_param.scaled_4x_surface = surface_priv->scaled_4x_surface;
2666     scale_param.converted_output_surface = surface_priv->converted_surface;
2668     if (hevc_state->is_10bit)
2669         scale_param.scale_flag.conv_enable = GEN10_DEPTH_CONV_ENABLE;
2671     scale_param.scale_flag.is_64lcu = hevc_state->is_64lcu;
     /* Downscale chain: 64-LCU + HME -> 2x+4x, 64-LCU only -> 2x,
      * HME only -> 4x, otherwise conversion-only (no downscale).
      * MB-stat dump is enabled only when BRC is active. */
2673     scale_param.scale_flag.dump_enable = 0;
2674     if (hevc_state->is_64lcu && hevc_state->hme_supported) {
2675         scale_param.scale_flag.ds_type = GEN10_2X_4X_DS;
2676         scale_param.scale_flag.dump_enable = hevc_state->brc.brc_enabled ? 1 : 0;
2677     } else if (hevc_state->is_64lcu)
2678         scale_param.scale_flag.ds_type = GEN10_2X_DS;
2679     else if (hevc_state->hme_supported) {
2680         scale_param.scale_flag.ds_type = GEN10_4X_DS;
2681         scale_param.scale_flag.dump_enable = hevc_state->brc.brc_enabled ? 1 : 0;
2683         scale_param.scale_flag.ds_type = GEN10_NONE_DS;
2685     gen10_hevc_enc_scaling_kernel(ctx, encode_state,
     /* Mark reference-only surfaces so the chain is not re-run. */
2689     if (only_for_reference)
2690         surface_priv->conv_scaling_done = 1;
2692     if (!hevc_state->b16xme_supported ||
     /* Second pass: 4x -> 16x downscale for 16x HME. */
2696     memset(&scale_param, 0, sizeof(scale_param));
2698     scale_param.input_surface = surface_priv->scaled_4x_surface;
2699     scale_param.scaled_4x_surface = surface_priv->scaled_16x_surface;
2700     scale_param.input_width = hevc_state->frame_width_4x;
2701     scale_param.input_height = hevc_state->frame_height_4x;
2702     scale_param.output_4x_width = hevc_state->frame_width_16x;
2703     scale_param.output_4x_height = hevc_state->frame_height_16x;
2705     scale_param.scale_flag.ds_type = GEN10_16X_DS;
2707     gen10_hevc_enc_scaling_kernel(ctx, encode_state,
2712 #define GEN10_HEVC_HME_STAGE_4X_NO_16X 0
2713 #define GEN10_HEVC_HME_STAGE_4X_AFTER_16X 1
2714 #define GEN10_HEVC_HME_STAGE_16X 2
/* Fill the CURBE for the HME kernel (4x or 16x level). Programs the
 * rounded frame sizes in MV units, search parameters, HME stage, L0/L1
 * reference counts from the slice header, the IME search path, SIC
 * intra parameters and the binding-table indices.
 * NOTE(review): extract is incomplete -- the trailing parameter
 * declarations (hme_level, dist_type) are missing from this view,
 * though both are referenced in the body.
 */
2717 gen10_hevc_enc_me_curbe(VADriverContextP ctx,
2718                         struct encode_state *encode_state,
2719                         struct intel_encoder_context *encoder_context,
2720                         struct i965_gpe_context *gpe_context,
2724     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2725     struct gen10_hevc_enc_state *hevc_state;
2726     gen10_hevc_me_curbe_data *me_curbe;
2727     VAEncSliceParameterBufferHEVC *slice_param;
2728     VAEncSequenceParameterBufferHEVC *seq_param;
2730     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2732     seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2733     slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2735     me_curbe = i965_gpe_context_map_curbe(gpe_context);
2740     memset(me_curbe, 0, sizeof(gen10_hevc_me_curbe_data));
     /* Frame size in 8-pixel MV units at the 4x-downscaled resolution. */
2742     me_curbe->dw0.rounded_frame_width_in_mv_for4x = hevc_state->frame_width_4x >> 3;
2743     me_curbe->dw0.rounded_frame_height_in_mv_for4x = hevc_state->frame_height_4x >> 3;
2745     me_curbe->dw2.sub_pel_mode = 3;
2746     me_curbe->dw2.bme_disable_fbr = 1;
2747     me_curbe->dw2.inter_sad_adj = 2;
2749     me_curbe->dw3.adaptive_search_en = 1;
2750     me_curbe->dw3.ime_ref_window_size = 1; // From the HW-spec
2752     me_curbe->dw4.quarter_quad_tree_cand = 1; // 32x32 split is enabled.
2753     me_curbe->dw4.bi_weight = 32; // default weight.
2755     me_curbe->dw5.len_sp = 0x3F;
2756     me_curbe->dw5.max_num_su = 0x3F;
     /* Search start center derived from the IME reference window
      * (index 1 of the window table), in 8-pixel units. */
2757     me_curbe->dw5.start_center0_x = ((gen10_hevc_ime_ref_window_size[1][0] - 32) >> 3) & 0xF;
2758     me_curbe->dw5.start_center0_y = ((gen10_hevc_ime_ref_window_size[1][1] - 32) >> 3) & 0xF;
     /* HME stage: for BRC inter distortion, 4x runs either standalone
      * or after the 16x pass; 16x is its own stage. */
2760     me_curbe->dw6.slice_type = (dist_type == GEN10_HEVC_ME_DIST_TYPE_INTER_BRC) ? 1 : 0;
2761     if (dist_type == GEN10_HEVC_ME_DIST_TYPE_INTER_BRC) {
2762         if (hme_level == GEN10_HEVC_HME_LEVEL_4X)
2763             me_curbe->dw6.hme_stage =
2764                 (hevc_state->b16xme_enabled) ? GEN10_HEVC_HME_STAGE_4X_AFTER_16X :
2765                 GEN10_HEVC_HME_STAGE_4X_NO_16X;
2767             me_curbe->dw6.hme_stage = GEN10_HEVC_HME_STAGE_16X;
2769         me_curbe->dw6.hme_stage = GEN10_HEVC_HME_STAGE_4X_NO_16X;
     /* Active reference counts: I uses none, P uses L0 only, B uses
      * both (L1 suppressed in low-delay mode). */
2771     if (slice_param->slice_type == HEVC_SLICE_I) {
2772         me_curbe->dw6.num_ref_l0 = 0;
2773         me_curbe->dw6.num_ref_l1 = 0;
2774     } else if (slice_param->slice_type == HEVC_SLICE_P) {
2775         me_curbe->dw6.num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
2776         me_curbe->dw6.num_ref_l1 = 0;
2778         me_curbe->dw6.num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
2779         me_curbe->dw6.num_ref_l1 = hevc_state->low_delay ? 0 : slice_param->num_ref_idx_l1_active_minus1 + 1;
2782     me_curbe->dw7.rounded_frame_width_in_mv_for16x = hevc_state->frame_width_16x >> 3;
2783     me_curbe->dw7.rounded_frame_height_in_mv_for16x = hevc_state->frame_height_16x >> 3;
     /* Copy the fixed IME search-path table into the CURBE. */
2786     memcpy(&me_curbe->ime_search_path_03, gen10_hevc_me_search_path,
2787            sizeof(gen10_hevc_me_search_path));
2789     me_curbe->dw24.coding_unit_size = 1;
2790     me_curbe->dw24.coding_unit_partition_mode = 0;
2791     me_curbe->dw24.coding_unit_prediction_mode = 1;
     /* Chroma-search frame size: quarter resolution for 4x HME,
      * sixteenth for 16x. */
2793     if (hme_level == GEN10_HEVC_HME_LEVEL_4X) {
2794         me_curbe->dw25.frame_width_in_pixel_cs = hevc_state->frame_width >> 2;
2795         me_curbe->dw25.frame_height_in_pixel_cs = hevc_state->frame_height >> 2;
2797         me_curbe->dw25.frame_width_in_pixel_cs = hevc_state->frame_width >> 4;
2798         me_curbe->dw25.frame_height_in_pixel_cs = hevc_state->frame_height >> 4;
2801     me_curbe->dw27.intra_compute_type = 1;
     /* Fixed intra mode penalties and mode costs. */
2803     me_curbe->dw28.penalty_intra32x32_nondc = 36;
2804     me_curbe->dw28.penalty_intra16x16_nondc = 12;
2805     me_curbe->dw28.penalty_intra8x8_nondc = 4;
2807     me_curbe->dw30.mode4_cost = 13;
2808     me_curbe->dw30.mode5_cost = 9;
2809     me_curbe->dw30.mode6_cost = 13;
2810     me_curbe->dw30.mode7_cost = 3;
2811     me_curbe->dw31.mode8_cost = 9;
2813     me_curbe->dw32.sicintra_neighbor_avail_flag = 0x3F;
2814     me_curbe->dw32.sic_inter_sad_measure = 0x02;
2815     me_curbe->dw32.sic_intra_sad_measure = 0x02;
2817     me_curbe->dw33.sic_log2_min_cu_size = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
     /* Binding-table indices for the HME kernel's surfaces. */
2819     me_curbe->dw34.bti_hme_output_mv_data_surface = GEN10_HEVC_HME_OUTPUT_MV_DATA;
2820     me_curbe->dw35.bti_16xinput_mv_data_surface = GEN10_HEVC_HME_16xINPUT_MV_DATA;
2821     me_curbe->dw36.bti_4x_output_distortion_surface = GEN10_HEVC_HME_4xOUTPUT_DISTORTION;
2822     me_curbe->dw37.bti_vme_input_surface = GEN10_HEVC_HME_VME_PRED_CURR_PIC_IDX0;
2823     me_curbe->dw38.bti_4xds_surface = GEN10_HEVC_HME_4xDS_INPUT;
2824     me_curbe->dw39.bti_brc_distortion_surface = GEN10_HEVC_HME_BRC_DISTORTION;
2825     me_curbe->dw40.bti_mv_and_distortion_sum_surface = GEN10_HEVC_HME_MV_AND_DISTORTION_SUM;
2827     i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the input/output surfaces used by the HME (hierarchical motion
 * estimation) kernel for one scaling level (4x or 16x).
 *
 * NOTE(review): this chunk is an elided extraction -- the embedded original
 * line numbers jump (e.g. 2834->2838, 2857->2859), so the trailing
 * parameters (presumably hme_level/dist_type -- confirm against header),
 * else-branch braces and the closing brace are not visible here.  The
 * visible tokens are preserved byte-for-byte; only comments were added.
 */
2831 gen10_hevc_enc_me_surfaces(VADriverContextP ctx,
2832 struct encode_state *encode_state,
2833 struct intel_encoder_context *encoder_context,
2834 struct i965_gpe_context *gpe_context,
2838 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2839 struct gen10_hevc_enc_state *hevc_state;
2840 struct gen10_hevc_enc_frame_info *frame_info;
2841 struct gen10_hevc_enc_common_res *common_res;
2842 struct object_surface *obj_surface, *vme_surface;
2843 struct gen10_hevc_surface_priv *surface_priv;
2844 struct i965_gpe_resource *res_source;
2847 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2848 frame_info = &vme_context->frame_info;
2849 common_res = &vme_context->common_res;
/* The per-surface private data of the reconstructed picture holds the
 * pre-scaled (4x/16x) copies produced by the scaling kernel. */
2851 obj_surface = encode_state->reconstructed_object;
2853 surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
/* Select the scaled source picture and the MV-data output resource that
 * match the requested HME level (the 16x branch is the elided else). */
2855 if (hme_level == GEN10_HEVC_HME_LEVEL_4X) {
2856 vme_surface = surface_priv->scaled_4x_surface;
2857 res_source = &vme_context->res_s4x_memv_data_surface;
2859 vme_surface = surface_priv->scaled_16x_surface;
2860 res_source = &vme_context->res_s16x_memv_data_surface;
/* Output MV data surface for this level. */
2864 i965_add_buffer_2d_gpe_surface(ctx, gpe_context, res_source,
2865 1, I965_SURFACEFORMAT_R8_UNORM,
2866 GEN10_HEVC_HME_OUTPUT_MV_DATA);
/* When 16x ME already ran, its MV output feeds the 4x pass as a predictor. */
2868 if (hme_level == GEN10_HEVC_HME_LEVEL_4X && hevc_state->b16xme_enabled)
2869 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
2870 &vme_context->res_s16x_memv_data_surface,
2871 1, I965_SURFACEFORMAT_R8_UNORM,
2872 GEN10_HEVC_HME_16xINPUT_MV_DATA);
/* Only the 4x pass writes a distortion surface. */
2874 if (hme_level == GEN10_HEVC_HME_LEVEL_4X)
2875 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
2876 &vme_context->res_s4x_me_dist_surface,
2877 1, I965_SURFACEFORMAT_R8_UNORM,
2878 GEN10_HEVC_HME_4xOUTPUT_DISTORTION);
/* VME prediction surfaces: current picture first, then interleaved
 * L0/L1 reference entries at input_bti + 2*i (+1 for L1). */
2880 input_bti = GEN10_HEVC_HME_VME_PRED_CURR_PIC_IDX0;
2882 i965_add_adv_gpe_surface(ctx, gpe_context,
2887 for (i = 0; i < 4; i++) {
2888 struct object_surface *tmp_surface, *input_surface;
2889 struct gen10_hevc_surface_priv *tmp_hevc_surface;
/* L0 reference i (mapped index < 0 means unused; elided else branch). */
2891 if (frame_info->mapped_ref_idx_list0[i] >= 0)
2892 tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[i]].obj_surface;
2896 if (tmp_surface && tmp_surface->private_data) {
2897 tmp_hevc_surface = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
2899 if (hme_level == GEN10_HEVC_HME_LEVEL_4X)
2900 input_surface = tmp_hevc_surface->scaled_4x_surface;
2902 input_surface = tmp_hevc_surface->scaled_16x_surface;
2904 i965_add_adv_gpe_surface(ctx, gpe_context,
/* Fallback binding when reference i is absent -- presumably binds the
 * current scaled surface instead; the argument lines are elided. */
2908 i965_add_adv_gpe_surface(ctx, gpe_context,
/* L1 reference i, same pattern as L0 above. */
2912 if (frame_info->mapped_ref_idx_list1[i] >= 0)
2913 tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[i]].obj_surface;
2917 if (tmp_surface && tmp_surface->private_data) {
2918 tmp_hevc_surface = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
2920 if (hme_level == GEN10_HEVC_HME_LEVEL_4X)
2921 input_surface = tmp_hevc_surface->scaled_4x_surface;
2923 input_surface = tmp_hevc_surface->scaled_16x_surface;
2925 i965_add_adv_gpe_surface(ctx, gpe_context,
2927 input_bti + 2 * i + 1);
2929 i965_add_adv_gpe_surface(ctx, gpe_context,
2931 input_bti + 2 * i + 1);
/* 4x pass also takes the 4x-downscaled input plane and a BRC distortion
 * surface whose choice depends on dist_type (intra vs. ME distortion). */
2934 if (hme_level == GEN10_HEVC_HME_LEVEL_4X) {
2935 i965_add_2d_gpe_surface(ctx,
2940 I965_SURFACEFORMAT_R8_UNORM,
2941 GEN10_HEVC_HME_4xDS_INPUT);
2943 if (dist_type != GEN10_HEVC_ME_DIST_TYPE_INTRA)
2944 res_source = &vme_context->res_brc_me_dist_surface;
2946 res_source = &vme_context->res_brc_intra_dist_surface;
2948 i965_add_buffer_2d_gpe_surface(ctx, gpe_context, res_source,
2949 1, I965_SURFACEFORMAT_R8_UNORM,
2950 GEN10_HEVC_HME_BRC_DISTORTION);
/* Aggregate MV/distortion sums consumed by BRC. */
2953 i965_add_buffer_gpe_surface(ctx, gpe_context,
2954 &vme_context->res_mv_dist_sum_buffer,
2956 vme_context->res_mv_dist_sum_buffer.size,
2958 GEN10_HEVC_HME_MV_AND_DISTORTION_SUM);
/*
 * Run one HME pass (4x or 16x): program CURBE + surfaces, then dispatch
 * the ME kernel with a no-dependency media-object walker.
 *
 * NOTE(review): elided extraction -- the trailing parameters (presumably
 * hme_level/dist_type), else keyword/braces and closing brace are not
 * visible; visible tokens preserved byte-for-byte, comments only added.
 */
2962 gen10_hevc_enc_me_kernel(VADriverContextP ctx,
2963 struct encode_state *encode_state,
2964 struct intel_encoder_context *encoder_context,
2968 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2969 struct gen10_hevc_enc_state *hevc_state;
2970 struct i965_gpe_context *gpe_context;
2972 struct gpe_media_object_walker_parameter media_object_walker_param;
2973 struct gen10_hevc_enc_kernel_walker_parameter kernel_walker_param;
2975 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2977 gpe_context = &(vme_context->me_context.gpe_context);
/* (Re)initialize the GPE context and binding table before programming. */
2979 gen8_gpe_context_init(ctx, gpe_context);
2980 gen9_gpe_reset_binding_table(ctx, gpe_context);
2982 gen10_hevc_enc_me_curbe(ctx, encode_state, encoder_context, gpe_context, hme_level, dist_type);
2983 gen10_hevc_enc_me_surfaces(ctx, encode_state, encoder_context, gpe_context, hme_level, dist_type);
2985 gen8_gpe_setup_interface_data(ctx, gpe_context);
2987 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
/* Walker resolution is the scaled frame size in 32x32 units (>> 5);
 * 16x branch is the elided else. */
2989 if (hme_level == GEN10_HEVC_HME_LEVEL_4X) {
2990 kernel_walker_param.resolution_x = hevc_state->frame_width_4x >> 5;
2991 kernel_walker_param.resolution_y = hevc_state->frame_height_4x >> 5;
2993 media_function = GEN10_HEVC_MEDIA_STATE_4XME;
2995 kernel_walker_param.resolution_x = hevc_state->frame_width_16x >> 5;
2996 kernel_walker_param.resolution_y = hevc_state->frame_height_16x >> 5;
2998 media_function = GEN10_HEVC_MEDIA_STATE_16XME;
/* ME thread blocks are independent: no scoreboard dependency needed. */
3001 kernel_walker_param.no_dependency = 1;
3003 gen10_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
3005 gen10_run_kernel_media_object_walker(ctx, encoder_context,
3008 &media_object_walker_param);
/*
 * Indices into the mode_cost[] array filled by gen10_hevc_calc_costs().
 * 0x00-0x0B select the 4.4-format mode-cost LUT entries; note that
 * several inter partition shapes deliberately share one slot
 * (32x16/16x32/AMP all map to 0x04, 16x8/8x16 both map to 0x06).
 */
3011 #define LUTMODE_INTRA_NONPRED_HEVC 0x00
3012 #define LUTMODE_INTRA_32x32_HEVC 0x01
3013 #define LUTMODE_INTRA_16x16_HEVC 0x02
3014 #define LUTMODE_INTRA_8x8_HEVC 0x03
3015 #define LUTMODE_INTER_32x16_HEVC 0x04
3016 #define LUTMODE_INTER_16x32_HEVC 0x04
3017 #define LUTMODE_INTER_AMP_HEVC 0x04
3018 #define LUTMODE_INTER_16x16_HEVC 0x05
3019 #define LUTMODE_INTER_16x8_HEVC 0x06
3020 #define LUTMODE_INTER_8x16_HEVC 0x06
3021 #define LUTMODE_INTER_8x8_HEVC 0x07
3022 #define LUTMODE_INTER_32x32_HEVC 0x08
3023 #define LUTMODE_INTER_BIDIR_HEVC 0x09
3024 #define LUTMODE_REF_ID_HEVC 0x0A
3025 #define LUTMODE_INTRA_CHROMA_HEVC 0x0B
/* Non-LUT entries: RD/MD lambdas and the TU SAD threshold live past the
 * LUT range; MAX_MODE_COST is the size of the whole mode_cost[] array. */
3027 #define LAMBDA_RD_IDX 0x10
3028 #define LAMBDA_MD_IDX 0x11
3029 #define TUSAD_THR_IDX 0x12
3031 #define MAX_MODE_COST 0x20
/*
 * Compress a linear cost value into the hardware's 4.4 "LUT" format:
 * low nibble = mantissa, high nibble = shift (value ~= mantissa << shift).
 * 'max' is the saturation cap, itself already in 4.4 format.
 *
 * NOTE(review): elided extraction -- the return type line, the second
 * parameter's declaration, the saturating early-return body and the final
 * return are not visible here; visible tokens preserved byte-for-byte.
 */
3034 map_44_lut_value(uint32_t value,
3037 uint32_t max_cost = 0;
/* Decode the 4.4-format cap back to a linear value for the comparison. */
3044 max_cost = ((max & 15) << (max >> 4));
3045 if (value >= max_cost)
/* floor(log2(value)) - 3 picks the shift so the mantissa fits in 4 bits. */
3048 data = (int)(log((double)value) / log(2.)) - 3;
/* Pack shift (high nibble) and rounded mantissa (low nibble). */
3052 ret = (uint8_t)((data << 4) +
3053 (int)((value + (data == 0 ? 0 : (1 << (data - 1)))) >> data));
/* Avoid a zero mantissa: force bit 3 so the encoded cost stays nonzero. */
3054 ret = (ret & 0xf) == 0 ? (ret | 8) : ret;
/*
 * Fill mode_cost[] (size MAX_MODE_COST) with the RDO lambdas, TU-SAD
 * threshold and per-mode 4.4-format cost LUT for the given slice type
 * and QP.  lcu_idx presumably selects the 32- vs 64-LCU row of
 * gen10_hevc_mode_bits based on b_lcu64 -- the assignment is elided.
 *
 * NOTE(review): elided extraction -- qp_scale/cost_scale/qp_value setup,
 * the per-slice-type branches and the closing brace are not visible;
 * visible tokens preserved byte-for-byte, comments only added.
 */
3060 gen10_hevc_calc_costs(uint32_t *mode_cost, int slice_type, int qp, bool b_lcu64)
3062 unsigned short lambda_md;
3063 unsigned int lambda_rd;
3064 unsigned int tu_sad_thres;
3067 double intra_weigh_factor;
3068 double inter_weigh_factor;
3069 double qp_scale, cost_scale;
3075 if (slice_type == HEVC_SLICE_I) {
/* Standard HEVC-style lambda model: lambda grows as 2^(QP/3), scaled by
 * qp_scale; fixed-point results use an 8-bit fraction (* 256 + 0.5). */
3092 lambda = sqrt(qp_scale * pow(2.0, qp_value / 3.0));
3093 lambda_rd = (unsigned int)(qp_scale * pow(2.0, qp_value / 3.0) * 256 + 0.5);
3094 lambda_md = (unsigned short)(lambda * 256 + 0.5);
3095 tu_sad_thres = (unsigned int)(sqrt(0.85 * pow(2.0, qp_value / 3.0)) * 0.4 * 256 + 0.5);
3097 inter_weigh_factor = cost_scale * lambda;
3098 intra_weigh_factor = inter_weigh_factor * gen10_hevc_lambda_factor[slice_type][qp];
3100 mode_cost[LAMBDA_RD_IDX] = lambda_rd;
3101 mode_cost[LAMBDA_MD_IDX] = lambda_md;
3102 mode_cost[TUSAD_THR_IDX] = tu_sad_thres;
/* Per-mode costs: weighted mode-bit estimates compressed to 4.4 format,
 * each saturated at a mode-specific cap (0x6f or 0x8f). */
3104 mode_cost[LUTMODE_INTRA_NONPRED_HEVC] = map_44_lut_value((uint32_t)(intra_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTRA_NONPRED_HEVC]), 0x6f);
3105 mode_cost[LUTMODE_INTRA_32x32_HEVC] = map_44_lut_value((uint32_t)(intra_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTRA_32x32_HEVC]), 0x8f);
3106 mode_cost[LUTMODE_INTRA_16x16_HEVC] = map_44_lut_value((uint32_t)(intra_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTRA_16x16_HEVC]), 0x8f);
3107 mode_cost[LUTMODE_INTRA_8x8_HEVC] = map_44_lut_value((uint32_t)(intra_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTRA_8x8_HEVC]), 0x8f);
3108 mode_cost[LUTMODE_INTRA_CHROMA_HEVC] = map_44_lut_value((uint32_t)(intra_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTRA_CHROMA_HEVC]), 0x6f);
3110 mode_cost[LUTMODE_INTER_32x32_HEVC] = map_44_lut_value((uint32_t)(inter_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTER_32x32_HEVC]), 0x8f);
3111 mode_cost[LUTMODE_INTER_32x16_HEVC] = map_44_lut_value((uint32_t)(inter_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTER_32x16_HEVC]), 0x8f);
3112 mode_cost[LUTMODE_INTER_16x16_HEVC] = map_44_lut_value((uint32_t)(inter_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTER_16x16_HEVC]), 0x6f);
3113 mode_cost[LUTMODE_INTER_16x8_HEVC] = map_44_lut_value((uint32_t)(inter_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTER_16x8_HEVC]), 0x6f);
/* NOTE(review): 8x8 uses a hard-coded 0.45 weight instead of
 * inter_weigh_factor -- looks intentional but worth confirming upstream. */
3114 mode_cost[LUTMODE_INTER_8x8_HEVC] = map_44_lut_value((uint32_t)(0.45 * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTER_8x8_HEVC]), 0x6f);
3116 mode_cost[LUTMODE_INTER_BIDIR_HEVC] = map_44_lut_value((uint32_t)(inter_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTER_BIDIR_HEVC]), 0x6f);
/* Ref-ID cost only applies to P/B slices; I slices have no references. */
3117 if (slice_type != HEVC_SLICE_I)
3118 mode_cost[LUTMODE_REF_ID_HEVC] = map_44_lut_value((uint32_t)(inter_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_REF_ID_HEVC]), 0x6f);
3120 mode_cost[LUTMODE_REF_ID_HEVC] = 0;
/*
 * Partition the frame into wavefront "regions" per slice for concurrent
 * thread-group dispatch, then upload the resulting per-region table into
 * res_concurrent_tg_data and record the wavefront parameters in
 * hevc_state->hevc_wf_param.
 *
 * Two paths: (a) arbitrary slices (a slice starts mid-CTB-row) fall back
 * to treating the whole frame as one slice split into
 * num_regions_in_slice diagonal wavefront regions; (b) row-aligned
 * slices are merged until num_slices * num_regions_in_slice fits the
 * 16-entry hardware table, then each slice is split into regions.
 *
 * NOTE(review): elided extraction -- many original lines (else branches,
 * closing braces, log2_lcu_size/copy_blk_size setup, the 'num' variable
 * used at line 3260, pregion advance in the copy loop) are not visible;
 * visible tokens preserved byte-for-byte, comments only added.
 */
3124 gen10_hevc_enc_generate_regions_in_slice_control(VADriverContextP ctx,
3125 struct encode_state *encode_state,
3126 struct intel_encoder_context *encoder_context)
3128 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
3129 struct gen10_hevc_enc_state *hevc_state;
3130 VAEncSliceParameterBufferHEVC *slice_param;
3131 gen10_hevc_concurrent_tg_data *pregion;
3132 int i, k, slice, num_regions, height, num_slices;
3133 int num_wf_in_region;
3134 uint32_t frame_width_in_ctb, frame_height_in_ctb;
3135 bool is_arbitary_slices;
/* slice_starty[s] = first CTB row of slice s; last entry = frame bottom. */
3136 int slice_starty[I965_MAX_NUM_SLICE + 1];
/* First half: region start X; entries at +GEN10_HEVC_REGION_START_Y_OFFSET
 * hold the matching start Y (CTB units). */
3137 int regions_start_table[64];
3138 uint32_t start_offset_to_region[16];
/* Staging copy of the per-region rows written to the GPU buffer. */
3139 int16_t data_tmp[32][32];
3142 int copy_blk_size = 0;
3144 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
3146 memset(slice_starty, 0, sizeof(slice_starty));
3147 memset(regions_start_table, 0, sizeof(regions_start_table));
3148 memset(data_tmp, 0, sizeof(data_tmp));
3149 memset(&hevc_state->hevc_wf_param, 0, sizeof(hevc_state->hevc_wf_param));
3150 memset(start_offset_to_region, 0, sizeof(start_offset_to_region));
3152 frame_width_in_ctb = vme_context->frame_info.width_in_lcu;
3153 frame_height_in_ctb = vme_context->frame_info.height_in_lcu;
/* 64-LCU adjustment block is elided here. */
3154 if (hevc_state->is_64lcu) {
/* Collect each slice's starting CTB row; a start address not on a row
 * boundary marks the layout as "arbitrary". */
3162 is_arbitary_slices = false;
3163 for (slice = 0; slice < encode_state->num_slice_params_ext; slice++) {
3165 if (encode_state->slice_params_ext[slice] &&
3166 encode_state->slice_params_ext[slice]->buffer)
3167 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice]->buffer;
3172 if (slice_param->slice_segment_address % frame_width_in_ctb) {
3173 is_arbitary_slices = true;
3175 slice_starty[slice] = slice_param->slice_segment_address / frame_width_in_ctb;
3179 slice_starty[encode_state->num_slice_params_ext] = frame_height_in_ctb;
3181 regions_start_table[0] = 0;
3182 regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + 0] = 0;
/* Path (a): arbitrary slices -- single-slice wavefront split. */
3185 if (is_arbitary_slices) {
3186 height = frame_height_in_ctb;
3188 max_height = height;
3189 if (hevc_state->num_regions_in_slice > 1) {
/* Wavefront length W + 2*(H-1), divided evenly across regions
 * (ceiling division). */
3190 num_wf_in_region = (frame_width_in_ctb + 2 * (frame_height_in_ctb - 1) + hevc_state->num_regions_in_slice - 1) /
3191 hevc_state->num_regions_in_slice;
3193 num_regions = hevc_state->num_regions_in_slice;
3195 for (i = 1; i < hevc_state->num_regions_in_slice; i++) {
3196 int front = i * num_wf_in_region;
/* Clamp the diagonal front to the frame width, keeping X+2Y parity
 * consistent, then derive the Y from the remainder. */
3198 if (front < frame_width_in_ctb) {
3199 regions_start_table[i] = front;
3200 } else if (((front - frame_width_in_ctb + 1) & 1) == 0) {
3201 regions_start_table[i] = frame_width_in_ctb - 1;
3203 regions_start_table[i] = frame_width_in_ctb - 2;
3206 regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + i] = (front - regions_start_table[i]) >> 1;
/* Path (b): row-aligned slices (elided else). */
3210 int start_y = 0, slice_height;
3211 int slice_is_merged = 0;
3214 num_slices = encode_state->num_slice_params_ext;
/* max_height = tallest slice in CTB rows. */
3216 for (slice = 0; slice < num_slices; slice++) {
3217 slice_height = slice_starty[slice + 1] - slice_starty[slice];
3219 if (slice_height > max_height)
3220 max_height = slice_height;
/* Merge adjacent slices (drop start rows marked -1, compact the list)
 * until the region count fits the 16-entry table; otherwise raise
 * max_height to the smallest two-slice span and retry. */
3223 while (!slice_is_merged) {
3224 int new_num_slices = 1;
3228 for (slice = 1; slice < num_slices; slice++) {
3229 if ((slice_starty[slice + 1] - start_y) <= max_height) {
3230 slice_starty[slice] = -1;
3232 start_y = slice_starty[slice];
3236 for (slice = 1; slice < num_slices; slice++) {
3237 if (slice_starty[slice] > 0) {
3238 slice_starty[new_num_slices] = slice_starty[slice];
3243 num_slices = new_num_slices;
3244 slice_starty[num_slices] = frame_height_in_ctb;
3246 if (num_slices * hevc_state->num_regions_in_slice <= 16) {
3247 slice_is_merged = 1;
3251 max_height = frame_height_in_ctb;
3253 for (slice = 0; slice < num_slices - 1; slice++) {
3254 if ((slice_starty[slice + 2] - slice_starty[slice]) <= max_height) {
3255 max_height = slice_starty[slice + 2] - slice_starty[slice];
/* NOTE(review): 'num' is declared in an elided line -- presumably the
 * index of the slice pair chosen above; confirm against upstream. */
3260 for (slice = num; slice < num_slices; slice++)
3261 slice_starty[slice] = slice_starty[slice + 1];
/* Split each merged slice into num_regions_in_slice diagonal regions,
 * same clamping rule as path (a) but offset by the slice's start row. */
3267 num_wf_in_region = (frame_width_in_ctb + 2 * (max_height - 1) + hevc_state->num_regions_in_slice - 1) /
3268 hevc_state->num_regions_in_slice;
3269 num_regions = num_slices * hevc_state->num_regions_in_slice;
3271 for (slice = 0; slice < num_slices; slice++) {
3272 regions_start_table[slice * hevc_state->num_regions_in_slice] = 0;
3273 regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + (slice * hevc_state->num_regions_in_slice)] = slice_starty[slice];
3275 for (i = 1; i < hevc_state->num_regions_in_slice; i++) {
3276 int front = i * num_wf_in_region;
3278 if (front < frame_width_in_ctb)
3279 regions_start_table[slice * hevc_state->num_regions_in_slice + i] = front;
3280 else if (((front - frame_width_in_ctb + 1) & 1) == 0)
3281 regions_start_table[slice * hevc_state->num_regions_in_slice + i] = frame_width_in_ctb - 1;
3283 regions_start_table[slice * hevc_state->num_regions_in_slice + i] = frame_width_in_ctb - 2;
3285 regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + (slice * hevc_state->num_regions_in_slice + i)] = slice_starty[slice] +
3286 ((front - regions_start_table[i]) >> 1);
3289 height = max_height;
/* Build the per-slice rows of the concurrent-thread-group table: find
 * the region whose start Y (in pixels) is nearest below the slice start,
 * then emit LCU ranges, region coordinates and wavefront offsets. */
3292 for (k = 0; k < num_slices; k++) {
3293 int nearest_reg = 0, delta, tmp_y;
3294 int min_delta = hevc_state->frame_height;
3295 int cur_lcu_pel_y = regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + (k * hevc_state->num_regions_in_slice)] << log2_lcu_size;
3296 int ts_width = frame_width_in_ctb;
3297 int ts_height = height;
3298 int offset_y = -((ts_width + 1) >> 1);
3299 int offset_delta = ((ts_width + ((ts_height - 1) << 1)) + (hevc_state->num_regions_in_slice - 1)) / (hevc_state->num_regions_in_slice);
3301 for (i = 0; i < num_regions; i++) {
3302 if (regions_start_table[i] == 0) {
3303 delta = cur_lcu_pel_y - (regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + i] << log2_lcu_size);
3306 if (delta < min_delta) {
3313 start_offset_to_region[k] = 2 * regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + nearest_reg];
3315 for (i = 0; i < hevc_state->num_regions_in_slice; i++) {
/* [0]/[1]: first/end LCU index of the slice; last slice ends at the
 * frame's final LCU. */
3316 data_tmp[k * hevc_state->num_regions_in_slice + i][0] = slice_starty[k] * frame_width_in_ctb;
3317 data_tmp[k * hevc_state->num_regions_in_slice + i][1] = (k == (num_slices - 1)) ?
3318 frame_width_in_ctb * frame_height_in_ctb : slice_starty[k + 1] * frame_width_in_ctb;
3319 data_tmp[k * hevc_state->num_regions_in_slice + i][2] = k * hevc_state->num_regions_in_slice + i;
3320 if (!hevc_state->is_64lcu && hevc_state->num_regions_in_slice == 1) {
3324 data_tmp[k * hevc_state->num_regions_in_slice + i][3] = height;
3325 data_tmp[k * hevc_state->num_regions_in_slice + i][4] = regions_start_table[nearest_reg + i];
3326 data_tmp[k * hevc_state->num_regions_in_slice + i][5] = regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + (nearest_reg + i)];
3327 data_tmp[k * hevc_state->num_regions_in_slice + i][6] = regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + nearest_reg];
3328 tmp_y = regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + (nearest_reg + hevc_state->num_regions_in_slice)];
3329 data_tmp[k * hevc_state->num_regions_in_slice + i][7] = (tmp_y != 0) ? tmp_y : frame_height_in_ctb;
3330 data_tmp[k * hevc_state->num_regions_in_slice + i][8] = offset_y + regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + nearest_reg] + ((i * offset_delta) >> 1);
3331 if (hevc_state->is_64lcu) {
3332 data_tmp[k * hevc_state->num_regions_in_slice + i][9] = (frame_width_in_ctb + 2 * (max_height - 1) + hevc_state->num_regions_in_slice - 1) / hevc_state->num_regions_in_slice;
3333 data_tmp[k * hevc_state->num_regions_in_slice + i][10] = num_regions;
/* Upload the staged table (early-return-on-NULL check is elided). */
3339 pregion = (gen10_hevc_concurrent_tg_data *) i965_map_gpe_resource(&vme_context->res_concurrent_tg_data);
3343 memset(pregion, 0, vme_context->res_concurrent_tg_data.size);
/* copy_blk_size bytes per row; pregion advance is in an elided line. */
3345 for (i = 0; i < 16; i++) {
3346 memcpy(pregion, data_tmp[i], copy_blk_size);
3350 hevc_state->hevc_wf_param.max_height_in_region = max_height;
3351 hevc_state->hevc_wf_param.num_regions = num_regions;
3352 hevc_state->hevc_wf_param.num_unit_in_wf = (frame_width_in_ctb + 2 * (max_height - 1) + hevc_state->num_regions_in_slice - 1) / hevc_state->num_regions_in_slice;
3354 i965_unmap_gpe_resource(&vme_context->res_concurrent_tg_data);
/*
 * Populate the LCU-level input buffer: one record per CTU carrying its
 * slice's LCU index range, 1-based slice id and slice QP
 * (pic_init_qp + slice_qp_delta).
 *
 * NOTE(review): elided extraction -- the early-return body after the map
 * failure check, the else of the slice-buffer validity test and closing
 * braces are not visible; visible tokens preserved byte-for-byte.
 */
3358 gen10_hevc_enc_generate_lculevel_data(VADriverContextP ctx,
3359 struct encode_state *encode_state,
3360 struct intel_encoder_context *encoder_context)
3362 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
3363 VAEncPictureParameterBufferHEVC *pic_param;
3364 VAEncSliceParameterBufferHEVC *slice_param;
3365 gen10_hevc_lcu_level_data *plcu_level_data;
3366 int ui_start_lcu, slice_idx, i;
3368 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
3370 plcu_level_data = (gen10_hevc_lcu_level_data *)
3371 i965_map_gpe_resource(&vme_context->res_lculevel_input_data_buffer);
3373 if (!plcu_level_data)
/* Walk all slices; ui_start_lcu accumulates the first-LCU index of the
 * current slice across iterations. */
3377 for (ui_start_lcu = 0, slice_idx = 0; slice_idx < encode_state->num_slice_params_ext; slice_idx++) {
3380 if (encode_state->slice_params_ext[slice_idx] &&
3381 encode_state->slice_params_ext[slice_idx]->buffer)
3382 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_idx]->buffer;
/* One record per CTU in the slice; all CTUs of a slice share the same
 * range/id/QP values. */
3387 for (i = 0; i < slice_param->num_ctu_in_slice; i++, plcu_level_data++) {
3388 plcu_level_data->slice_start_lcu_idx = ui_start_lcu;
3389 plcu_level_data->slice_end_lcu_idx = ui_start_lcu + slice_param->num_ctu_in_slice;
/* slice_id is 1-based (0 presumably reserved -- confirm with kernel). */
3390 plcu_level_data->slice_id = slice_idx + 1;
3391 plcu_level_data->slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
3394 ui_start_lcu += slice_param->num_ctu_in_slice;
3397 i965_unmap_gpe_resource(&vme_context->res_lculevel_input_data_buffer);
/*
 * Program the CURBE (constant URB entry) for the MBEnc intra kernel:
 * frame geometry, intra penalties, QP-derived mode costs/lambdas, TU
 * control from the target-usage table, and the binding-table indices.
 *
 * NOTE(review): elided extraction -- the tu_idx assignments inside the
 * target-usage branches, the NULL-curbe early return and the closing
 * brace are not visible; visible tokens preserved byte-for-byte.
 */
3401 gen10_hevc_enc_mbenc_intra_curbe(VADriverContextP ctx,
3402 struct encode_state *encode_state,
3403 struct intel_encoder_context *encoder_context,
3404 struct i965_gpe_context *gpe_context)
3406 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
3407 struct gen10_hevc_enc_state *hevc_state;
3408 gen10_hevc_mbenc_intra_curbe_data *mbenc_curbe;
3409 VAEncSliceParameterBufferHEVC *slice_param;
3410 VAEncPictureParameterBufferHEVC *pic_param;
3411 VAEncSequenceParameterBufferHEVC *seq_param;
3413 unsigned int mode_cost[MAX_MODE_COST];
3416 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
3418 seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
3419 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
3420 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
3421 mbenc_curbe = i965_gpe_context_map_curbe(gpe_context);
3426 memset(mbenc_curbe, 0, sizeof(gen10_hevc_mbenc_intra_curbe_data));
/* Map target usage (1..7) to a tu_idx tier; assignments are elided. */
3428 if (hevc_state->brc.target_usage < 3)
3430 else if (hevc_state->brc.target_usage < 7)
3435 mbenc_curbe->dw0.frame_width_in_pixel = hevc_state->frame_width;
3436 mbenc_curbe->dw0.frame_height_in_pixel = hevc_state->frame_height;
/* Fixed non-DC intra prediction penalties per block size. */
3438 mbenc_curbe->dw1.penalty_intra32x32_nondc_pred = 36;
3439 mbenc_curbe->dw1.penalty_intra16x16_nondc_pred = 12;
3440 mbenc_curbe->dw1.penalty_intra8x8_nondc_pred = 4;
3442 mbenc_curbe->dw2.intra_sad_measure_adj = 2;
/* QP-dependent mode costs computed for an I slice. */
3443 slice_qp = slice_param->slice_qp_delta + pic_param->pic_init_qp;
3444 gen10_hevc_calc_costs(mode_cost, HEVC_SLICE_I, slice_qp, hevc_state->is_64lcu);
3446 mbenc_curbe->dw3.mode0_cost = mode_cost[0];
3447 mbenc_curbe->dw3.mode1_cost = mode_cost[1];
3448 mbenc_curbe->dw3.mode2_cost = mode_cost[2];
3449 mbenc_curbe->dw3.mode3_cost = mode_cost[3];
3451 mbenc_curbe->dw4.mode4_cost = mode_cost[4];
3452 mbenc_curbe->dw4.mode5_cost = mode_cost[5];
3453 mbenc_curbe->dw4.mode6_cost = mode_cost[6];
3454 mbenc_curbe->dw4.mode7_cost = mode_cost[7];
3456 mbenc_curbe->dw5.mode8_cost = mode_cost[8];
3457 mbenc_curbe->dw5.mode9_cost = mode_cost[9];
3458 mbenc_curbe->dw5.ref_id_cost = mode_cost[10];
3459 mbenc_curbe->dw5.chroma_intra_mode_cost = mode_cost[11];
/* CU/TU size limits straight from the SPS fields. */
3461 mbenc_curbe->dw6.log2_min_cu_size = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
3462 mbenc_curbe->dw6.log2_max_cu_size = seq_param->log2_diff_max_min_luma_coding_block_size +
3463 seq_param->log2_min_luma_coding_block_size_minus3 + 3;
3464 mbenc_curbe->dw6.log2_max_tu_size = seq_param->log2_diff_max_min_transform_block_size +
3465 seq_param->log2_min_transform_block_size_minus2 + 2;
3466 mbenc_curbe->dw6.log2_min_tu_size = seq_param->log2_min_transform_block_size_minus2 + 2;
3467 if (seq_param->max_transform_hierarchy_depth_intra)
3468 mbenc_curbe->dw6.max_tr_depth_intra = gen10_hevc_tu_settings[GEN10_LOG2_TU_MAX_DEPTH_INTRA_TU_PARAM][tu_idx];
3470 mbenc_curbe->dw6.max_tr_depth_intra = 0;
3472 mbenc_curbe->dw6.tu_split_flag = 1;
3474 mbenc_curbe->dw7.concurrent_group_num = 1;
3475 mbenc_curbe->dw7.slice_qp = slice_qp;
3476 mbenc_curbe->dw7.enc_tu_decision_mode = gen10_hevc_tu_settings[GEN10_ENC_TU_DECISION_MODE_TU_PARAM][tu_idx];
3478 mbenc_curbe->dw8.lambda_rd = mode_cost[LAMBDA_RD_IDX];
3479 mbenc_curbe->dw9.lambda_md = mode_cost[LAMBDA_MD_IDX];
3480 mbenc_curbe->dw10.intra_tusad_thr = mode_cost[TUSAD_THR_IDX];
3482 mbenc_curbe->dw11.slice_type = HEVC_SLICE_I;
/* CQP uses a constant QP; BRC picks CU-level or frame-level QP depending
 * on whether LCU-granular BRC is enabled. */
3484 if (hevc_state->brc.brc_method == GEN10_HEVC_BRC_CQP)
3485 mbenc_curbe->dw11.qp_type = GEN10_HEVC_QP_TYPE_CONSTANT;
3487 mbenc_curbe->dw11.qp_type = hevc_state->brc.lcu_brc_enabled ? GEN10_HEVC_QP_TYPE_CU_LEVEL : GEN10_HEVC_QP_TYPE_FRAME;
3489 mbenc_curbe->dw11.enc_qt_decision_mode = gen10_hevc_tu_settings[GEN10_ENC_QT_DECISION_MODE_TU_PARAM][tu_idx];
3491 mbenc_curbe->dw12.pcm_8x8_sad_threshold = 4700;
/* Binding-table indices consumed by the intra kernel. */
3493 mbenc_curbe->dw16.bti_vme_intra_pred_surface = GEN10_HEVC_MBENC_INTRA_VME_PRED_CURR_PIC_IDX0;
3494 mbenc_curbe->dw17.bti_curr_picture_y = GEN10_HEVC_MBENC_INTRA_CURR_Y;
3495 mbenc_curbe->dw18.bti_enc_curecord_surface = GEN10_HEVC_MBENC_INTRA_INTERMEDIATE_CU_RECORD;
3496 mbenc_curbe->dw19.bti_pak_obj_cmd_surface = GEN10_HEVC_MBENC_INTRA_PAK_OBJ0;
3497 mbenc_curbe->dw20.bti_cu_packet_for_pak_surface = GEN10_HEVC_MBENC_INTRA_PAK_CU_RECORD;
3498 mbenc_curbe->dw21.bti_internal_scratch_surface = GEN10_HEVC_MBENC_INTRA_SCRATCH_SURFACE;
3499 mbenc_curbe->dw22.bti_cu_based_qp_surface = GEN10_HEVC_MBENC_INTRA_CU_QP_DATA;
3500 mbenc_curbe->dw23.bti_const_data_lut_surface = GEN10_HEVC_MBENC_INTRA_CONST_DATA_LUT;
3501 mbenc_curbe->dw24.bti_lcu_level_data_input_surface = GEN10_HEVC_MBENC_INTRA_LCU_LEVEL_DATA_INPUT;
3502 mbenc_curbe->dw25.bti_concurrent_tg_data_surface = GEN10_HEVC_MBENC_INTRA_CONCURRENT_TG_DATA;
3503 mbenc_curbe->dw26.bti_brc_combined_enc_param_surface = GEN10_HEVC_MBENC_INTRA_BRC_COMBINED_ENC_PARAMETER_SURFACE;
/* NOTE(review): trailing comma instead of semicolon below -- legal C
 * (comma operator before the next assignment) but almost certainly a
 * typo upstream; left untouched in this comments-only pass. */
3504 mbenc_curbe->dw27.bti_cu_split_surface = GEN10_HEVC_MBENC_INTRA_CU_SPLIT_SURFACE,
3505 mbenc_curbe->dw28.bti_debug_surface = GEN10_HEVC_MBENC_INTRA_DEBUG_DUMP;
3507 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Return the POC delta (curr - ref) between the current picture and one
 * reference, clamped to the signed 8-bit range the hardware expects;
 * invalid/absent references presumably yield 0 (that return is elided).
 *
 * NOTE(review): elided extraction -- the return type, diff_poc
 * declaration, the three return statements and the closing brace are not
 * visible; visible tokens preserved byte-for-byte, comments only added.
 */
3511 gen10_hevc_compute_diff_poc(VADriverContextP ctx,
3512 VAPictureHEVC *curr_pic,
3513 VAPictureHEVC *ref_pic)
3515 struct i965_driver_data *i965 = i965_driver_data(ctx);
3516 struct object_surface *obj_surface = NULL;
/* Resolve the reference surface only for a valid VA surface id. */
3519 if (ref_pic->picture_id != VA_INVALID_SURFACE)
3520 obj_surface = SURFACE(ref_pic->picture_id);
/* Missing surface or explicitly invalid picture: bail out (elided). */
3522 if (!obj_surface || (ref_pic->flags & VA_PICTURE_HEVC_INVALID))
3525 diff_poc = curr_pic->pic_order_cnt - ref_pic->pic_order_cnt;
/* Clamp to [-128, 127] (the clamped return values are elided). */
3527 if (diff_poc < -128)
3529 else if (diff_poc > 127)
3536 gen10_hevc_enc_mbenc_inter_curbe(VADriverContextP ctx,
3537 struct encode_state *encode_state,
3538 struct intel_encoder_context *encoder_context,
3539 struct i965_gpe_context *gpe_context)
3541 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
3542 struct gen10_hevc_enc_state *hevc_state;
3543 gen10_hevc_mbenc_inter_curbe_data *mbenc_curbe;
3544 VAEncSliceParameterBufferHEVC *slice_param;
3545 VAEncPictureParameterBufferHEVC *pic_param;
3546 VAEncSequenceParameterBufferHEVC *seq_param;
3550 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
3552 seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
3553 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
3554 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
3555 mbenc_curbe = i965_gpe_context_map_curbe(gpe_context);
3560 memset(mbenc_curbe, 0, sizeof(gen10_hevc_mbenc_inter_curbe_data));
3562 if (hevc_state->brc.target_usage < 3)
3564 else if (hevc_state->brc.target_usage < 7)
3569 slice_qp = slice_param->slice_qp_delta + pic_param->pic_init_qp;
3570 mbenc_curbe->dw0.frame_width_in_pixel = hevc_state->frame_width;
3571 mbenc_curbe->dw0.frame_height_in_pixel = hevc_state->frame_height;
3573 mbenc_curbe->dw1.log2_min_cu_size = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
3574 mbenc_curbe->dw1.log2_max_cu_size = seq_param->log2_diff_max_min_luma_coding_block_size +
3575 seq_param->log2_min_luma_coding_block_size_minus3 + 3;
3576 mbenc_curbe->dw1.log2_max_tu_size = seq_param->log2_diff_max_min_transform_block_size +
3577 seq_param->log2_min_transform_block_size_minus2 + 2;
3578 mbenc_curbe->dw1.log2_min_tu_size = seq_param->log2_min_transform_block_size_minus2 + 2;
3580 if (seq_param->max_transform_hierarchy_depth_intra)
3581 mbenc_curbe->dw1.max_tr_depth_intra = gen10_hevc_tu_settings[GEN10_LOG2_TU_MAX_DEPTH_INTRA_TU_PARAM][tu_idx];
3583 mbenc_curbe->dw1.max_tr_depth_intra = 0;
3585 if (seq_param->max_transform_hierarchy_depth_inter)
3586 mbenc_curbe->dw1.max_tr_depth_inter = gen10_hevc_tu_settings[GEN10_LOG2_TU_MAX_DEPTH_INTER_TU_PARAM][tu_idx];
3588 mbenc_curbe->dw1.max_tr_depth_inter = 0;
3589 mbenc_curbe->dw1.log2_para_merge_level = 2;
3590 mbenc_curbe->dw1.max_num_ime_search_center = 6;
3592 mbenc_curbe->dw2.hme_flag = hevc_state->hme_enabled ? 3 : 0;
3593 mbenc_curbe->dw2.super_hme_enable = hevc_state->b16xme_enabled ? 1 : 0;
3594 mbenc_curbe->dw2.hme_coarse_stage = 1;
3595 mbenc_curbe->dw2.hme_subpel_mode = 3;
3596 if (hevc_state->brc.brc_method == GEN10_HEVC_BRC_CQP)
3597 mbenc_curbe->dw2.qp_type = GEN10_HEVC_QP_TYPE_CONSTANT;
3599 mbenc_curbe->dw2.qp_type = hevc_state->brc.lcu_brc_enabled ? GEN10_HEVC_QP_TYPE_CU_LEVEL : GEN10_HEVC_QP_TYPE_FRAME;
3601 if (hevc_state->num_regions_in_slice > 1)
3602 mbenc_curbe->dw2.regions_in_slice_splits_enable = 1;
3604 mbenc_curbe->dw2.regions_in_slice_splits_enable = 0;
3606 mbenc_curbe->dw3.active_num_child_threads_cu64 = 0;
3607 mbenc_curbe->dw3.active_num_child_threads_cu32_0 = 0;
3608 mbenc_curbe->dw3.active_num_child_threads_cu32_1 = 0;
3609 mbenc_curbe->dw3.active_num_child_threads_cu32_2 = 0;
3610 mbenc_curbe->dw3.active_num_child_threads_cu32_3 = 0;
3611 mbenc_curbe->dw3.slice_qp = slice_qp;
3613 mbenc_curbe->dw4.skip_mode_enable = 1;
3614 mbenc_curbe->dw4.adaptive_enable = 1;
3615 mbenc_curbe->dw4.ime_ref_window_size = 1;
3616 mbenc_curbe->dw4.hevc_min_cu_ctrl = seq_param->log2_min_luma_coding_block_size_minus3;
3618 mbenc_curbe->dw5.subpel_mode = 3;
3619 mbenc_curbe->dw5.inter_sad_measure = 2;
3620 mbenc_curbe->dw5.intra_sad_measure = 2;
3621 mbenc_curbe->dw5.len_sp = 63;
3622 mbenc_curbe->dw5.max_num_su = 63;
3623 mbenc_curbe->dw5.refid_cost_mode = 1;
3625 mbenc_curbe->dw7.max_num_merge_cand = slice_param->max_num_merge_cand;
3626 mbenc_curbe->dw7.slice_type = slice_param->slice_type;
3627 mbenc_curbe->dw7.temporal_mvp_enable = seq_param->seq_fields.bits.sps_temporal_mvp_enabled_flag;
3628 mbenc_curbe->dw7.mvp_collocated_from_l0 = slice_param->slice_fields.bits.collocated_from_l0_flag;
3629 mbenc_curbe->dw7.same_ref_list = hevc_state->is_same_ref_list;
3630 if (slice_param->slice_type == HEVC_SLICE_B)
3631 mbenc_curbe->dw7.is_low_delay = hevc_state->low_delay;
3633 mbenc_curbe->dw7.is_low_delay = 1;
3635 mbenc_curbe->dw7.num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3636 if (slice_param->slice_type == HEVC_SLICE_B)
3637 mbenc_curbe->dw7.num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
3639 mbenc_curbe->dw7.num_ref_idx_l1 = 0;
3641 mbenc_curbe->dw8.fwd_poc_num_l0_mtb_0 = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3642 &slice_param->ref_pic_list0[0]);
3643 mbenc_curbe->dw8.fwd_poc_num_l0_mtb_1 = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3644 &slice_param->ref_pic_list0[1]);
3645 mbenc_curbe->dw9.fwd_poc_num_l0_mtb_2 = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3646 &slice_param->ref_pic_list0[2]);
3647 mbenc_curbe->dw9.fwd_poc_num_l0_mtb_3 = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3648 &slice_param->ref_pic_list0[3]);
3649 if (slice_param->slice_type == HEVC_SLICE_B) {
3650 mbenc_curbe->dw8.bwd_poc_num_l1_mtb_0 = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3651 &slice_param->ref_pic_list1[0]);
3652 mbenc_curbe->dw8.bwd_poc_num_l1_mtb_1 = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3653 &slice_param->ref_pic_list1[1]);
3654 mbenc_curbe->dw9.bwd_poc_num_l1_mtb_2 = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3655 &slice_param->ref_pic_list1[2]);
3656 mbenc_curbe->dw9.bwd_poc_num_l1_mtb_3 = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3657 &slice_param->ref_pic_list1[3]);
3660 mbenc_curbe->dw13.ref_frame_hor_size = hevc_state->frame_width;
3661 mbenc_curbe->dw13.ref_frame_ver_size = hevc_state->frame_height;
3663 mbenc_curbe->dw15.concurrent_gop_num = hevc_state->hevc_wf_param.num_regions;
3664 mbenc_curbe->dw15.total_thread_num_per_lcu = gen10_hevc_tu_settings[GEN10_TOTAL_THREAD_NUM_PER_LCU_TU_PARAM][tu_idx];
3665 mbenc_curbe->dw15.regions_in_slice_split_count = hevc_state->num_regions_in_slice;
3667 mbenc_curbe->dw1.max_num_ime_search_center = gen10_hevc_tu_settings[GEN10_MAX_NUM_IME_SEARCH_CENTER_TU_PARAM][tu_idx];
3669 if (hevc_state->is_64lcu)
3670 mbenc_curbe->dw2.enable_cu64_check = gen10_hevc_tu_settings[GEN10_ENABLE_CU64_CHECK_TU_PARAM][tu_idx];
3672 mbenc_curbe->dw2.enable_cu64_check = 0;
3674 mbenc_curbe->dw2.enc_trans_simplify = gen10_hevc_tu_settings[GEN10_ENC_TRANSFORM_SIMPLIFY_TU_PARAM][tu_idx];
3675 mbenc_curbe->dw2.enc_tu_dec_mode = gen10_hevc_tu_settings[GEN10_ENC_TU_DECISION_MODE_TU_PARAM][tu_idx];
3676 mbenc_curbe->dw2.enc_tu_dec_for_all_qt = gen10_hevc_tu_settings[GEN10_ENC_TU_DECISION_FOR_ALL_QT_TU_PARAM][tu_idx];
3677 mbenc_curbe->dw2.coef_bit_est_mode = gen10_hevc_tu_settings[GEN10_COEF_BIT_EST_MODE_TU_PARAM][tu_idx];
3678 mbenc_curbe->dw2.enc_skip_dec_mode = gen10_hevc_tu_settings[GEN10_ENC_SKIP_DECISION_MODE_TU_PARAM][tu_idx];
3679 mbenc_curbe->dw2.enc_qt_dec_mode = gen10_hevc_tu_settings[GEN10_ENC_QT_DECISION_MODE_TU_PARAM][tu_idx];
3680 mbenc_curbe->dw2.lcu32_enc_rd_dec_mode_for_all_qt = gen10_hevc_tu_settings[GEN10_ENC_RD_DECISION_MODE_FOR_ALL_QT_TU_PARAM][tu_idx];
3681 mbenc_curbe->dw2.lcu64_cu64_skip_check_only = (tu_idx == 1);
3682 mbenc_curbe->dw2.sic_dys_run_path_mode = gen10_hevc_tu_settings[GEN10_SIC_DYNAMIC_RUN_PATH_MODE][tu_idx];
3684 if (hevc_state->is_64lcu) {
3685 mbenc_curbe->dw16.bti_curr_picture_y =
3686 GEN10_HEVC_MBENC_INTER_LCU64_CURR_Y;
3687 mbenc_curbe->dw17.bti_enc_curecord_surface =
3688 GEN10_HEVC_MBENC_INTER_LCU64_CU32_ENC_CU_RECORD;
3689 mbenc_curbe->dw18.bti_lcu64_enc_curecord2_surface =
3690 GEN10_HEVC_MBENC_INTER_LCU64_SECOND_CU32_ENC_CU_RECORD;
3691 mbenc_curbe->dw19.bti_lcu64_pak_objcmd_surface =
3692 GEN10_HEVC_MBENC_INTER_LCU64_PAK_OBJ0;
3693 mbenc_curbe->dw20.bti_lcu64_pak_curecord_surface =
3694 GEN10_HEVC_MBENC_INTER_LCU64_PAK_CU_RECORD;
3695 mbenc_curbe->dw21.bti_lcu64_vme_intra_inter_pred_surface =
3696 GEN10_HEVC_MBENC_INTER_LCU64_VME_PRED_CURR_PIC_IDX0;
3697 mbenc_curbe->dw22.bti_lcu64_cu16_qpdata_input_surface =
3698 GEN10_HEVC_MBENC_INTER_LCU64_CU16x16_QP_DATA;
3699 mbenc_curbe->dw23.bti_lcu64_cu32_enc_const_table_surface =
3700 GEN10_HEVC_MBENC_INTER_LCU64_CU32_ENC_CONST_TABLE;
3701 mbenc_curbe->dw24.bti_lcu64_colocated_mvdata_surface =
3702 GEN10_HEVC_MBENC_INTER_LCU64_COLOCATED_CU_MV_DATA;
3703 mbenc_curbe->dw25.bti_lcu64_hme_pred_surface =
3704 GEN10_HEVC_MBENC_INTER_LCU64_HME_MOTION_PREDICTOR_DATA;
3705 mbenc_curbe->dw26.bti_lcu64_lculevel_data_input_surface =
3706 GEN10_HEVC_MBENC_INTER_LCU64_LCU_LEVEL_DATA_INPUT;
3707 mbenc_curbe->dw27.bti_lcu64_cu32_enc_scratch_surface =
3708 GEN10_HEVC_MBENC_INTER_LCU64_CU32_LCU_ENC_SCRATCH_SURFACE;
3709 mbenc_curbe->dw28.bti_lcu64_64x64_dist_surface =
3710 GEN10_HEVC_MBENC_INTER_LCU64_64X64_DISTORTION_SURFACE;
3711 mbenc_curbe->dw29.bti_lcu64_concurrent_tg_data_surface =
3712 GEN10_HEVC_MBENC_INTER_LCU64_CONCURRENT_TG_DATA;
3713 mbenc_curbe->dw30.bti_lcu64_brc_combined_enc_param_surface =
3714 GEN10_HEVC_MBENC_INTER_LCU64_BRC_COMBINED_ENC_PARAMETER_SURFACE;
3715 mbenc_curbe->dw31.bti_lcu64_cu32_jbq1d_buf_surface =
3716 GEN10_HEVC_MBENC_INTER_LCU64_CU32_JOB_QUEUE_1D_SURFACE;
3717 mbenc_curbe->dw32.bti_lcu64_cu32_jbq2d_buf_surface =
3718 GEN10_HEVC_MBENC_INTER_LCU64_CU32_JOB_QUEUE_2D_SURFACE;
3719 mbenc_curbe->dw33.bti_lcu64_cu32_residual_scratch_surface =
3720 GEN10_HEVC_MBENC_INTER_LCU64_CU32_RESIDUAL_DATA_SCRATCH_SURFACE;
3721 mbenc_curbe->dw34.bti_lcu64_cusplit_surface =
3722 GEN10_HEVC_MBENC_INTER_LCU64_CU_SPLIT_DATA_SURFACE;
3723 mbenc_curbe->dw35.bti_lcu64_curr_picture_y_2xds =
3724 GEN10_HEVC_MBENC_INTER_LCU64_CURR_Y_2xDS;
3725 mbenc_curbe->dw36.bti_lcu64_intermediate_curecord_surface =
3726 GEN10_HEVC_MBENC_INTER_LCU64_INTERMEDIATE_CU_RECORD;
3727 mbenc_curbe->dw37.bti_lcu64_const_data_lut_surface =
3728 GEN10_HEVC_MBENC_INTER_LCU64_CONST64_DATA_LUT;
3729 mbenc_curbe->dw38.bti_lcu64_lcu_storage_surface =
3730 GEN10_HEVC_MBENC_INTER_LCU64_LCU_STORAGE_SURFACE;
3731 mbenc_curbe->dw39.bti_lcu64_vme_inter_pred_2xds_surface =
3732 GEN10_HEVC_MBENC_INTER_LCU64_VME_PRED_CURR_PIC_2xDS_IDX0;
3733 mbenc_curbe->dw40.bti_lcu64_cu64_jbq1d_surface =
3734 GEN10_HEVC_MBENC_INTER_LCU64_JOB_QUEUE_1D_SURFACE;
3735 mbenc_curbe->dw41.bti_lcu64_cu64_jbq2d_surface =
3736 GEN10_HEVC_MBENC_INTER_LCU64_JOB_QUEUE_2D_SURFACE;
3737 mbenc_curbe->dw42.bti_lcu64_cu64_residual_scratch_surface =
3738 GEN10_HEVC_MBENC_INTER_LCU64_RESIDUAL_DATA_SCRATCH_SURFACE;
3739 mbenc_curbe->dw43.bti_lcu64_debug_surface =
3740 GEN10_HEVC_MBENC_INTER_LCU64_DEBUG_SURFACE;
3742 mbenc_curbe->dw16.bti_curr_picture_y =
3743 GEN10_HEVC_MBENC_INTER_LCU32_CURR_Y;
3744 mbenc_curbe->dw17.bti_enc_curecord_surface =
3745 GEN10_HEVC_MBENC_INTER_LCU32_ENC_CU_RECORD;
3746 mbenc_curbe->dw18.bti_lcu32_pak_objcmd_surface =
3747 GEN10_HEVC_MBENC_INTER_LCU32_PAK_OBJ0;
3748 mbenc_curbe->dw19.bti_lcu32_pak_curecord_surface =
3749 GEN10_HEVC_MBENC_INTER_LCU32_PAK_CU_RECORD;
3750 mbenc_curbe->dw20.bti_lcu32_vme_intra_inter_pred_surface =
3751 GEN10_HEVC_MBENC_INTER_LCU32_VME_PRED_CURR_PIC_IDX0;
3752 mbenc_curbe->dw21.bti_lcu32_cu16_qpdata_input_surface =
3753 GEN10_HEVC_MBENC_INTER_LCU32_CU16x16_QP_DATA;
3754 mbenc_curbe->dw22.bti_lcu32_enc_const_table_surface =
3755 GEN10_HEVC_MBENC_INTER_LCU32_ENC_CONST_TABLE;
3756 mbenc_curbe->dw23.bti_lcu32_colocated_mvdata_surface =
3757 GEN10_HEVC_MBENC_INTER_LCU32_COLOCATED_CU_MV_DATA;
3758 mbenc_curbe->dw24.bti_lcu32_hme_pred_data_surface =
3759 GEN10_HEVC_MBENC_INTER_LCU32_HME_MOTION_PREDICTOR_DATA;
3760 mbenc_curbe->dw25.bti_lcu32_lculevel_data_input_surface =
3761 GEN10_HEVC_MBENC_INTER_LCU32_LCU_LEVEL_DATA_INPUT;
3762 mbenc_curbe->dw26.bti_lcu32_enc_scratch_surface =
3763 GEN10_HEVC_MBENC_INTER_LCU32_LCU_ENC_SCRATCH_SURFACE;
3764 mbenc_curbe->dw27.bti_lcu32_concurrent_tg_data_surface =
3765 GEN10_HEVC_MBENC_INTER_LCU32_CONCURRENT_TG_DATA;
3766 mbenc_curbe->dw28.bti_lcu32_brc_combined_enc_param_surface =
3767 GEN10_HEVC_MBENC_INTER_LCU32_BRC_COMBINED_ENC_PARAMETER_SURFACE;
3768 mbenc_curbe->dw29.bti_lcu32_jbq_scratch_surface =
3769 GEN10_HEVC_MBENC_INTER_LCU32_JOB_QUEUE_SCRATCH_SURFACE;
3770 mbenc_curbe->dw30.bti_lcu32_cusplit_data_surface =
3771 GEN10_HEVC_MBENC_INTER_LCU32_CU_SPLIT_DATA_SURFACE,
3772 mbenc_curbe->dw31.bti_lcu32_residual_scratch_surface =
3773 GEN10_HEVC_MBENC_INTER_LCU32_RESIDUAL_DATA_SCRATCH_SURFACE,
3774 mbenc_curbe->dw32.bti_lcu32_debug_surface =
3775 GEN10_HEVC_MBENC_INTER_LCU32_DEBUG_SURFACE;
3778 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind all surfaces required by the MBEnc INTRA kernel into the GPE
 * context binding table; binding-table indices come from the
 * GEN10_HEVC_MBENC_INTRA_* enum.
 *
 * NOTE(review): this listing is a truncated extraction -- several argument
 * lines and closing braces of the i965_add_*_gpe_surface() calls are not
 * visible here. Comments describe only what the visible lines establish.
 */
3782 gen10_hevc_enc_mbenc_intra_surfaces(VADriverContextP ctx,
3783 struct encode_state *encode_state,
3784 struct intel_encoder_context *encoder_context,
3785 struct i965_gpe_context *gpe_context)
3787 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
3788 struct gen10_hevc_enc_state *hevc_state;
3789 struct object_surface *obj_surface;
3790 struct object_surface *vme_surface;
3791 struct gen10_hevc_surface_priv *surface_priv;
3794 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
3796 obj_surface = encode_state->reconstructed_object;
/* Per-surface private data hangs off the reconstructed picture. */
3798 surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
/* VME source: for 10-bit content use the 8-bit converted copy, otherwise
 * feed the raw input YUV directly. */
3800 if (hevc_state->is_10bit)
3801 vme_surface = surface_priv->converted_surface;
3803 vme_surface = encode_state->input_yuv_object;
/* Current-picture VME prediction surface, then (in the loop) 8 more
 * adjacent VME BTI slots. */
3805 input_bti = GEN10_HEVC_MBENC_INTRA_VME_PRED_CURR_PIC_IDX0;
3806 i965_add_adv_gpe_surface(ctx, gpe_context,
3811 for (i = 0; i < 8; i++) {
3812 i965_add_adv_gpe_surface(ctx, gpe_context,
/* Current luma plane: 8-bit view plus a 16-bit UINT alias of the same
 * surface. */
3818 input_bti = GEN10_HEVC_MBENC_INTRA_CURR_Y;
3820 i965_add_2d_gpe_surface(ctx,
3825 I965_SURFACEFORMAT_R8_UNORM,
3827 i965_add_2d_gpe_surface(ctx,
3832 I965_SURFACEFORMAT_R16_UINT,
/* Intermediate CU record scratch (LCU32-sized). */
3836 input_bti = GEN10_HEVC_MBENC_INTRA_INTERMEDIATE_CU_RECORD;
3837 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3838 &vme_context->res_temp_curecord_lcu32_surface,
3839 1, I965_SURFACEFORMAT_R8_UNORM,
/* res_mb_code_surface holds PAK object commands first, then CU records
 * starting at cu_records_offset -- hence the two bindings with
 * complementary size/offset arguments. */
3842 i965_add_buffer_gpe_surface(ctx, gpe_context,
3843 &vme_context->res_mb_code_surface,
3845 BYTES2UINT32(hevc_state->cu_records_offset),
3850 i965_add_buffer_gpe_surface(ctx, gpe_context,
3851 &vme_context->res_mb_code_surface,
3853 BYTES2UINT32(vme_context->res_mb_code_surface.size - hevc_state->cu_records_offset),
3854 hevc_state->cu_records_offset,
/* Kernel scratch and per-16x16-block QP input. */
3857 input_bti = GEN10_HEVC_MBENC_INTRA_SCRATCH_SURFACE;
3858 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3859 &vme_context->res_scratch_surface,
3860 1, I965_SURFACEFORMAT_R8_UNORM,
3863 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3864 &vme_context->res_16x16_qp_data_surface,
3865 1, I965_SURFACEFORMAT_R8_UNORM,
/* Read-only intra constant table. */
3868 i965_add_buffer_gpe_surface(ctx, gpe_context,
3869 &vme_context->res_enc_const_table_intra,
3871 BYTES2UINT32(vme_context->res_enc_const_table_intra.size),
/* LCU-level input data and concurrent thread-group data. */
3875 i965_add_buffer_gpe_surface(ctx, gpe_context,
3876 &vme_context->res_lculevel_input_data_buffer,
3878 BYTES2UINT32(vme_context->res_lculevel_input_data_buffer.size),
3882 i965_add_buffer_gpe_surface(ctx, gpe_context,
3883 &vme_context->res_concurrent_tg_data,
3885 BYTES2UINT32(vme_context->res_concurrent_tg_data.size),
/* BRC-to-ENC combined parameter input. */
3889 i965_add_buffer_gpe_surface(ctx, gpe_context,
3890 &vme_context->res_brc_input_enc_kernel_buffer,
3892 BYTES2UINT32(vme_context->res_brc_input_enc_kernel_buffer.size),
/* CU split decision map. */
3896 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3897 &vme_context->res_cu_split_surface,
3898 1, I965_SURFACEFORMAT_R8_UNORM,
/* Kernel debug/trace output. */
3901 i965_add_buffer_gpe_surface(ctx, gpe_context,
3902 &vme_context->res_kernel_trace_data,
3904 BYTES2UINT32(vme_context->res_kernel_trace_data.size),
/*
 * Bind all surfaces required by the MBEnc INTER (LCU32) kernel into the
 * GPE context binding table; binding-table indices come from the
 * GEN10_HEVC_MBENC_INTER_LCU32_* enum.
 *
 * Fix: inside the reference-picture loop, the backward (L1) reference was
 * fetched with mapped_ref_idx_list1[0] although the guard tests
 * mapped_ref_idx_list1[i] -- a copy/paste slip that bound the wrong L1
 * picture for i > 0. The LCU64 sibling function indexes [i] in both
 * places; this function now does too.
 *
 * NOTE(review): this listing is a truncated extraction -- several argument
 * lines and closing braces are not visible; apart from the one-token fix
 * above, every visible code token is unchanged.
 */
3910 gen10_hevc_enc_mbenc_inter_lcu32_surfaces(VADriverContextP ctx,
3911 struct encode_state *encode_state,
3912 struct intel_encoder_context *encoder_context,
3913 struct i965_gpe_context *gpe_context)
3915 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
3916 struct gen10_hevc_enc_state *hevc_state;
3917 struct gen10_hevc_enc_frame_info *frame_info;
3918 struct gen10_hevc_enc_common_res *common_res;
3919 VAEncSliceParameterBufferHEVC *slice_param;
3920 VAEncPictureParameterBufferHEVC *pic_param;
3921 struct object_surface *obj_surface, *vme_surface;
3922 struct gen10_hevc_surface_priv *surface_priv;
3923 struct object_surface *l0_surface = NULL, *l1_surface = NULL, *tmp_surface;
3926 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
3927 frame_info = &vme_context->frame_info;
3928 common_res = &vme_context->common_res;
3930 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
3931 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
3933 obj_surface = encode_state->reconstructed_object;
3935 surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
/* VME source: 8-bit converted copy for 10-bit content, raw input YUV
 * otherwise. */
3937 if (hevc_state->is_10bit)
3938 vme_surface = surface_priv->converted_surface;
3940 vme_surface = encode_state->input_yuv_object;
/* Current luma: 8-bit view plus 16-bit UINT alias. */
3942 input_bti = GEN10_HEVC_MBENC_INTER_LCU32_CURR_Y;
3943 i965_add_2d_gpe_surface(ctx,
3948 I965_SURFACEFORMAT_R8_UNORM,
3950 i965_add_2d_gpe_surface(ctx,
3955 I965_SURFACEFORMAT_R16_UINT,
/* Intermediate CU record scratch. */
3958 input_bti = GEN10_HEVC_MBENC_INTER_LCU32_ENC_CU_RECORD;
3959 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3960 &vme_context->res_temp_curecord_lcu32_surface,
3961 1, I965_SURFACEFORMAT_R8_UNORM,
/* res_mb_code_surface: PAK object commands up to cu_records_offset,
 * CU records after it -- two bindings with complementary size/offset. */
3964 input_bti = GEN10_HEVC_MBENC_INTER_LCU32_PAK_OBJ0;
3965 i965_add_buffer_gpe_surface(ctx, gpe_context,
3966 &vme_context->res_mb_code_surface,
3968 BYTES2UINT32(hevc_state->cu_records_offset),
3971 i965_add_buffer_gpe_surface(ctx, gpe_context,
3972 &vme_context->res_mb_code_surface,
3974 BYTES2UINT32(vme_context->res_mb_code_surface.size -
3975 hevc_state->cu_records_offset),
3976 hevc_state->cu_records_offset,
/* Current picture for VME prediction. */
3979 input_bti = GEN10_HEVC_MBENC_INTER_LCU32_VME_PRED_CURR_PIC_IDX0;
3981 i965_add_adv_gpe_surface(ctx, gpe_context,
/* Resolve primary forward (L0[0]) reference; fall back to the current
 * VME surface when absent or lacking private data. */
3985 if (frame_info->mapped_ref_idx_list0[0] >= 0)
3986 l0_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[0]].obj_surface;
3990 if (!l0_surface || !l0_surface->private_data)
3991 l0_surface = vme_surface;
3993 surface_priv = (struct gen10_hevc_surface_priv *)(l0_surface->private_data);
3994 if (hevc_state->is_10bit)
3995 l0_surface = surface_priv->converted_surface;
/* Resolve primary backward (L1[0]) reference for B slices.
 * NOTE(review): the '> 0' here differs from the '>= 0' used for L0 --
 * confirm whether index 0 is intentionally excluded. */
3998 if (slice_param->slice_type == HEVC_SLICE_B) {
3999 if (frame_info->mapped_ref_idx_list1[0] > 0)
4000 l1_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[0]].obj_surface;
4004 if (!l1_surface || !l1_surface->private_data)
4005 l1_surface = l0_surface;
4007 surface_priv = (struct gen10_hevc_surface_priv *)(l1_surface->private_data);
4008 if (hevc_state->is_10bit)
4009 l1_surface = surface_priv->converted_surface;
/* Bind up to 4 forward/backward reference pairs at interleaved BTI
 * slots (fwd at +2*i, bwd at +2*i+1). */
4013 input_bti = GEN10_HEVC_MBENC_INTER_LCU32_VME_PRED_FWD_PIC_IDX0;
4014 for (i = 0; i < 4; i++) {
4015 if (frame_info->mapped_ref_idx_list0[i] >= 0)
4016 tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[i]].obj_surface;
4020 if (tmp_surface && tmp_surface->private_data) {
4021 surface_priv = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
4022 if (hevc_state->is_10bit)
4023 tmp_surface = surface_priv->converted_surface;
4025 i965_add_adv_gpe_surface(ctx, gpe_context,
4029 i965_add_adv_gpe_surface(ctx, gpe_context,
4033 if (slice_param->slice_type == HEVC_SLICE_B) {
4034 if (frame_info->mapped_ref_idx_list1[i] >= 0)
/* Fixed: was mapped_ref_idx_list1[0]; must match the [i] tested on the
 * preceding line (and the LCU64 path). */
4035 tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[i]].obj_surface;
4039 if (tmp_surface && tmp_surface->private_data) {
4040 surface_priv = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
4041 if (hevc_state->is_10bit)
4042 tmp_surface = surface_priv->converted_surface;
4044 i965_add_adv_gpe_surface(ctx, gpe_context,
4046 input_bti + 2 * i + 1);
4048 i965_add_adv_gpe_surface(ctx, gpe_context,
4050 input_bti + 2 * i + 1);
/* Per-16x16 QP input and inter constant table. */
4054 input_bti = GEN10_HEVC_MBENC_INTER_LCU32_CU16x16_QP_DATA;
4055 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4056 &vme_context->res_16x16_qp_data_surface,
4057 1, I965_SURFACEFORMAT_R8_UNORM,
4060 i965_add_buffer_gpe_surface(ctx, gpe_context,
4061 &vme_context->res_enc_const_table_inter,
4063 BYTES2UINT32(vme_context->res_enc_const_table_inter.size),
/* Collocated temporal MV buffer, only when temporal MVP is enabled and a
 * collocated reference is specified (0xFF == none). */
4067 if (slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag &&
4068 (pic_param->collocated_ref_pic_index != 0xFF)) {
4069 obj_surface = common_res->reference_pics[pic_param->collocated_ref_pic_index].obj_surface;
4070 if (obj_surface && obj_surface->private_data) {
4071 surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
4073 i965_add_buffer_gpe_surface(ctx, gpe_context,
4074 &surface_priv->motion_vector_temporal,
4076 BYTES2UINT32(surface_priv->motion_vector_temporal.size),
/* HME motion predictors (4x downscaled MV data), only when HME is on. */
4082 input_bti = GEN10_HEVC_MBENC_INTER_LCU32_HME_MOTION_PREDICTOR_DATA;
4083 if (hevc_state->hme_enabled) {
4084 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4085 &vme_context->res_s4x_memv_data_surface,
4086 1, I965_SURFACEFORMAT_R8_UNORM,
/* LCU-level input data and ENC scratch. */
4090 input_bti = GEN10_HEVC_MBENC_INTER_LCU32_LCU_LEVEL_DATA_INPUT;
4091 i965_add_buffer_gpe_surface(ctx, gpe_context,
4092 &vme_context->res_lculevel_input_data_buffer,
4094 BYTES2UINT32(vme_context->res_lculevel_input_data_buffer.size),
4098 i965_add_buffer_gpe_surface(ctx, gpe_context,
4099 &vme_context->res_enc_scratch_buffer,
4101 BYTES2UINT32(vme_context->res_enc_scratch_buffer.size),
/* Concurrent thread-group data and BRC combined ENC parameters. */
4106 input_bti = GEN10_HEVC_MBENC_INTER_LCU32_CONCURRENT_TG_DATA;
4107 i965_add_buffer_gpe_surface(ctx, gpe_context,
4108 &vme_context->res_concurrent_tg_data,
4110 BYTES2UINT32(vme_context->res_concurrent_tg_data.size),
4114 i965_add_buffer_gpe_surface(ctx, gpe_context,
4115 &vme_context->res_brc_input_enc_kernel_buffer,
4117 BYTES2UINT32(vme_context->res_brc_input_enc_kernel_buffer.size),
/* Job-queue header scratch. */
4121 i965_add_buffer_gpe_surface(ctx, gpe_context,
4122 &vme_context->res_jbq_header_buffer,
4124 vme_context->res_jbq_header_buffer.size,
/* CU split map, residual scratch, and debug/trace output. */
4128 input_bti = GEN10_HEVC_MBENC_INTER_LCU32_CU_SPLIT_DATA_SURFACE;
4129 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4130 &vme_context->res_cu_split_surface,
4131 1, I965_SURFACEFORMAT_R8_UNORM,
4134 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4135 &vme_context->res_residual_scratch_lcu32_surface,
4136 1, I965_SURFACEFORMAT_R8_UNORM,
4139 i965_add_buffer_gpe_surface(ctx, gpe_context,
4140 &vme_context->res_kernel_trace_data,
4142 BYTES2UINT32(vme_context->res_kernel_trace_data.size),
/*
 * Bind all surfaces required by the MBEnc INTER (LCU64) kernel into the
 * GPE context binding table; binding-table indices come from the
 * GEN10_HEVC_MBENC_INTER_LCU64_* enum. In addition to the full-resolution
 * bindings (mirroring the LCU32 path), this kernel also consumes the
 * 2x-downscaled picture and references plus LCU64-sized scratch buffers.
 *
 * NOTE(review): this listing is a truncated extraction -- several argument
 * lines, else-branches and closing braces are not visible; comments
 * describe only what the visible lines establish.
 */
4148 gen10_hevc_enc_mbenc_inter_lcu64_surfaces(VADriverContextP ctx,
4149 struct encode_state *encode_state,
4150 struct intel_encoder_context *encoder_context,
4151 struct i965_gpe_context *gpe_context)
4153 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
4154 struct gen10_hevc_enc_state *hevc_state;
4155 struct gen10_hevc_enc_common_res *common_res;
4156 struct gen10_hevc_enc_frame_info *frame_info;
4157 struct object_surface *obj_surface, *vme_surface;
4158 struct gen10_hevc_surface_priv *surface_priv;
4159 struct object_surface *l0_surface, *l1_surface, *tmp_surface;
4160 VAEncSliceParameterBufferHEVC *slice_param;
4161 VAEncPictureParameterBufferHEVC *pic_param;
4164 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
4165 frame_info = &vme_context->frame_info;
4166 common_res = &vme_context->common_res;
4168 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
4169 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
4171 obj_surface = encode_state->reconstructed_object;
4173 surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
/* VME source: 8-bit converted copy for 10-bit content, raw input YUV
 * otherwise. */
4175 if (hevc_state->is_10bit)
4176 vme_surface = surface_priv->converted_surface;
4178 vme_surface = encode_state->input_yuv_object;
/* Current luma: 8-bit view plus 16-bit UINT alias. */
4180 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CURR_Y;
4181 i965_add_2d_gpe_surface(ctx,
4186 I965_SURFACEFORMAT_R8_UNORM,
4188 i965_add_2d_gpe_surface(ctx,
4193 I965_SURFACEFORMAT_R16_UINT,
/* Two CU32 CU-record scratch surfaces (primary and secondary). */
4196 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CU32_ENC_CU_RECORD;
4197 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4198 &vme_context->res_temp_curecord_lcu32_surface,
4199 1, I965_SURFACEFORMAT_R8_UNORM,
4202 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4203 &vme_context->res_temp2_curecord_lcu32_surface,
4204 1, I965_SURFACEFORMAT_R8_UNORM,
/* res_mb_code_surface: PAK object commands then CU records (split at
 * cu_records_offset).
 * NOTE(review): unlike the intra/LCU32 paths, sizes here are raw bytes,
 * not wrapped in BYTES2UINT32 -- confirm which the helper expects. */
4207 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_PAK_OBJ0;
4208 i965_add_buffer_gpe_surface(ctx, gpe_context,
4209 &vme_context->res_mb_code_surface,
4211 hevc_state->cu_records_offset,
4214 i965_add_buffer_gpe_surface(ctx, gpe_context,
4215 &vme_context->res_mb_code_surface,
4217 vme_context->res_mb_code_surface.size,
4218 hevc_state->cu_records_offset,
/* Current picture for full-resolution VME prediction. */
4221 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_VME_PRED_CURR_PIC_IDX0;
4223 i965_add_adv_gpe_surface(ctx, gpe_context,
/* Resolve primary forward (L0[0]) reference; fall back to the current
 * VME surface when absent or lacking private data. */
4227 if (frame_info->mapped_ref_idx_list0[0] >= 0)
4228 l0_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[0]].obj_surface;
4232 if (!l0_surface || !l0_surface->private_data)
4233 l0_surface = vme_surface;
4235 surface_priv = (struct gen10_hevc_surface_priv *)(l0_surface->private_data);
4236 if (hevc_state->is_10bit)
4237 l0_surface = surface_priv->converted_surface;
/* Primary backward (L1[0]) reference for B slices; defaults to L0.
 * NOTE(review): '> 0' here vs '>= 0' for L0 above -- confirm index 0 is
 * intentionally excluded. */
4240 l1_surface = l0_surface;
4241 if (slice_param->slice_type == HEVC_SLICE_B) {
4242 if (frame_info->mapped_ref_idx_list1[0] > 0)
4243 l1_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[0]].obj_surface;
4247 if (!l1_surface || !l1_surface->private_data)
4248 l1_surface = l0_surface;
4250 surface_priv = (struct gen10_hevc_surface_priv *)(l1_surface->private_data);
4251 if (hevc_state->is_10bit)
4252 l1_surface = surface_priv->converted_surface;
/* Up to 4 fwd/bwd reference pairs at interleaved BTI slots
 * (fwd at +2*i, bwd at +2*i+1). */
4256 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_VME_PRED_FWD_PIC_IDX0;
4257 for (i = 0; i < 4; i++) {
4258 if (frame_info->mapped_ref_idx_list0[i] >= 0)
4259 tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[i]].obj_surface;
4263 if (tmp_surface && tmp_surface->private_data) {
4264 surface_priv = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
4265 if (hevc_state->is_10bit)
4266 tmp_surface = surface_priv->converted_surface;
4268 i965_add_adv_gpe_surface(ctx, gpe_context,
4272 i965_add_adv_gpe_surface(ctx, gpe_context,
4276 if (slice_param->slice_type == HEVC_SLICE_B) {
4277 if (frame_info->mapped_ref_idx_list1[i] >= 0)
4278 tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[i]].obj_surface;
4282 if (tmp_surface && tmp_surface->private_data) {
4283 surface_priv = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
4284 if (hevc_state->is_10bit)
4285 tmp_surface = surface_priv->converted_surface;
4287 i965_add_adv_gpe_surface(ctx, gpe_context,
4289 input_bti + 2 * i + 1);
4291 i965_add_adv_gpe_surface(ctx, gpe_context,
4293 input_bti + 2 * i + 1);
/* Per-16x16 QP input and inter constant table. */
4297 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CU16x16_QP_DATA;
4298 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4299 &vme_context->res_16x16_qp_data_surface,
4300 1, I965_SURFACEFORMAT_R8_UNORM,
4303 i965_add_buffer_gpe_surface(ctx, gpe_context,
4304 &vme_context->res_enc_const_table_inter,
4306 vme_context->res_enc_const_table_inter.size,
/* Collocated temporal MV buffer when temporal MVP is enabled and a
 * collocated reference is specified (0xFF == none). */
4310 if (slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag &&
4311 (pic_param->collocated_ref_pic_index != 0xFF)) {
4312 obj_surface = common_res->reference_pics[pic_param->collocated_ref_pic_index].obj_surface;
4313 if (obj_surface && obj_surface->private_data) {
4314 surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
4316 i965_add_buffer_gpe_surface(ctx, gpe_context,
4317 &surface_priv->motion_vector_temporal,
4319 surface_priv->motion_vector_temporal.size,
/* HME motion predictors (4x downscaled MV data), only when HME is on. */
4325 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_HME_MOTION_PREDICTOR_DATA;
4326 if (hevc_state->hme_enabled) {
4327 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4328 &vme_context->res_s4x_memv_data_surface,
4329 1, I965_SURFACEFORMAT_R8_UNORM,
/* LCU-level input data, ENC scratch, and 64x64 distortion output. */
4333 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_LCU_LEVEL_DATA_INPUT;
4334 i965_add_buffer_gpe_surface(ctx, gpe_context,
4335 &vme_context->res_lculevel_input_data_buffer,
4337 vme_context->res_lculevel_input_data_buffer.size,
4341 i965_add_buffer_gpe_surface(ctx, gpe_context,
4342 &vme_context->res_enc_scratch_buffer,
4344 vme_context->res_enc_scratch_buffer.size,
4348 i965_add_buffer_gpe_surface(ctx, gpe_context,
4349 &vme_context->res_64x64_dist_buffer,
4351 vme_context->res_64x64_dist_buffer.size,
/* Concurrent thread-group data and BRC combined ENC parameters. */
4357 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CONCURRENT_TG_DATA;
4358 i965_add_buffer_gpe_surface(ctx, gpe_context,
4359 &vme_context->res_concurrent_tg_data,
4361 vme_context->res_concurrent_tg_data.size,
4365 i965_add_buffer_gpe_surface(ctx, gpe_context,
4366 &vme_context->res_brc_input_enc_kernel_buffer,
4368 vme_context->res_brc_input_enc_kernel_buffer.size,
/* CU32 job queue: 1D header buffer plus 2D data surface, then CU32
 * residual scratch. */
4373 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CU32_JOB_QUEUE_1D_SURFACE;
4374 i965_add_buffer_gpe_surface(ctx, gpe_context,
4375 &vme_context->res_jbq_header_buffer,
4377 vme_context->res_jbq_header_buffer.size,
4381 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4382 &vme_context->res_jbq_data_lcu32_surface,
4383 1, I965_SURFACEFORMAT_R8_UNORM,
4386 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4387 &vme_context->res_residual_scratch_lcu32_surface,
4388 1, I965_SURFACEFORMAT_R8_UNORM,
/* CU split map. */
4392 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CU_SPLIT_DATA_SURFACE;
4393 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4394 &vme_context->res_cu_split_surface,
4395 1, I965_SURFACEFORMAT_R8_UNORM,
/* Switch to the 2x-downscaled current picture (from the reconstructed
 * object's private data) for the 2xDS bindings below. */
4398 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CURR_Y_2xDS;
4399 obj_surface = encode_state->reconstructed_object;
4400 surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
4401 vme_surface = surface_priv->scaled_2x_surface;
4403 i965_add_2d_gpe_surface(ctx,
4408 I965_SURFACEFORMAT_R8_UNORM,
/* LCU64 intermediate CU records, LCU64 constant table and scratch. */
4411 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_INTERMEDIATE_CU_RECORD;
4412 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4413 &vme_context->res_temp_curecord_surface_lcu64,
4414 1, I965_SURFACEFORMAT_R8_UNORM,
4417 i965_add_buffer_gpe_surface(ctx, gpe_context,
4418 &vme_context->res_enc_const_table_inter_lcu64,
4420 vme_context->res_enc_const_table_inter_lcu64.size,
4424 i965_add_buffer_gpe_surface(ctx, gpe_context,
4425 &vme_context->res_enc_scratch_lcu64_buffer,
4427 vme_context->res_enc_scratch_lcu64_buffer.size,
/* Re-resolve L0[0]/L1[0], this time taking each reference's 2x-downscaled
 * surface. */
4431 if (frame_info->mapped_ref_idx_list0[0] >= 0)
4432 l0_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[0]].obj_surface;
4436 if (!l0_surface || !l0_surface->private_data) {
4437 l0_surface = vme_surface;
4439 surface_priv = (struct gen10_hevc_surface_priv *)(l0_surface->private_data);
4440 l0_surface = surface_priv->scaled_2x_surface;
4443 l1_surface = l0_surface;
4444 if (slice_param->slice_type == HEVC_SLICE_B) {
4445 if (frame_info->mapped_ref_idx_list1[0] > 0)
4446 l1_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[0]].obj_surface;
4450 if (!l1_surface || !l1_surface->private_data)
4451 l1_surface = l0_surface;
4453 surface_priv = (struct gen10_hevc_surface_priv *)(l1_surface->private_data);
4454 l1_surface = surface_priv->scaled_2x_surface;
/* 2xDS current picture for VME, then up to 4 fwd/bwd 2xDS reference
 * pairs at interleaved slots.
 * NOTE(review): input_bti is re-assigned the same value twice here --
 * redundant but harmless as written. */
4458 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_VME_PRED_CURR_PIC_2xDS_IDX0;
4459 i965_add_adv_gpe_surface(ctx, gpe_context,
4463 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_VME_PRED_CURR_PIC_2xDS_IDX0;
4464 for (i = 0; i < 4; i++) {
4465 if (frame_info->mapped_ref_idx_list0[i] >= 0)
4466 tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[i]].obj_surface;
4470 if (tmp_surface && tmp_surface->private_data) {
4471 surface_priv = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
4472 tmp_surface = surface_priv->scaled_2x_surface;
4474 i965_add_adv_gpe_surface(ctx, gpe_context,
4478 i965_add_adv_gpe_surface(ctx, gpe_context,
4484 if (slice_param->slice_type == HEVC_SLICE_B) {
4485 if (frame_info->mapped_ref_idx_list1[i] >= 0)
4486 tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[i]].obj_surface;
4490 if (tmp_surface && tmp_surface->private_data) {
4491 surface_priv = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
4492 tmp_surface = surface_priv->scaled_2x_surface;
4494 i965_add_adv_gpe_surface(ctx, gpe_context,
4496 input_bti + 2 * i + 1);
4498 i965_add_adv_gpe_surface(ctx, gpe_context,
4500 input_bti + 2 * i + 1);
/* LCU64 job queue: 1D header buffer plus 2D data surface. */
4504 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_JOB_QUEUE_1D_SURFACE;
4506 i965_add_buffer_gpe_surface(ctx, gpe_context,
4507 &vme_context->res_jbq_header_lcu64_buffer,
4509 vme_context->res_jbq_header_lcu64_buffer.size,
4513 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4514 &vme_context->res_jbq_data_lcu64_surface,
4515 1, I965_SURFACEFORMAT_R8_UNORM,
/* LCU64 residual scratch. */
4518 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_RESIDUAL_DATA_SCRATCH_SURFACE;
4519 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4520 &vme_context->res_residual_scratch_lcu64_surface,
4521 1, I965_SURFACEFORMAT_R8_UNORM,
/* Kernel debug/trace output. */
4525 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_DEBUG_SURFACE;
4526 i965_add_buffer_gpe_surface(ctx, gpe_context,
4527 &vme_context->res_kernel_trace_data,
4529 vme_context->res_kernel_trace_data.size,
4535 gen10_hevc_mbenc_init_walker_param(struct gen10_hevc_enc_state *hevc_state,
4536 struct gen10_hevc_enc_kernel_walker_parameter *kernel_walker_param,
4537 struct gpe_media_object_walker_parameter *media_object_walker_param,
4538 struct gen10_hevc_gpe_scoreboard *hw_scoreboard)
4540 int mw_26zx_h_factor;
4542 if (kernel_walker_param->use_custom_walker == 0) {
4543 hw_scoreboard->scoreboard0.mask = 0x7F;
4544 hw_scoreboard->scoreboard0.enable = hevc_state->use_hw_scoreboard;
4545 hw_scoreboard->scoreboard0.type = hevc_state->use_hw_non_stalling_scoreboard;
4547 hw_scoreboard->dw1.scoreboard1.delta_x0 = 0xF;
4548 hw_scoreboard->dw1.scoreboard1.delta_y0 = 0x0;
4550 hw_scoreboard->dw1.scoreboard1.delta_x1 = 0x0;
4551 hw_scoreboard->dw1.scoreboard1.delta_y1 = 0xF;
4553 hw_scoreboard->dw1.scoreboard1.delta_x2 = 1;
4554 hw_scoreboard->dw1.scoreboard1.delta_y2 = 0xF;
4556 hw_scoreboard->dw1.scoreboard1.delta_x3 = 0xF;
4557 hw_scoreboard->dw1.scoreboard1.delta_y3 = 0xF;
4559 hw_scoreboard->dw2.scoreboard2.delta_x4 = 0;
4560 hw_scoreboard->dw2.scoreboard2.delta_y4 = 0;
4561 hw_scoreboard->dw2.scoreboard2.delta_x5 = 0;
4562 hw_scoreboard->dw2.scoreboard2.delta_y5 = 0;
4563 hw_scoreboard->dw2.scoreboard2.delta_x6 = 0;
4564 hw_scoreboard->dw2.scoreboard2.delta_y6 = 0;
4565 hw_scoreboard->dw2.scoreboard2.delta_x7 = 0;
4566 hw_scoreboard->dw2.scoreboard2.delta_y7 = 0;
4568 gen10_init_media_object_walker_parameter(kernel_walker_param, media_object_walker_param);
4572 media_object_walker_param->color_count_minus1 = hevc_state->hevc_wf_param.num_regions - 1;
4574 media_object_walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
4576 media_object_walker_param->local_loop_exec_count = 0xFFF;
4577 media_object_walker_param->global_loop_exec_count = 0xFFF;
4579 switch (kernel_walker_param->walker_degree) {
4580 case GEN10_WALKER_26_DEGREE:
4581 if (hevc_state->num_regions_in_slice > 1) {
4582 int thread_space_width = kernel_walker_param->resolution_x;
4583 int thread_space_height = hevc_state->hevc_wf_param.max_height_in_region;
4585 int ts_width = thread_space_width;
4586 int ts_height = thread_space_height;
4587 int tmp_height = (ts_height + 1) & 0xfffe;
4588 ts_height = tmp_height;
4589 tmp_height = ((ts_width + 1) >> 1) + ((ts_width + ((tmp_height - 1) << 1)) + (2 * hevc_state->num_regions_in_slice - 1)) / (2 * hevc_state->num_regions_in_slice);
4591 media_object_walker_param->block_resolution.x = ts_width;
4592 media_object_walker_param->block_resolution.y = tmp_height;
4594 media_object_walker_param->global_start.x = 0;
4595 media_object_walker_param->global_start.y = 0;
4597 media_object_walker_param->global_resolution.x = ts_width;
4598 media_object_walker_param->global_resolution.y = tmp_height;
4600 media_object_walker_param->local_start.x = (ts_width + 1) & 0xfffe;;
4601 media_object_walker_param->local_start.y = 0;
4603 media_object_walker_param->local_end.x = 0;
4604 media_object_walker_param->local_end.y = 0;
4606 media_object_walker_param->global_outer_loop_stride.x = ts_width;
4607 media_object_walker_param->global_outer_loop_stride.y = 0;
4609 media_object_walker_param->global_inner_loop_unit.x = 0;
4610 media_object_walker_param->global_inner_loop_unit.y = tmp_height;
4612 media_object_walker_param->scoreboard_mask = 0x7F;
4613 media_object_walker_param->local_outer_loop_stride.x = 1;
4614 media_object_walker_param->local_outer_loop_stride.y = 0;
4615 media_object_walker_param->local_inner_loop_unit.x = -2;
4616 media_object_walker_param->local_inner_loop_unit.y = 1;
4618 media_object_walker_param->global_loop_exec_count = 0;
4619 media_object_walker_param->local_loop_exec_count = (thread_space_width + (ts_height - 1) * 2 + hevc_state->num_regions_in_slice - 1) / hevc_state->num_regions_in_slice;
4621 media_object_walker_param->block_resolution.x = kernel_walker_param->resolution_x;
4622 media_object_walker_param->block_resolution.y = kernel_walker_param->resolution_y;
4624 media_object_walker_param->global_resolution.x = media_object_walker_param->block_resolution.x;
4625 media_object_walker_param->global_resolution.y = media_object_walker_param->block_resolution.y;
4627 media_object_walker_param->global_outer_loop_stride.x = media_object_walker_param->block_resolution.x;
4628 media_object_walker_param->global_outer_loop_stride.y = 0;
4630 media_object_walker_param->global_inner_loop_unit.x = 0;
4631 media_object_walker_param->global_inner_loop_unit.y = media_object_walker_param->block_resolution.y;
4633 media_object_walker_param->scoreboard_mask = 0x7F;
4634 media_object_walker_param->local_outer_loop_stride.x = 1;
4635 media_object_walker_param->local_outer_loop_stride.y = 0;
4636 media_object_walker_param->local_inner_loop_unit.x = -2;
4637 media_object_walker_param->local_inner_loop_unit.y = 1;
4641 hw_scoreboard->scoreboard0.mask = 0x7F;
4642 hw_scoreboard->scoreboard0.enable = hevc_state->use_hw_scoreboard;
4644 hw_scoreboard->dw1.scoreboard1.delta_x0 = -1;
4645 hw_scoreboard->dw1.scoreboard1.delta_y0 = 0;
4647 hw_scoreboard->dw1.scoreboard1.delta_x1 = -1;
4648 hw_scoreboard->dw1.scoreboard1.delta_y1 = -1;
4650 hw_scoreboard->dw1.scoreboard1.delta_x2 = 0;
4651 hw_scoreboard->dw1.scoreboard1.delta_y2 = -1;
4653 hw_scoreboard->dw1.scoreboard1.delta_x3 = 1;
4654 hw_scoreboard->dw1.scoreboard1.delta_y3 = -1;
4656 hw_scoreboard->dw2.scoreboard2.delta_x4 = 0;
4657 hw_scoreboard->dw2.scoreboard2.delta_y4 = 0;
4659 hw_scoreboard->dw2.scoreboard2.delta_x5 = 0;
4660 hw_scoreboard->dw2.scoreboard2.delta_y5 = 0;
4662 hw_scoreboard->dw2.scoreboard2.delta_x6 = 0;
4663 hw_scoreboard->dw2.scoreboard2.delta_y6 = 0;
4665 hw_scoreboard->dw2.scoreboard2.delta_x7 = 0;
4666 hw_scoreboard->dw2.scoreboard2.delta_y7 = 0;
4669 case GEN10_WALKER_26Z_DEGREE: {
4670 media_object_walker_param->scoreboard_mask = 0x7f;
4672 media_object_walker_param->global_resolution.x = kernel_walker_param->resolution_x;
4673 media_object_walker_param->global_resolution.y = kernel_walker_param->resolution_y;
4675 media_object_walker_param->global_outer_loop_stride.x = 2;
4676 media_object_walker_param->global_outer_loop_stride.y = 0;
4678 media_object_walker_param->global_inner_loop_unit.x = 0xFFF - 4 + 1;
4679 media_object_walker_param->global_inner_loop_unit.y = 2;
4681 media_object_walker_param->local_outer_loop_stride.x = 0;
4682 media_object_walker_param->local_outer_loop_stride.y = 1;
4683 media_object_walker_param->local_inner_loop_unit.x = 1;
4684 media_object_walker_param->local_inner_loop_unit.y = 0;
4686 media_object_walker_param->block_resolution.x = 2;
4687 media_object_walker_param->block_resolution.y = 2;
4691 hw_scoreboard->scoreboard0.type = hevc_state->use_hw_non_stalling_scoreboard;
4692 hw_scoreboard->scoreboard0.mask = 0x7F;
4693 hw_scoreboard->scoreboard0.enable = hevc_state->use_hw_scoreboard;
4695 hw_scoreboard->dw1.scoreboard1.delta_x0 = -1;
4696 hw_scoreboard->dw1.scoreboard1.delta_y0 = 1;
4698 hw_scoreboard->dw1.scoreboard1.delta_x1 = -1;
4699 hw_scoreboard->dw1.scoreboard1.delta_y1 = 0;
4701 hw_scoreboard->dw1.scoreboard1.delta_x2 = -1;
4702 hw_scoreboard->dw1.scoreboard1.delta_y2 = -1;
4704 hw_scoreboard->dw1.scoreboard1.delta_x3 = 0;
4705 hw_scoreboard->dw1.scoreboard1.delta_y3 = -1;
4707 hw_scoreboard->dw2.scoreboard2.delta_x4 = 1;
4708 hw_scoreboard->dw2.scoreboard2.delta_y4 = -1;
4711 case GEN10_WALKER_26X_DEGREE:
4712 if (hevc_state->num_regions_in_slice > 1) {
4713 int thread_space_width = ALIGN(hevc_state->frame_width, 32) >> 5;
4714 int ts_width = thread_space_width;
4715 int ts_height = hevc_state->hevc_wf_param.max_height_in_region;
4716 int tmp_height = (ts_height + 1) & 0xfffe;
4717 ts_height = tmp_height;
4718 tmp_height = ((ts_width + 1) >> 1) + ((ts_width + ((tmp_height - 1) << 1)) + (2 * hevc_state->num_regions_in_slice - 1)) / (2 * hevc_state->num_regions_in_slice);
4719 tmp_height *= (hevc_state->thread_num_per_ctb);
4721 media_object_walker_param->scoreboard_mask = 0xff;
4723 media_object_walker_param->global_resolution.x = ts_width;
4724 media_object_walker_param->global_resolution.y = tmp_height;
4726 media_object_walker_param->global_start.x = 0;
4727 media_object_walker_param->global_start.y = 0;
4729 media_object_walker_param->local_start.x = (ts_width + 1) & 0xfffe;
4730 media_object_walker_param->local_start.y = 0;
4732 media_object_walker_param->local_end.x = 0;
4733 media_object_walker_param->local_end.y = 0;
4735 media_object_walker_param->global_outer_loop_stride.x = ts_width;
4736 media_object_walker_param->global_outer_loop_stride.y = 0;
4738 media_object_walker_param->global_inner_loop_unit.x = 0;
4739 media_object_walker_param->global_inner_loop_unit.y = tmp_height;
4741 media_object_walker_param->local_outer_loop_stride.x = 1;
4742 media_object_walker_param->local_outer_loop_stride.y = 0;
4743 media_object_walker_param->local_inner_loop_unit.x = -2;
4744 media_object_walker_param->local_inner_loop_unit.y = hevc_state->thread_num_per_ctb;
4745 media_object_walker_param->middle_loop_extra_steps = hevc_state->thread_num_per_ctb - 1;
4746 media_object_walker_param->mid_loop_unit_x = 0;
4747 media_object_walker_param->mid_loop_unit_y = 1;
4749 media_object_walker_param->block_resolution.x = media_object_walker_param->global_resolution.x;
4750 media_object_walker_param->block_resolution.y = media_object_walker_param->global_resolution.y;
4752 media_object_walker_param->global_loop_exec_count = 0;
4753 media_object_walker_param->local_loop_exec_count = (thread_space_width + (ts_height - 1) * 2 + hevc_state->num_regions_in_slice - 1) / hevc_state->num_regions_in_slice;
4755 media_object_walker_param->scoreboard_mask = 0xff;
4757 media_object_walker_param->global_resolution.x = kernel_walker_param->resolution_x;
4758 media_object_walker_param->global_resolution.y = kernel_walker_param->resolution_y * hevc_state->thread_num_per_ctb;
4760 media_object_walker_param->global_outer_loop_stride.x = media_object_walker_param->global_resolution.x;
4761 media_object_walker_param->global_outer_loop_stride.y = 0;
4763 media_object_walker_param->global_inner_loop_unit.x = 0;
4764 media_object_walker_param->global_inner_loop_unit.y = media_object_walker_param->global_resolution.y;
4766 media_object_walker_param->local_outer_loop_stride.x = 1;
4767 media_object_walker_param->local_outer_loop_stride.y = 0;
4768 media_object_walker_param->local_inner_loop_unit.x = 0xFFF - 2 + 1; // -2 in 2's compliment format;
4769 media_object_walker_param->local_inner_loop_unit.y = hevc_state->thread_num_per_ctb;
4770 media_object_walker_param->middle_loop_extra_steps = hevc_state->thread_num_per_ctb - 1;
4771 media_object_walker_param->mid_loop_unit_x = 0;
4772 media_object_walker_param->mid_loop_unit_y = 1;
4774 media_object_walker_param->block_resolution.x = media_object_walker_param->global_resolution.x;
4775 media_object_walker_param->block_resolution.y = media_object_walker_param->global_resolution.y;
4779 hw_scoreboard->scoreboard0.type = hevc_state->use_hw_non_stalling_scoreboard;
4780 hw_scoreboard->scoreboard0.mask = 0xff;
4781 hw_scoreboard->scoreboard0.enable = hevc_state->use_hw_scoreboard;
4783 hw_scoreboard->dw1.scoreboard1.delta_x0 = -1;
4784 hw_scoreboard->dw1.scoreboard1.delta_y0 = hevc_state->thread_num_per_ctb - 1;
4786 hw_scoreboard->dw1.scoreboard1.delta_x1 = -1;
4787 hw_scoreboard->dw1.scoreboard1.delta_y1 = -1;
4789 hw_scoreboard->dw1.scoreboard1.delta_x2 = 0;
4790 hw_scoreboard->dw1.scoreboard1.delta_y2 = -1;
4792 hw_scoreboard->dw1.scoreboard1.delta_x3 = 1;
4793 hw_scoreboard->dw1.scoreboard1.delta_y3 = -1;
4795 hw_scoreboard->dw2.scoreboard2.delta_x4 = 0;
4796 hw_scoreboard->dw2.scoreboard2.delta_y4 = -hevc_state->thread_num_per_ctb;
4798 hw_scoreboard->dw2.scoreboard2.delta_x5 = 0;
4799 hw_scoreboard->dw2.scoreboard2.delta_y5 = -2;
4801 hw_scoreboard->dw2.scoreboard2.delta_x6 = 0;
4802 hw_scoreboard->dw2.scoreboard2.delta_y6 = -3;
4804 hw_scoreboard->dw2.scoreboard2.delta_x7 = 0;
4805 hw_scoreboard->dw2.scoreboard2.delta_y7 = -4;
4809 case GEN10_WALKER_26ZX_DEGREE:
4810 mw_26zx_h_factor = 5;
4812 if (hevc_state->num_regions_in_slice > 1) {
4813 int thread_space_width = ALIGN(hevc_state->frame_width, 64) >> 6;
4814 int thread_space_height = hevc_state->hevc_wf_param.max_height_in_region;
4815 int sp_width = (thread_space_width + 1) & 0xfffe;
4816 int sp_height = (thread_space_height + 1) & 0xfffe;
4817 int wf_num = (sp_width + (sp_height - 1) * 2 + hevc_state->num_regions_in_slice - 1) / hevc_state->num_regions_in_slice;
4818 sp_height = ((sp_width + 1) >> 1) + ((sp_width + ((sp_height - 1) << 1)) + (2 * hevc_state->num_regions_in_slice - 1)) / (2 * hevc_state->num_regions_in_slice);
4819 int ts_width = sp_width * mw_26zx_h_factor;
4820 int ts_height = sp_height * (hevc_state->thread_num_per_ctb);
4822 media_object_walker_param->scoreboard_mask = 0xff;
4824 media_object_walker_param->global_resolution.x = ts_width;
4825 media_object_walker_param->global_resolution.y = ts_height;
4827 media_object_walker_param->global_start.x = 0;
4828 media_object_walker_param->global_start.y = 0;
4830 media_object_walker_param->local_start.x = media_object_walker_param->global_resolution.x;
4831 media_object_walker_param->local_start.y = 0;
4833 media_object_walker_param->local_end.x = 0;
4834 media_object_walker_param->local_end.y = 0;
4836 media_object_walker_param->global_outer_loop_stride.x = media_object_walker_param->global_resolution.x;
4837 media_object_walker_param->global_outer_loop_stride.y = 0;
4839 media_object_walker_param->global_inner_loop_unit.x = 0;
4840 media_object_walker_param->global_inner_loop_unit.y = media_object_walker_param->global_resolution.y;
4842 media_object_walker_param->local_outer_loop_stride.x = 1;
4843 media_object_walker_param->local_outer_loop_stride.y = 0;
4844 media_object_walker_param->local_inner_loop_unit.x = -mw_26zx_h_factor * 2;
4845 media_object_walker_param->local_inner_loop_unit.y = hevc_state->thread_num_per_ctb;
4846 media_object_walker_param->middle_loop_extra_steps = hevc_state->thread_num_per_ctb - 1;
4847 media_object_walker_param->mid_loop_unit_x = 0;
4848 media_object_walker_param->mid_loop_unit_y = 1;
4850 media_object_walker_param->block_resolution.x = media_object_walker_param->global_resolution.x;
4851 media_object_walker_param->block_resolution.y = media_object_walker_param->global_resolution.y;
4853 media_object_walker_param->global_loop_exec_count = 0;
4854 media_object_walker_param->local_loop_exec_count = (wf_num + 1) * mw_26zx_h_factor;
4856 media_object_walker_param->scoreboard_mask = 0xff;
4858 media_object_walker_param->global_resolution.x = kernel_walker_param->resolution_x * mw_26zx_h_factor;
4859 media_object_walker_param->global_resolution.y = kernel_walker_param->resolution_y * hevc_state->thread_num_per_ctb;
4861 media_object_walker_param->global_outer_loop_stride.x = media_object_walker_param->global_resolution.x;
4862 media_object_walker_param->global_outer_loop_stride.y = 0;
4864 media_object_walker_param->global_inner_loop_unit.x = 0;
4865 media_object_walker_param->global_inner_loop_unit.y = media_object_walker_param->global_resolution.y;
4867 media_object_walker_param->local_outer_loop_stride.x = 1;
4868 media_object_walker_param->local_outer_loop_stride.y = 0;
4869 media_object_walker_param->local_inner_loop_unit.x = 0xFFF - 10 + 1; // -10 in 2's compliment format;
4870 media_object_walker_param->local_inner_loop_unit.y = hevc_state->thread_num_per_ctb;
4871 media_object_walker_param->middle_loop_extra_steps = hevc_state->thread_num_per_ctb - 1;
4872 media_object_walker_param->mid_loop_unit_x = 0;
4873 media_object_walker_param->mid_loop_unit_y = 1;
4875 media_object_walker_param->block_resolution.x = media_object_walker_param->global_resolution.x;
4876 media_object_walker_param->block_resolution.y = media_object_walker_param->global_resolution.y;
4880 hw_scoreboard->scoreboard0.mask = 0xff;
4881 hw_scoreboard->scoreboard0.type = hevc_state->use_hw_non_stalling_scoreboard;
4882 hw_scoreboard->scoreboard0.enable = hevc_state->use_hw_scoreboard;
4884 hw_scoreboard->dw1.scoreboard1.delta_x0 = -5;
4885 hw_scoreboard->dw1.scoreboard1.delta_y0 = -1;
4887 hw_scoreboard->dw1.scoreboard1.delta_x1 = -2;
4888 hw_scoreboard->dw1.scoreboard1.delta_y1 = -1;
4890 hw_scoreboard->dw1.scoreboard1.delta_x2 = 3;
4891 hw_scoreboard->dw1.scoreboard1.delta_y2 = -1;
4893 hw_scoreboard->dw1.scoreboard1.delta_x3 = -1;
4894 hw_scoreboard->dw1.scoreboard1.delta_y3 = 0;
4896 hw_scoreboard->dw2.scoreboard2.delta_x4 = -2;
4897 hw_scoreboard->dw2.scoreboard2.delta_y4 = 0;
4899 hw_scoreboard->dw2.scoreboard2.delta_x5 = -5;
4900 hw_scoreboard->dw2.scoreboard2.delta_y5 = hevc_state->thread_num_per_ctb - 1;
4902 hw_scoreboard->dw2.scoreboard2.delta_x6 = 0;
4903 hw_scoreboard->dw2.scoreboard2.delta_y6 = -1;
4905 hw_scoreboard->dw2.scoreboard2.delta_x7 = 5;
4906 hw_scoreboard->dw2.scoreboard2.delta_y7 = -1;
4917 gen10_hevc_update_scoreboard(struct i965_gpe_context *gpe_context,
4918 struct gen10_hevc_gpe_scoreboard *scoreboard)
4920 if (!gpe_context || !scoreboard)
4923 gpe_context->vfe_desc5.scoreboard0.mask = scoreboard->scoreboard0.mask;
4924 gpe_context->vfe_desc5.scoreboard0.type = scoreboard->scoreboard0.type;
4925 gpe_context->vfe_desc5.scoreboard0.enable = scoreboard->scoreboard0.enable;
4927 gpe_context->vfe_desc6.dword = scoreboard->dw1.value;
4928 gpe_context->vfe_desc7.dword = scoreboard->dw2.value;
/* Dispatch one MBEnc (CTU-level encoding) kernel pass.  Maps mbenc_type
 * (intra / inter-LCU32 / inter-LCU64) to a kernel index and media-state
 * function, programs CURBE and surfaces, builds the media-object-walker
 * parameters plus the HW scoreboard, and runs the walker.
 *
 * NOTE(review): this chunk was lossily extracted -- the "mbenc_type"
 * parameter line, the local declarations (mbenc_idx, media_function),
 * the per-case "break" statements, a "default" case, an "else" before the
 * inter-CURBE call, and the closing braces are all missing from this view.
 */
4933 gen10_hevc_enc_mbenc_kernel(VADriverContextP ctx,
4934 struct encode_state *encode_state,
4935 struct intel_encoder_context *encoder_context,
4938 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
4939 struct gen10_hevc_enc_state *hevc_state;
4940 struct i965_gpe_context *gpe_context;
4942 struct gpe_media_object_walker_parameter media_object_walker_param;
4943 struct gen10_hevc_enc_kernel_walker_parameter kernel_walker_param;
4944 struct gen10_hevc_gpe_scoreboard hw_scoreboard;
4947 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
/* Inter passes need the wavefront region-split data before walker setup. */
4949 if (mbenc_type != GEN10_HEVC_MBENC_INTRA)
4950 gen10_hevc_enc_generate_regions_in_slice_control(ctx, encode_state, encoder_context);
/* Select kernel index + media-state function for the requested flavor.
 * NOTE(review): "break" after each case was dropped by the extraction. */
4952 switch (mbenc_type) {
4953 case GEN10_HEVC_MBENC_INTER_LCU32:
4954 mbenc_idx = GEN10_HEVC_MBENC_INTER_LCU32_KRNIDX_G10;
4955 media_function = GEN10_HEVC_MEDIA_STATE_MBENC_LCU32;
4957 case GEN10_HEVC_MBENC_INTER_LCU64:
4958 mbenc_idx = GEN10_HEVC_MBENC_INTER_LCU64_KRNIDX_G10;
4959 media_function = GEN10_HEVC_MEDIA_STATE_MBENC_LCU64;
4961 case GEN10_HEVC_MBENC_INTRA:
4963 mbenc_idx = GEN10_HEVC_MBENC_I_KRNIDX_G10;
4964 media_function = GEN10_HEVC_MEDIA_STATE_MBENC_INTRA;
4968 gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_idx]);
4970 memset(&hw_scoreboard, 0, sizeof(hw_scoreboard));
4971 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4972 gen8_gpe_context_init(ctx, gpe_context);
4973 gen9_gpe_reset_binding_table(ctx, gpe_context);
4975 kernel_walker_param.use_scoreboard = hevc_state->use_hw_scoreboard;
4976 kernel_walker_param.use_custom_walker = 0;
/* CURBE setup: intra vs inter variants. */
4977 if (mbenc_type == GEN10_HEVC_MBENC_INTRA)
4978 gen10_hevc_enc_mbenc_intra_curbe(ctx, encode_state, encoder_context, gpe_context);
/* NOTE(review): an "else" (original line 4979) is missing before this call. */
4980 gen10_hevc_enc_mbenc_inter_curbe(ctx, encode_state, encoder_context, gpe_context);
/* Surface binding + walker geometry per flavor; resolutions are in
 * 32x32 units for intra/LCU32, in LCUs for LCU64. */
4982 if (mbenc_type == GEN10_HEVC_MBENC_INTRA) {
4983 gen10_hevc_enc_mbenc_intra_surfaces(ctx, encode_state, encoder_context, gpe_context);
4984 kernel_walker_param.resolution_x = ALIGN(hevc_state->frame_width, 32) >> 5;
4985 kernel_walker_param.resolution_y = ALIGN(hevc_state->frame_height, 32) >> 5;
4986 if (hevc_state->is_64lcu) {
4987 kernel_walker_param.walker_degree = GEN10_WALKER_26_DEGREE;// 26_DEGREE
4988 kernel_walker_param.use_custom_walker = 1;
/* NOTE(review): the "} else" between these two lines was dropped. */
4990 kernel_walker_param.use_vertical_scan = 1;
4992 } else if (mbenc_type == GEN10_HEVC_MBENC_INTER_LCU32) {
4993 gen10_hevc_enc_mbenc_inter_lcu32_surfaces(ctx, encode_state, encoder_context, gpe_context);
4994 kernel_walker_param.resolution_x = ALIGN(hevc_state->frame_width, 32) >> 5;
4995 kernel_walker_param.resolution_y = ALIGN(hevc_state->frame_height, 32) >> 5;
4996 kernel_walker_param.use_custom_walker = 1;
/* TU7 (fastest) uses the plain 26-degree walker, otherwise 26X. */
4997 if (hevc_state->brc.target_usage == 7)
4998 kernel_walker_param.walker_degree = GEN10_WALKER_26_DEGREE;
5000 kernel_walker_param.walker_degree = GEN10_WALKER_26X_DEGREE;
5002 gen10_hevc_enc_mbenc_inter_lcu64_surfaces(ctx, encode_state, encoder_context, gpe_context);
5003 kernel_walker_param.resolution_x = vme_context->frame_info.width_in_lcu;
5004 kernel_walker_param.resolution_y = vme_context->frame_info.height_in_lcu;
5005 kernel_walker_param.use_custom_walker = 1;
5006 kernel_walker_param.walker_degree = GEN10_WALKER_26ZX_DEGREE;
5009 gen10_hevc_enc_generate_lculevel_data(ctx, encode_state, encoder_context);
5011 memset(&hw_scoreboard, 0, sizeof(hw_scoreboard));
5012 memset(&media_object_walker_param, 0, sizeof(media_object_walker_param));
/* Fill walker params and scoreboard deltas for the chosen walk pattern.
 * NOTE(review): trailing arguments of this call (&hw_scoreboard, ...) were
 * dropped by the extraction. */
5014 gen10_hevc_mbenc_init_walker_param(hevc_state, &kernel_walker_param,
5015 &media_object_walker_param,
5018 gen10_hevc_update_scoreboard(gpe_context, &hw_scoreboard);
5020 gen8_gpe_setup_interface_data(ctx, gpe_context);
/* NOTE(review): intermediate argument lines (gpe_context, media_function)
 * of this call were dropped by the extraction. */
5022 gen10_run_kernel_media_object_walker(ctx, encoder_context,
5025 &media_object_walker_param);
/* Pre-VME preparation: for 64x64-LCU or 10-bit encodes, run the format
 * conversion / scaling kernel on the source picture and (for non-I frames)
 * on every valid reference picture, so MBEnc sees 8-bit scaled inputs.
 * Returns VA_STATUS_SUCCESS.
 *
 * NOTE(review): lossy extraction -- the loop counter declaration ("int i;"),
 * a "continue;" after the NULL check, several argument lines of the two
 * gen10_hevc_enc_conv_scaling_surface() calls, and closing braces are not
 * visible in this chunk.
 */
5029 gen10_hevc_vme_pipeline_prepare(VADriverContextP ctx,
5030 struct encode_state *encode_state,
5031 struct intel_encoder_context *encoder_context)
5033 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
5034 struct gen10_hevc_enc_state *hevc_state;
5035 struct gen10_hevc_enc_frame_info *frame_info;
5036 struct gen10_hevc_enc_common_res *common_res;
5039 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
5040 frame_info = &vme_context->frame_info;
5041 common_res = &vme_context->common_res;
/* Conversion/scaling is only needed for 64-LCU or 10-bit content. */
5043 if (hevc_state->is_64lcu || hevc_state->is_10bit) {
5044 if (frame_info->picture_coding_type != HEVC_SLICE_I) {
/* Walk the full 16-entry reference list; skip empty slots. */
5045 for (i = 0; i < 16; i++) {
5046 if (common_res->reference_pics[i].obj_surface == NULL)
5049 gen10_hevc_enc_conv_scaling_surface(ctx, encode_state,
5052 common_res->reference_pics[i].obj_surface,
/* Convert/scale the current source picture into the reconstructed slot. */
5058 gen10_hevc_enc_conv_scaling_surface(ctx, encode_state, encoder_context,
5059 common_res->uncompressed_pic.obj_surface,
5060 common_res->reconstructed_pic.obj_surface,
5063 return VA_STATUS_SUCCESS;
/* Top-level VME (ENC) pipeline for one HEVC frame: validate the context,
 * initialize per-frame parameters, prepare scaled/converted surfaces, run
 * BRC init/reset if needed, then HME (4x / optional 16x), BRC frame and
 * LCU updates, and finally the MBEnc kernel (intra for I frames, inter
 * LCU32/LCU64 otherwise).  Returns a VAStatus.
 *
 * NOTE(review): lossy extraction -- the "return va_status;" bodies of the
 * two status checks, several "else" keywords, the trailing arguments of the
 * BRC update calls, and closing braces are not visible in this chunk.
 */
5067 gen10_hevc_vme_pipeline(VADriverContextP ctx,
5069 struct encode_state *encode_state,
5070 struct intel_encoder_context *encoder_context)
5072 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
5073 struct gen10_hevc_enc_state *hevc_state;
5074 struct gen10_hevc_enc_frame_info *frame_info;
5075 VAStatus va_status = VA_STATUS_SUCCESS;
5077 if (!vme_context || !vme_context->enc_priv_state)
5078 return VA_STATUS_ERROR_INVALID_CONTEXT;
5080 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
5081 frame_info = &vme_context->frame_info;
5083 va_status = gen10_hevc_enc_init_parameters(ctx, encode_state, encoder_context);
/* NOTE(review): the "return va_status;" body of this check was dropped. */
5084 if (va_status != VA_STATUS_SUCCESS)
5087 va_status = gen10_hevc_vme_pipeline_prepare(ctx, encode_state, encoder_context);
/* NOTE(review): the "return va_status;" body of this check was dropped. */
5088 if (va_status != VA_STATUS_SUCCESS)
/* (Re)initialize BRC state on first frame or after an explicit reset. */
5091 if (hevc_state->brc.brc_reset || !hevc_state->brc.brc_inited) {
5092 gen10_hevc_enc_brc_init_reset(ctx, encode_state, encoder_context);
5094 hevc_state->brc.brc_inited = 1;
5095 hevc_state->brc.brc_reset = 0;
/* I frames: 4x HME pass produces the intra BRC distortion surface. */
5098 if (frame_info->picture_coding_type == HEVC_SLICE_I) {
5099 gen10_hevc_enc_me_kernel(ctx, encode_state, encoder_context,
5100 GEN10_HEVC_HME_LEVEL_4X,
5101 GEN10_HEVC_ME_DIST_TYPE_INTRA_BRC);
/* Non-I frames: 16x HME first (if enabled), then 4x HME for inter BRC. */
5103 if (hevc_state->hme_enabled) {
5104 if (hevc_state->b16xme_enabled)
5105 gen10_hevc_enc_me_kernel(ctx, encode_state, encoder_context,
5106 GEN10_HEVC_HME_LEVEL_16X,
5107 GEN10_HEVC_ME_DIST_TYPE_INTER_BRC);
5111 gen10_hevc_enc_me_kernel(ctx, encode_state, encoder_context,
5112 GEN10_HEVC_HME_LEVEL_4X,
5113 GEN10_HEVC_ME_DIST_TYPE_INTER_BRC);
5117 gen10_hevc_enc_me_kernel(ctx, encode_state, encoder_context,
5118 GEN10_HEVC_HME_LEVEL_4X,
5119 GEN10_HEVC_ME_DIST_TYPE_INTRA);
/* NOTE(review): trailing argument lines of both BRC update calls were
 * dropped by the extraction. */
5121 gen10_hevc_enc_brc_frame_update_kernel(ctx, encode_state,
5124 gen10_hevc_enc_brc_lcu_update_kernel(ctx, encode_state,
5127 if (frame_info->picture_coding_type == HEVC_SLICE_I)
5128 gen10_hevc_enc_mbenc_kernel(ctx, encode_state, encoder_context,
5129 GEN10_HEVC_MBENC_INTRA);
/* NOTE(review): an "else" before this call was dropped. */
5131 gen10_hevc_enc_mbenc_kernel(ctx, encode_state, encoder_context,
5132 (hevc_state->is_64lcu ?
5133 GEN10_HEVC_MBENC_INTER_LCU64 :
5134 GEN10_HEVC_MBENC_INTER_LCU32));
/* Leftover first-frame debug hooks (commented out upstream). */
5138 if (hevc_state->frame_number == 0) {
5139 struct gen10_hevc_surface_priv *surface_priv = NULL;
5141 surface_priv = (struct gen10_hevc_surface_priv *)encode_state->reconstructed_object->private_data;
5142 //print_out_obj_surface(ctx, surface_priv->scaled_4x_surface_id, 1);
5144 //print_out_gpe_resource(&vme_context->res_mb_code_surface, 0,
5145 // hevc_state->cu_records_offset, 1, 0, 0, 64);
5146 //print_out_gpe_resource(&vme_context->res_mb_code_surface, 0,
5148 //print_out_gpe_resource(&vme_context->res_s4x_me_dist_surface, 0,
5151 //return VA_STATUS_ERROR_INVALID_PARAMETER;
5154 return VA_STATUS_SUCCESS;
5158 gen10_hevc_hcp_pipe_mode_select(VADriverContextP ctx,
5159 struct encode_state *encode_state,
5160 struct intel_encoder_context *encoder_context,
5161 struct intel_batchbuffer *batch)
5163 struct gen10_hevc_enc_context *pak_context;
5164 struct gen10_hevc_enc_state *hevc_state;
5165 gen10_hcp_pipe_mode_select_param param;
5167 pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5168 hevc_state = (struct gen10_hevc_enc_state *)pak_context->enc_priv_state;
5170 memset(¶m, 0, sizeof(param));
5172 param.dw1.codec_select = GEN10_HCP_ENCODE;
5173 param.dw1.codec_standard_select = GEN10_HCP_HEVC_CODEC;
5174 param.dw1.sao_first_pass = hevc_state->sao_first_pass_flag;
5175 param.dw1.rdoq_enabled = hevc_state->rdoq_enabled;
5176 param.dw1.pak_frame_level_streamout_enabled = 1;
5178 if (hevc_state->brc.brc_enabled &&
5179 hevc_state->curr_pak_idx != (hevc_state->num_sao_passes - 1))
5180 param.dw1.pak_streamout_enabled = 1;
5182 gen10_hcp_pipe_mode_select(ctx, batch, ¶m);
/* Emit two HCP_SURFACE_STATE commands: i==0 programs the reconstructed
 * (decode) surface, i==1 programs the uncompressed input surface.  The
 * surface format is chosen from the surface fourcc (P010 for 10-bit,
 * planar 4:2:0 8-bit for NV12).
 *
 * NOTE(review): lossy extraction -- the loop-counter declaration, the
 * "if (i == 0)" / "else" around the two obj_surface assignments, lines
 * 5215-5217 (content unknown, possibly a fallback branch), and closing
 * braces are not visible.  Also "&para;m" below is mojibake for "&param"
 * and needs repair.  The pitch programmed into dw1.surface_pitch is
 * obj_surface->width - 1 -- presumably the hardware wants pitch-minus-one;
 * confirm against the HCP_SURFACE_STATE definition.
 */
5186 gen10_hevc_hcp_multi_surfaces(VADriverContextP ctx,
5187 struct encode_state *encode_state,
5188 struct intel_encoder_context *encoder_context,
5189 struct intel_batchbuffer *batch)
5191 struct gen10_hevc_enc_context *pak_context;
5192 gen10_hcp_surface_state_param param;
5193 struct object_surface *obj_surface;
5196 pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5198 for (i = 0; i < 2; i++) {
/* NOTE(review): "if (i == 0)" (line 5199) was dropped here. */
5200 obj_surface = pak_context->common_res.reconstructed_pic.obj_surface;
/* NOTE(review): "else" (line 5201) was dropped here. */
5202 obj_surface = pak_context->common_res.uncompressed_pic.obj_surface;
5204 memset(¶m, 0, sizeof(param));
5206 param.dw1.surface_pitch = obj_surface->width - 1;
5207 param.dw1.surface_id = (i == 0 ? GEN10_HCP_DECODE_SURFACE_ID :
5208 GEN10_HCP_INPUT_SURFACE_ID);
5209 param.dw2.y_cb_offset = obj_surface->y_cb_offset;
5211 if (obj_surface->fourcc == VA_FOURCC_P010)
5212 param.dw2.surface_format = SURFACE_FORMAT_P010;
5213 else if (obj_surface->fourcc == VA_FOURCC_NV12)
5214 param.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5218 gen10_hcp_surface_state(ctx, batch, ¶m);
/* Emit HCP_PIPE_BUF_ADDR_STATE: wires every row-store / streamout /
 * metadata buffer, the reconstructed and uncompressed pictures, up to 8
 * reference pictures, and the per-surface collocated motion-vector
 * temporal buffers into the HCP pipe.
 *
 * NOTE(review): lossy extraction -- the loop-counter declaration, lines
 * 5252-5253 (content unknown), and closing braces are not visible.  Also
 * "&para;m" below is mojibake for "&param" and needs repair.
 */
5223 gen10_hevc_hcp_pipe_buf_state(VADriverContextP ctx,
5224 struct encode_state *encode_state,
5225 struct intel_encoder_context *encoder_context,
5226 struct intel_batchbuffer *batch)
5228 struct gen10_hevc_enc_context *pak_context;
5229 struct gen10_hevc_surface_priv *surface_priv;
5230 gen10_hcp_pipe_buf_addr_state_param param;
5231 struct gen10_hevc_enc_common_res *common_res;
5234 pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5236 common_res = &pak_context->common_res;
/* Private data of the reconstructed surface holds its MV temporal buffer. */
5237 surface_priv = (struct gen10_hevc_surface_priv *)common_res->
5238 reconstructed_pic.obj_surface->private_data;
5240 memset(¶m, 0, sizeof(param));
5242 param.reconstructed = &common_res->reconstructed_pic.gpe_res;
5243 param.deblocking_filter_line = &common_res->deblocking_filter_line_buffer;
5244 param.deblocking_filter_tile_line = &common_res->deblocking_filter_tile_line_buffer;
5245 param.deblocking_filter_tile_column = &common_res->deblocking_filter_tile_column_buffer;
5246 param.metadata_line = &common_res->metadata_line_buffer;
5247 param.metadata_tile_line = &common_res->metadata_tile_line_buffer;
5248 param.metadata_tile_column = &common_res->metadata_tile_column_buffer;
5249 param.sao_line = &common_res->sao_line_buffer;
5250 param.sao_tile_line = &common_res->sao_tile_line_buffer;
5251 param.sao_tile_column = &common_res->sao_tile_column_buffer;
5254 param.current_motion_vector_temporal = &surface_priv->motion_vector_temporal;
/* Up to 8 active reference pictures; empty slots stay NULL from memset. */
5256 for (i = 0; i < 8; i++) {
5257 if (common_res->reference_pics[i].obj_surface)
5258 param.reference_picture[i] = &common_res->reference_pics[i].gpe_res;
5261 param.uncompressed_picture = &common_res->uncompressed_pic.gpe_res;
5262 param.streamout_data_destination = &common_res->streamout_data_destination_buffer;
5263 param.picture_status = &common_res->picture_status_buffer;
5264 param.ildb_streamout = &common_res->ildb_streamout_buffer;
/* Collocated MV buffers come from each reference surface's private data. */
5266 for (i = 0; i < 8; i++) {
5267 if (common_res->reference_pics[i].obj_surface) {
5268 surface_priv = (struct gen10_hevc_surface_priv *)common_res->
5269 reference_pics[i].obj_surface->private_data;
5271 param.collocated_motion_vector_temporal[i] =
5272 &surface_priv->motion_vector_temporal;
5276 param.sao_streamout_data_destination = &common_res->sao_streamout_data_destination_buffer;
5277 param.frame_statics_streamout_data_destination =
5278 &common_res->frame_statics_streamout_data_destination_buffer;
5279 param.sse_source_pixel_rowstore = &common_res->sse_source_pixel_rowstore_buffer;
5281 gen10_hcp_pipe_buf_addr_state(ctx, batch, ¶m);
5285 gen10_hevc_hcp_ind_obj_base_addr_state(VADriverContextP ctx,
5286 struct encode_state *encode_state,
5287 struct intel_encoder_context *encoder_context,
5288 struct intel_batchbuffer *batch)
5290 struct gen10_hevc_enc_context *pak_context;
5291 struct gen10_hevc_enc_state *hevc_state;
5292 gen10_hcp_ind_obj_base_addr_state_param param;
5294 pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5295 hevc_state = (struct gen10_hevc_enc_state *)pak_context->enc_priv_state;
5297 memset(¶m, 0, sizeof(param));
5299 param.ind_cu_obj_bse = &pak_context->res_mb_code_surface;
5300 param.ind_cu_obj_bse_offset = hevc_state->cu_records_offset;
5302 param.ind_pak_bse = &pak_context->common_res.compressed_bitstream.gpe_res;
5303 param.ind_pak_bse_offset = pak_context->common_res.compressed_bitstream.offset;
5304 param.ind_pak_bse_upper = pak_context->common_res.compressed_bitstream.end_offset;
5306 gen10_hcp_ind_obj_base_addr_state(ctx, batch, ¶m);
5310 gen10_hevc_hcp_qm_fqm_state(VADriverContextP ctx,
5311 struct encode_state *encode_state,
5312 struct intel_encoder_context *encoder_context,
5313 struct intel_batchbuffer *batch)
5315 struct gen10_hevc_enc_context *pak_context;
5317 pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5319 gen10_hevc_enc_hcp_set_qm_fqm_states(ctx, batch, &pak_context->frame_info);
5323 gen10_hevc_hcp_pic_state(VADriverContextP ctx,
5324 struct encode_state *encode_state,
5325 struct intel_encoder_context *encoder_context,
5326 struct intel_batchbuffer *batch)
5328 struct gen10_hevc_enc_context *pak_context;
5329 struct gen10_hevc_enc_state *hevc_state;
5330 VAEncSequenceParameterBufferHEVC *seq_param;
5331 VAEncPictureParameterBufferHEVC *pic_param;
5332 VAEncSliceParameterBufferHEVC *slice_param;
5333 struct gen10_hevc_enc_frame_info *frame_info;
5334 gen10_hcp_pic_state_param param;
5336 pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5337 hevc_state = (struct gen10_hevc_enc_state *)pak_context->enc_priv_state;
5339 frame_info = &pak_context->frame_info;
5340 seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
5341 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
5342 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
5344 memset(¶m, 0, sizeof(param));
5345 param.dw1.frame_width_in_cu_minus1 = frame_info->width_in_cu - 1;
5346 param.dw1.frame_height_in_cu_minus1 = frame_info->height_in_cu - 1;
5347 param.dw1.pak_transform_skip = pic_param->pic_fields.bits.transform_skip_enabled_flag;
5349 param.dw2.min_cu_size = seq_param->log2_min_luma_coding_block_size_minus3;
5350 param.dw2.lcu_size = seq_param->log2_min_luma_coding_block_size_minus3 +
5351 seq_param->log2_diff_max_min_luma_coding_block_size;
5352 param.dw2.min_tu_size = seq_param->log2_min_transform_block_size_minus2;
5353 param.dw2.max_tu_size = seq_param->log2_min_transform_block_size_minus2 +
5354 seq_param->log2_diff_max_min_transform_block_size;
5355 param.dw2.min_pcm_size = 0;
5356 param.dw2.max_pcm_size = 0;
5358 if ((slice_param->slice_fields.bits.slice_sao_luma_flag ||
5359 slice_param->slice_fields.bits.slice_sao_chroma_flag) &&
5360 !frame_info->bit_depth_luma_minus8)
5361 param.dw4.sao_enabled_flag = 1;
5363 if (pic_param->pic_fields.bits.cu_qp_delta_enabled_flag) {
5364 param.dw4.cu_qp_delta_enabled_flag = 1;
5365 param.dw4.diff_cu_qp_delta_depth = pic_param->diff_cu_qp_delta_depth;
5368 param.dw4.pcm_loop_filter_disable_flag = seq_param->seq_fields.bits.pcm_loop_filter_disabled_flag;
5369 param.dw4.weighted_bipred_flag = pic_param->pic_fields.bits.weighted_bipred_flag;
5370 param.dw4.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
5371 param.dw4.transform_skip_enabled_flag = pic_param->pic_fields.bits.transform_skip_enabled_flag;
5372 param.dw4.amp_enabled_flag = seq_param->seq_fields.bits.amp_enabled_flag;
5373 param.dw4.transquant_bypass_enabled_flag = pic_param->pic_fields.bits.transquant_bypass_enabled_flag;
5374 param.dw4.strong_intra_smoothing_enabled_flag = seq_param->seq_fields.bits.strong_intra_smoothing_enabled_flag;
5376 param.dw5.pic_cb_qp_offset = pic_param->pps_cr_qp_offset & 0x1f;
5377 param.dw5.pic_cr_qp_offset = pic_param->pps_cb_qp_offset & 0x1f;
5378 param.dw5.max_transform_hierarchy_depth_intra = seq_param->max_transform_hierarchy_depth_intra;
5379 param.dw5.max_transform_hierarchy_depth_inter = seq_param->max_transform_hierarchy_depth_inter;
5380 param.dw5.pcm_sample_bit_depth_chroma_minus1 = seq_param->pcm_sample_bit_depth_chroma_minus1;
5381 param.dw5.pcm_sample_bit_depth_luma_minus1 = seq_param->pcm_sample_bit_depth_luma_minus1;
5382 param.dw5.bit_depth_chroma_minus8 = seq_param->seq_fields.bits.bit_depth_chroma_minus8;
5383 param.dw5.bit_depth_luma_minus8 = seq_param->seq_fields.bits.bit_depth_luma_minus8;
5385 param.dw6.lcu_max_bits_allowed = frame_info->ctu_max_bitsize_allowed;
5387 param.dw19.rho_domain_rc_enabled = 0;
5388 param.dw19.rho_domain_frame_qp = 0;
5389 param.dw19.fraction_qp_adj_enabled = 0;
5390 param.dw19.first_slice_segment_in_pic_flag = 1;
5391 param.dw19.nal_unit_type_flag = 1;
5392 param.dw19.sse_enabled = 1;
5393 param.dw19.rhoq_enabled = hevc_state->rdoq_enabled;
5395 gen10_hcp_pic_state(ctx, batch, ¶m);
5399 gen10_hevc_hcp_rdoq_state(VADriverContextP ctx,
5400 struct encode_state *encode_state,
5401 struct intel_encoder_context *encoder_context,
5402 struct intel_batchbuffer *batch)
5404 struct gen10_hevc_enc_context *pak_context;
5405 gen10_hcp_rdoq_state_param param;
5407 pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5409 memset(¶m, 0, sizeof(param));
5411 memcpy(param.lambda_intra_luma, pak_context->lambda_param.lambda_intra[0],
5412 sizeof(param.lambda_intra_luma));
5413 memcpy(param.lambda_intra_chroma, pak_context->lambda_param.lambda_intra[1],
5414 sizeof(param.lambda_intra_chroma));
5415 memcpy(param.lambda_inter_luma, pak_context->lambda_param.lambda_inter[0],
5416 sizeof(param.lambda_inter_luma));
5417 memcpy(param.lambda_inter_chroma, pak_context->lambda_param.lambda_inter[1],
5418 sizeof(param.lambda_inter_chroma));
5420 gen10_hcp_rdoq_state(ctx, batch, ¶m);
5424 gen10_hevc_pak_picture_level(VADriverContextP ctx,
5425 struct encode_state *encode_state,
5426 struct intel_encoder_context *encoder_context)
5428 struct intel_batchbuffer *batch = encoder_context->base.batch;
5429 struct gen10_hevc_enc_context *pak_context;
5430 struct gen10_hevc_enc_state *hevc_state;
5432 pak_context = (struct gen10_hevc_enc_context *)encoder_context->mfc_context;
5433 hevc_state = (struct gen10_hevc_enc_state *) pak_context->enc_priv_state;
5435 gen10_hevc_hcp_pipe_mode_select(ctx, encode_state, encoder_context, batch);
5436 gen10_hevc_hcp_multi_surfaces(ctx, encode_state, encoder_context, batch);
5437 gen10_hevc_hcp_pipe_buf_state(ctx, encode_state, encoder_context, batch);
5438 gen10_hevc_hcp_ind_obj_base_addr_state(ctx, encode_state, encoder_context, batch);
5439 gen10_hevc_hcp_qm_fqm_state(ctx, encode_state, encoder_context, batch);
5441 if (hevc_state->brc.brc_enabled) {
5442 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
5444 memset(&second_level_batch, 0, sizeof(second_level_batch));
5445 second_level_batch.offset = GEN10_HEVC_BRC_IMG_STATE_SIZE_PER_PASS *
5446 hevc_state->curr_pak_idx;
5447 second_level_batch.is_second_level = 1;
5448 second_level_batch.bo = pak_context->res_brc_pic_image_state_write_buffer.bo;
5450 gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5452 gen10_hevc_hcp_pic_state(ctx, encode_state, encoder_context, batch);
5454 if (hevc_state->rdoq_enabled)
5455 gen10_hevc_hcp_rdoq_state(ctx, encode_state, encoder_context, batch);
5459 gen10_hevc_hcp_weightoffset(VADriverContextP ctx,
5460 struct encode_state *encode_state,
5461 struct intel_encoder_context *encoder_context,
5462 struct intel_batchbuffer *batch,
5465 VAEncPictureParameterBufferHEVC *pic_param;
5466 VAEncSliceParameterBufferHEVC *slice_param;
5468 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
5469 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
5471 gen10_hevc_enc_hcp_set_weight_offsets(ctx, batch, pic_param, slice_param);
5475 gen10_hevc_ref_idx_lists(VADriverContextP ctx,
5476 struct encode_state *encode_state,
5477 struct intel_encoder_context *encoder_context,
5478 struct intel_batchbuffer *batch,
5481 VAEncPictureParameterBufferHEVC *pic_param;
5482 VAEncSliceParameterBufferHEVC *slice_param;
5484 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
5485 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
5487 if (slice_param->slice_type != HEVC_SLICE_I)
5488 gen10_hevc_enc_hcp_set_ref_idx_lists(ctx, batch, pic_param, slice_param);
/*
 * Build and emit the HCP_SLICE_STATE command for one slice: start/next CTU
 * coordinates, slice type and QP, deblocking/SAO flags, weight denominators,
 * and (when transform-skip is enabled) lambda/factor lookups from the
 * gen10_hevc_tr_* tables.
 * NOTE(review): this extraction is missing interior lines (return type,
 * slice_index parameter, braces, the if/else ladder that assigns qp_idx
 * from slice_qp, and apparent reassignments of last_slice) -- comments
 * only are added here.
 */
5492 gen10_hevc_hcp_slice_state(VADriverContextP ctx,
5493 struct encode_state *encode_state,
5494 struct intel_encoder_context *encoder_context,
5495 struct intel_batchbuffer *batch,
5498 struct gen10_hevc_enc_context *pak_context;
5499 struct gen10_hevc_enc_state *hevc_state;
5500 VAEncPictureParameterBufferHEVC *pic_param;
5501 VAEncSliceParameterBufferHEVC *slice_param;
5502 gen10_hcp_slice_state_param param;
5503 int last_slice, slice_qp, qp_idx;
5505 pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5506 hevc_state = (struct gen10_hevc_enc_state *)pak_context->enc_priv_state;
5508 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
5509 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
/* FIXME(review): "¶m" below is mojibake for "&param" (encoding corruption
 * of '&' + "param") -- must be repaired before this compiles. */
5511 memset(¶m, 0, sizeof(param));
/* Convert the linear CTU address of the slice start into (x, y) LCU coords. */
5513 param.dw1.slice_start_ctu_x = slice_param->slice_segment_address %
5514 pak_context->frame_info.width_in_lcu;
5515 param.dw1.slice_start_ctu_y = slice_param->slice_segment_address /
5516 pak_context->frame_info.width_in_lcu;
/* For the frame's final slice there is no "next slice"; hardware expects 0,0. */
5518 if (slice_index == encode_state->num_slice_params_ext - 1) {
5519 param.dw2.next_slice_start_ctu_x = 0;
5520 param.dw2.next_slice_start_ctu_y = 0;
/* NOTE(review): lines missing here -- presumably "last_slice = 1; } else {";
 * verify against the full source. */
5524 last_slice = slice_param->slice_segment_address + slice_param->num_ctu_in_slice;
5526 param.dw2.next_slice_start_ctu_x = last_slice %
5527 pak_context->frame_info.width_in_lcu;
5528 param.dw2.next_slice_start_ctu_y = last_slice /
5529 pak_context->frame_info.width_in_lcu;
/* NOTE(review): last_slice doubles as both a CTU address (above) and a flag
 * (below); missing lines likely reset it -- confirm before relying on it. */
5534 param.dw3.slice_type = slice_param->slice_type;
5535 param.dw3.last_slice_flag = last_slice;
5536 param.dw3.slice_temporal_mvp_enabled = slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag;
/* Slice QP = pic_init_qp + per-slice delta, per the HEVC spec derivation. */
5537 param.dw3.slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
5538 param.dw3.slice_cb_qp_offset = slice_param->slice_cb_qp_offset;
5539 param.dw3.slice_cr_qp_offset = slice_param->slice_cr_qp_offset;
5541 param.dw4.deblocking_filter_disable = slice_param->slice_fields.bits.slice_deblocking_filter_disabled_flag;
/* tc/beta offsets are signed; mask to the 4-bit field the command expects. */
5542 param.dw4.tc_offset_div2 = slice_param->slice_tc_offset_div2 & 0xf;
5543 param.dw4.beta_offset_div2 = slice_param->slice_beta_offset_div2 & 0xf;
5544 param.dw4.sao_chroma_flag = slice_param->slice_fields.bits.slice_sao_chroma_flag;
5545 param.dw4.sao_luma_flag = slice_param->slice_fields.bits.slice_sao_luma_flag;
5546 param.dw4.mvd_l1_zero_flag = slice_param->slice_fields.bits.mvd_l1_zero_flag;
/* Non-B slices are always low-delay; B slices use the tracked encoder state. */
5547 param.dw4.is_low_delay = slice_param->slice_type != HEVC_SLICE_B ? 1 : hevc_state->low_delay;
5548 param.dw4.collocated_from_l0_flag = slice_param->slice_fields.bits.collocated_from_l0_flag;
5549 param.dw4.chroma_log2_weight_denom = slice_param->luma_log2_weight_denom + slice_param->delta_chroma_log2_weight_denom;
5550 param.dw4.luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
5551 param.dw4.cabac_init_flag = slice_param->slice_fields.bits.cabac_init_flag;
/* Hardware field is max merge *index*, i.e. candidate count minus one. */
5552 param.dw4.max_merge_idx = slice_param->max_num_merge_cand - 1;
/* 0xFF is the VA-API sentinel for "no collocated reference picture". */
5554 if (pic_param->collocated_ref_pic_index != 0xFF)
5555 param.dw4.collocated_ref_idx = pic_param->collocated_ref_pic_index;
/* Fixed rounding values for intra/inter residuals. */
5557 param.dw6.round_intra = 10;
5558 param.dw6.round_inter = 4;
5560 param.dw7.cabac_zero_word_insertion_enabled = 1;
5561 param.dw7.emulation_byte_insert_enabled = 1;
5562 param.dw7.slice_data_enabled = 1;
5563 param.dw7.header_insertion_enabled = 1;
/* Transform-skip path: derive lambda and zero/non-zero cost factors from QP. */
5565 if (pic_param->pic_fields.bits.transform_skip_enabled_flag) {
5566 slice_qp = pak_context->frame_info.slice_qp;
/* NOTE(review): the first branch of this qp_idx ladder is missing from the
 * extraction; qp_idx is read below without a visible assignment -- verify. */
5570 else if (slice_qp <= 27)
5572 else if (slice_qp <= 32)
5577 param.dw9.transform_skip_lambda = gen10_hevc_tr_lambda_coeffs[slice_qp];
/* Table dim 2 selects intra (0) vs inter (1) coefficient sets. */
5579 if (slice_param->slice_type == HEVC_SLICE_I) {
5580 param.dw10.transform_skip_zero_factor0 = gen10_hevc_tr_skip_coeffs[qp_idx][0][0][0][0];
5581 param.dw10.transform_skip_nonezero_factor0 = gen10_hevc_tr_skip_coeffs[qp_idx][0][0][1][0];
5582 param.dw10.transform_skip_zero_factor1 = gen10_hevc_tr_skip_coeffs[qp_idx][0][0][0][1] + 32;
5583 param.dw10.transform_skip_nonezero_factor1 = gen10_hevc_tr_skip_coeffs[qp_idx][0][0][1][1] + 32;
5585 param.dw10.transform_skip_zero_factor0 = gen10_hevc_tr_skip_coeffs[qp_idx][1][0][0][0];
5586 param.dw10.transform_skip_nonezero_factor0 = gen10_hevc_tr_skip_coeffs[qp_idx][1][0][1][0];
5587 param.dw10.transform_skip_zero_factor1 = gen10_hevc_tr_skip_coeffs[qp_idx][1][0][0][1] + 32;
5588 param.dw10.transform_skip_nonezero_factor1 = gen10_hevc_tr_skip_coeffs[qp_idx][1][0][1][1] + 32;
/* FIXME(review): "¶m" is mojibake for "&param" here as well. */
5592 gen10_hcp_slice_state(ctx, batch, ¶m);
/*
 * Slice-level PAK programming: for every slice element, emit ref idx lists,
 * weight offsets and HCP_SLICE_STATE, insert packed/slice headers, then
 * chain to the per-slice MB-code second-level batch buffer.
 * NOTE(review): extraction is missing interior lines (return type, braces,
 * the declarations of i/j/slice_index and the slice_index increment) --
 * comments only are added here.
 */
5596 gen10_hevc_pak_slice_level(VADriverContextP ctx,
5597 struct encode_state *encode_state,
5598 struct intel_encoder_context *encoder_context)
5600 struct intel_batchbuffer *batch = encoder_context->base.batch;
5601 struct gen10_hevc_enc_context *pak_context = encoder_context->mfc_context;
5602 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
5603 VAEncSliceParameterBufferHEVC *slice_param;
/* Outer loop walks slice parameter buffers; inner loop walks their elements.
 * NOTE(review): indexing by slice_index (not i/j) relies on a missing
 * increment line -- verify against full source. */
5608 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
5609 for (j = 0; j < encode_state->slice_params_ext[i]->num_elements; j++) {
5610 slice_param = (VAEncSliceParameterBufferHEVC *)(encode_state->slice_params_ext[slice_index]->buffer);
5612 gen10_hevc_ref_idx_lists(ctx, encode_state, encoder_context, batch, slice_index);
5614 gen10_hevc_hcp_weightoffset(ctx, encode_state, encoder_context,
5615 batch, slice_index);
5617 gen10_hevc_hcp_slice_state(ctx, encode_state, encoder_context,
5618 batch, slice_index);
/* Packed (SPS/PPS/etc.) headers only precede the first slice of the frame. */
5620 if (slice_index == 0)
5621 gen10_hevc_enc_insert_packed_header(ctx, encode_state, encoder_context,
5624 gen10_hevc_enc_insert_slice_header(ctx, encode_state, encoder_context,
5625 batch, slice_index);
/* Jump into the slice's MB-code stream: 32 bytes of command per CTU,
 * offset by the slice's starting CTU address. */
5628 memset(&second_level_batch, 0, sizeof(second_level_batch));
5629 second_level_batch.offset = 32 * slice_param->slice_segment_address;
5630 second_level_batch.is_second_level = 1;
5631 second_level_batch.bo = pak_context->res_mb_code_surface.bo;
5633 gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
/*
 * After a PAK pass, snapshot the HCP status MMIO registers into the status
 * buffer (bytes-per-frame, image mask/control, QP status, SE bit count) and
 * into the double-buffered BRC PAK statistics buffer, bracketed by MI_FLUSH_DW.
 * NOTE(review): extraction is missing interior lines (return type, braces,
 * the write_pak_idx declaration) -- comments only are added here.
 */
5641 gen10_hevc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5643 struct intel_batchbuffer *batch = encoder_context->base.batch;
5644 struct gen10_hevc_enc_context *pak_context = encoder_context->mfc_context;
5645 struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
5646 struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
5647 struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
5648 struct gen10_hevc_enc_status_buffer *status_buffer;
5649 struct gen10_hevc_enc_state *hevc_state;
5652 hevc_state = (struct gen10_hevc_enc_state *) pak_context->enc_priv_state;
5653 status_buffer = &pak_context->status_buffer;
/* Flush before reading registers so prior PAK work has completed. */
5655 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
5656 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
/* Copy each status MMIO register into its slot in the status buffer. */
5658 memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
5659 mi_store_reg_mem_param.bo = status_buffer->gpe_res.bo;
5660 mi_store_reg_mem_param.offset = status_buffer->status_bytes_per_frame_offset;
5661 mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_bytes_per_frame_offset;
5662 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5664 mi_store_reg_mem_param.bo = status_buffer->gpe_res.bo;
5665 mi_store_reg_mem_param.offset = status_buffer->status_image_mask_offset;
5666 mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_image_mask_offset;
5667 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5669 mi_store_reg_mem_param.bo = status_buffer->gpe_res.bo;
5670 mi_store_reg_mem_param.offset = status_buffer->status_image_ctrl_offset;
5671 mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_image_ctrl_offset;
5672 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5674 mi_store_reg_mem_param.bo = status_buffer->gpe_res.bo;
5675 mi_store_reg_mem_param.offset = status_buffer->status_qp_status_offset;
5676 mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_qp_status_offset;
5677 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5679 mi_store_reg_mem_param.bo = status_buffer->gpe_res.bo;
5680 mi_store_reg_mem_param.offset = status_buffer->status_bs_se_bitcount_offset;
5681 mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_bs_se_bitcount_offset;
5682 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
/* Mirror the same registers into the current BRC PAK-statistics buffer
 * (double-buffered via curr_pak_stat_index). */
5684 write_pak_idx = hevc_state->curr_pak_stat_index;
5685 mi_store_reg_mem_param.bo = pak_context->res_brc_pak_statistics_buffer[write_pak_idx].bo;
5686 mi_store_reg_mem_param.offset = offsetof(gen10_hevc_pak_stats_info, hcp_bs_frame);
5687 mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_bytes_per_frame_offset;
5688 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5690 mi_store_reg_mem_param.bo = pak_context->res_brc_pak_statistics_buffer[write_pak_idx].bo;
5691 mi_store_reg_mem_param.offset = offsetof(gen10_hevc_pak_stats_info, hcp_bs_frame_noheader);
5692 mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_bs_frame_no_header_offset;
5693 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5695 mi_store_reg_mem_param.bo = pak_context->res_brc_pak_statistics_buffer[write_pak_idx].bo;
5696 mi_store_reg_mem_param.offset = offsetof(gen10_hevc_pak_stats_info, hcp_image_status_control);
5697 mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_image_ctrl_offset;
5698 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
/* Record which PAK pass produced these stats. */
5700 memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
5701 mi_store_data_imm_param.bo = pak_context->res_brc_pak_statistics_buffer[write_pak_idx].bo;
5702 mi_store_data_imm_param.offset = offsetof(gen10_hevc_pak_stats_info, hcp_image_status_ctl_last_pass);
5703 mi_store_data_imm_param.dw0 = hevc_state->curr_pak_idx;
5704 gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);
/* Final flush so the stores are visible before the batch ends. */
5706 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
/*
 * PAK-side BRC prepare hook wired into encoder_context->mfc_brc_prepare.
 * NOTE(review): return type and body lines are missing from this
 * extraction -- presumably a trivial success return; verify.
 */
5710 gen10_hevc_pak_brc_prepare(struct encode_state *encode_state,
5711 struct intel_encoder_context *encoder_context)
/*
 * PAK context destructor hook (encoder_context->mfc_context_destroy).
 * NOTE(review): body lines are missing from this extraction; the PAK
 * context is shared with the VME context (see gen10_hevc_pak_context_init),
 * so this is presumably a no-op -- verify against full source.
 */
5717 gen10_hevc_pak_context_destroy(void *context)
/*
 * Top-level PAK pipeline: runs num_pak_passes PAK passes on the BCS (BSD)
 * ring.  Pass 0 clears the image control register; later passes (when BRC
 * is on) conditionally terminate the batch if the previous pass already
 * met the target, otherwise reload image control from the status buffer.
 * An optional extra pass re-runs picture/slice PAK when a second SAO pass
 * is needed.  Returns a VA status code.
 * NOTE(review): extraction is missing interior lines (return type,
 * braces, else keywords, the declaration of i) -- comments only.
 */
5723 gen10_hevc_pak_pipeline(VADriverContextP ctx,
5725 struct encode_state *encode_state,
5726 struct intel_encoder_context *encoder_context)
5728 struct i965_driver_data *i965 = i965_driver_data(ctx);
5729 struct intel_batchbuffer *batch = encoder_context->base.batch;
5730 struct gen10_hevc_enc_context *pak_context = encoder_context->mfc_context;
5731 struct gen10_hevc_enc_status_buffer *status_buffer;
5732 struct gen10_hevc_enc_state *hevc_state;
5733 struct gpe_mi_conditional_batch_buffer_end_parameter mi_cond_end;
5734 struct gpe_mi_load_register_mem_parameter mi_load_reg_mem;
5735 struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
5738 if (!pak_context || !pak_context->enc_priv_state)
5739 return VA_STATUS_ERROR_INVALID_CONTEXT;
5741 hevc_state = (struct gen10_hevc_enc_state *) pak_context->enc_priv_state;
5742 status_buffer = &pak_context->status_buffer;
/* Pin to BSD ring 0 when the platform has two BSD rings. */
5744 if (i965->intel.has_bsd2)
5745 intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
5747 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
5749 intel_batchbuffer_emit_mi_flush(batch);
/* Multi-pass PAK loop (BRC repacking). */
5751 for (hevc_state->curr_pak_idx = 0;
5752 hevc_state->curr_pak_idx < hevc_state->num_pak_passes;
5753 hevc_state->curr_pak_idx++) {
5754 if (hevc_state->curr_pak_idx == 0) {
/* First pass: zero the image control register. */
5755 memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
5756 mi_load_reg_imm.mmio_offset = status_buffer->mmio_image_ctrl_offset;
5757 mi_load_reg_imm.data = 0;
5758 gen8_gpe_mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
5759 } else if (hevc_state->brc.brc_enabled) {
/* Later passes: if the image-status mask says the previous pass
 * succeeded, end the batch early instead of repacking. */
5760 memset(&mi_cond_end, 0, sizeof(mi_cond_end));
5761 mi_cond_end.offset = status_buffer->status_image_mask_offset;
5762 mi_cond_end.bo = status_buffer->gpe_res.bo;
5763 mi_cond_end.compare_data = 0;
5764 gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch,
/* Restore image control from the previous pass's saved value. */
5767 memset(&mi_load_reg_mem, 0, sizeof(mi_load_reg_mem));
5768 mi_load_reg_mem.mmio_offset = status_buffer->mmio_image_ctrl_offset;
5769 mi_load_reg_mem.bo = status_buffer->gpe_res.bo;
5770 mi_load_reg_mem.offset = status_buffer->status_image_ctrl_offset;
5771 gen8_gpe_mi_load_register_mem(ctx, batch, &mi_load_reg_mem);
5774 gen10_hevc_pak_picture_level(ctx, encode_state, encoder_context);
5775 gen10_hevc_pak_slice_level(ctx, encode_state, encoder_context);
5776 gen10_hevc_read_mfc_status(ctx, encoder_context);
5779 intel_batchbuffer_end_atomic(batch);
5780 intel_batchbuffer_flush(batch);
/* Optional extra pass when a second SAO pass is required. */
5782 if (hevc_state->sao_2nd_needed) {
5783 if (i965->intel.has_bsd2)
5784 intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
5786 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
5788 intel_batchbuffer_emit_mi_flush(batch);
/* Pad with 64 MI_NOOPs before re-running PAK. */
5790 BEGIN_BCS_BATCH(batch, 64);
5791 for (i = 0; i < 64; i++)
5792 OUT_BCS_BATCH(batch, MI_NOOP);
5794 ADVANCE_BCS_BATCH(batch);
5795 gen10_hevc_pak_picture_level(ctx, encode_state, encoder_context);
5796 gen10_hevc_pak_slice_level(ctx, encode_state, encoder_context);
5797 gen10_hevc_read_mfc_status(ctx, encoder_context);
5798 intel_batchbuffer_end_atomic(batch);
5799 intel_batchbuffer_flush(batch);
/* Flip the PAK-statistics double buffer for the next frame. */
5802 hevc_state->curr_pak_stat_index ^= 1;
5804 hevc_state->frame_number++;
5806 return VA_STATUS_SUCCESS;
/*
 * VME context destructor: tears down encoder resources, the common
 * resource set, and every GPE kernel context (scaling, ME, all BRC and
 * MBEnc kernels), then frees the private encoder state.
 * NOTE(review): extraction is missing interior lines (return type, the
 * declaration of i, trailing free of vme_context, braces) -- comments only.
 */
5810 gen10_hevc_vme_context_destroy(void *context)
5812 struct gen10_hevc_enc_context *vme_context = context;
5818 gen10_hevc_free_enc_resources(context);
5820 gen10_hevc_enc_free_common_resource(&vme_context->common_res);
5822 gen8_gpe_context_destroy(&vme_context->scaling_context.gpe_context);
5824 gen8_gpe_context_destroy(&vme_context->me_context.gpe_context);
5826 for (i = 0; i < GEN10_HEVC_BRC_NUM; i++)
5827 gen8_gpe_context_destroy(&vme_context->brc_context.gpe_contexts[i]);
5829 for (i = 0; i < GEN10_HEVC_MBENC_NUM; i++)
5830 gen8_gpe_context_destroy(&vme_context->mbenc_context.gpe_contexts[i]);
5832 if (vme_context->enc_priv_state)
5833 free(vme_context->enc_priv_state);
/*
 * Allocate and wire up the VME context: allocates the encoder context and
 * its private state, initializes the kernel contexts, sets default state
 * flags, and installs the VME pipeline hooks on encoder_context.
 * NOTE(review): extraction is missing interior lines (return type, the
 * allocation-failure cleanup path, braces, final return) -- comments only.
 */
5839 gen10_hevc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5841 struct gen10_hevc_enc_context *vme_context = NULL;
5842 struct gen10_hevc_enc_state *hevc_state = NULL;
5844 vme_context = calloc(1, sizeof(struct gen10_hevc_enc_context));
5845 hevc_state = calloc(1, sizeof(struct gen10_hevc_enc_state));
/* NOTE(review): failure branch body is missing from this extraction --
 * presumably frees both allocations and returns failure; verify. */
5847 if (!vme_context || !hevc_state) {
5857 vme_context->enc_priv_state = hevc_state;
5859 gen10_hevc_vme_init_kernels_context(ctx, encoder_context, vme_context);
/* Default encoder state: HW scoreboard on (stalling), single region per
 * slice, RDOQ enabled. */
5861 hevc_state->use_hw_scoreboard = 1;
5862 hevc_state->use_hw_non_stalling_scoreboard = 0;
5863 hevc_state->num_regions_in_slice = 1;
5864 hevc_state->rdoq_enabled = 1;
5866 encoder_context->vme_context = vme_context;
5867 encoder_context->vme_pipeline = gen10_hevc_vme_pipeline;
5868 encoder_context->vme_context_destroy = gen10_hevc_vme_context_destroy;
/*
 * get_status hook: publish the encoded frame size.  Reads the per-frame
 * byte count out of the coded buffer segment's private status area and
 * stores it as the segment's base size.  Returns VA_STATUS_SUCCESS, or
 * VA_STATUS_ERROR_INVALID_BUFFER on NULL arguments.
 * NOTE(review): extraction is missing the return type line and braces --
 * comments only are added here.
 */
5874 gen10_hevc_get_coded_status(VADriverContextP ctx,
5875 struct intel_encoder_context *encoder_context,
5876 struct i965_coded_buffer_segment *coded_buf_seg)
5878 struct gen10_hevc_enc_status *enc_status;
5880 if (!encoder_context || !coded_buf_seg)
5881 return VA_STATUS_ERROR_INVALID_BUFFER;
5883 enc_status = (struct gen10_hevc_enc_status *)coded_buf_seg->codec_private_data;
5884 coded_buf_seg->base.size = enc_status->bytes_per_frame;
5886 return VA_STATUS_SUCCESS;
5890 gen10_hevc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5892 struct gen10_hevc_enc_context *pak_context = encoder_context->vme_context;
5897 encoder_context->mfc_context = pak_context;
5898 encoder_context->mfc_context_destroy = gen10_hevc_pak_context_destroy;
5899 encoder_context->mfc_pipeline = gen10_hevc_pak_pipeline;
5900 encoder_context->mfc_brc_prepare = gen10_hevc_pak_brc_prepare;
5901 encoder_context->get_status = gen10_hevc_get_coded_status;