OSDN Git Service

Remove spurious assignments
[android-x86/hardware-intel-common-vaapi.git] / src / gen10_hevc_encoder.c
1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao, Yakui <yakui.zhao@intel.com>
26  *    Chen, Peng  <peng.c.chen@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <stdbool.h>
33 #include <string.h>
34 #include <math.h>
35 #include <assert.h>
36 #include <va/va.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40 #include "intel_media.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_encoder.h"
44 #include "i965_encoder_api.h"
45 #include "i965_encoder_utils.h"
46 #include "gen10_hcp_common.h"
47 #include "gen10_hevc_enc_common.h"
48 #include "gen10_hevc_enc_kernel.h"
49 #include "gen10_hevc_enc_const_def.h"
50 #include "gen10_hevc_enc_kernels_binary.h"
51 #include "gen10_hevc_encoder.h"
52
53 static bool
54 gen10_hevc_get_kernel_header_and_size(void *pvbinary,
55                                       int binary_size,
56                                       GEN10_HEVC_ENC_OPERATION operation,
57                                       int krnstate_idx,
58                                       struct i965_kernel *ret_kernel)
59 {
60     typedef uint32_t BIN_PTR[4];
61
62     gen10_hevc_kernel_header *pkh_table;
63     gen10_intel_kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;
64     char *bin_start;
65     int next_krnoffset;
66     int not_found = 0;
67
68     if (!pvbinary || !ret_kernel)
69         return false;
70
71     bin_start = (char *)pvbinary;
72     pkh_table = (gen10_hevc_kernel_header *)pvbinary;
73     pinvalid_entry = &(pkh_table->hevc_last) + 1;
74     next_krnoffset = binary_size;
75
76     switch (operation) {
77     case GEN10_HEVC_ENC_SCALING_CONVERSION:
78         pcurr_header = &pkh_table->hevc_ds_convert;
79         break;
80     case GEN10_HEVC_ENC_ME:
81         pcurr_header = &pkh_table->hevc_hme;
82         break;
83     case GEN10_HEVC_ENC_BRC:
84         switch (krnstate_idx) {
85         case 0:
86             pcurr_header = &pkh_table->hevc_brc_init;
87             break;
88         case 1:
89             pcurr_header = &pkh_table->hevc_brc_init;
90             break;
91         case 2:
92             pcurr_header = &pkh_table->hevc_brc_update;
93             break;
94         case 3:
95             pcurr_header = &pkh_table->hevc_brc_lcuqp;
96             break;
97         default:
98             not_found = 1;
99             break;
100         }
101         break;
102
103     case GEN10_HEVC_ENC_MBENC:
104         switch (krnstate_idx) {
105         case 0:
106             pcurr_header = &pkh_table->hevc_intra;
107             break;
108         case 1:
109             pcurr_header = &pkh_table->hevc_enc;
110             break;
111         case 2:
112             pcurr_header = &pkh_table->hevc_enc_lcu64;
113             break;
114         default:
115             not_found = 1;
116             break;
117         }
118
119         break;
120     default:
121         not_found = 1;
122         break;
123     }
124
125     if (not_found) {
126         return false;
127     }
128
129     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
130
131     pnext_header = (pcurr_header + 1);
132     if (pnext_header < pinvalid_entry)
133         next_krnoffset = pnext_header->kernel_start_pointer << 6;
134
135     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
136
137     return true;
138 }
139
140 #define MAX_HEVC_ENCODER_SURFACES        64
141 #define MAX_URB_SIZE                     4096
142 #define NUM_KERNELS_PER_GPE_CONTEXT      1
143
144 static void
145 gen10_hevc_init_gpe_context(VADriverContextP ctx,
146                             struct i965_gpe_context *gpe_context,
147                             struct gen10_hevc_enc_kernel_parameter *kernel_param)
148 {
149     struct i965_driver_data *i965 = i965_driver_data(ctx);
150
151     gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
152
153     gpe_context->sampler.entry_size = 0;
154     gpe_context->sampler.max_entries = 0;
155     if (kernel_param->sampler_size) {
156         gpe_context->sampler.entry_size = kernel_param->sampler_size;
157         gpe_context->sampler.max_entries = 1;
158     }
159
160     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
161     gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
162
163     gpe_context->surface_state_binding_table.max_entries = MAX_HEVC_ENCODER_SURFACES;
164     gpe_context->surface_state_binding_table.binding_table_offset = 0;
165     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_HEVC_ENCODER_SURFACES * 4, 64);
166     gpe_context->surface_state_binding_table.length = ALIGN(MAX_HEVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_HEVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
167
168     if (i965->intel.eu_total > 0)
169         gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
170     else
171         gpe_context->vfe_state.max_num_threads = 112;
172
173     gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
174     gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
175     gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
176                                               gpe_context->vfe_state.curbe_allocation_size -
177                                               ((gpe_context->idrt.entry_size >> 5) *
178                                                gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
179     gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
180     gpe_context->vfe_state.gpgpu_mode = 0;
181 }
182
183 static void
184 gen10_hevc_init_vfe_scoreboard(struct i965_gpe_context *gpe_context,
185                                struct gen10_hevc_enc_scoreboard_parameter *scoreboard_param)
186 {
187     if (!gpe_context || !scoreboard_param)
188         return;
189
190     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
191     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
192     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
193
194     if (scoreboard_param->no_dependency) {
195         gpe_context->vfe_desc5.scoreboard0.mask = 0x0;
196         gpe_context->vfe_desc5.scoreboard0.enable = 0;
197         gpe_context->vfe_desc5.scoreboard0.type = 0;
198
199         gpe_context->vfe_desc6.dword = 0;
200         gpe_context->vfe_desc7.dword = 0;
201     } else {
202         gpe_context->vfe_desc5.scoreboard0.mask = 0x7F;
203         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
204         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
205
206         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0xF;
207         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
208
209         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x0;
210         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
211
212         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 1;
213         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
214
215         gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0;
216         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0;
217         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0;
218         gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0;
219         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0;
220         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0;
221         gpe_context->vfe_desc7.scoreboard2.delta_x7 = 0;
222         gpe_context->vfe_desc7.scoreboard2.delta_y7 = 0;
223     }
224 }
225
226 static void
227 gen10_hevc_vme_init_scaling_context(VADriverContextP ctx,
228                                     struct gen10_hevc_enc_context *vme_context,
229                                     struct gen10_scaling_context *scaling_context)
230 {
231     struct gen10_hevc_enc_state *hevc_state;
232     struct i965_gpe_context *gpe_context = NULL;
233     struct gen10_hevc_enc_kernel_parameter kernel_param;
234     struct gen10_hevc_enc_scoreboard_parameter scoreboard_param;
235     struct i965_kernel scale_kernel;
236
237     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
238
239     kernel_param.curbe_size = sizeof(gen10_hevc_scaling_curbe_data);
240     kernel_param.inline_data_size = sizeof(gen10_hevc_scaling_curbe_data);
241     kernel_param.sampler_size = 0;
242
243     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
244     scoreboard_param.mask = 0xFF;
245     scoreboard_param.enable = hevc_state->use_hw_scoreboard;
246     scoreboard_param.type = hevc_state->use_hw_non_stalling_scoreboard;
247     scoreboard_param.no_dependency = true;
248
249     gpe_context = &scaling_context->gpe_context;
250     gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
251     gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
252
253     memset(&scale_kernel, 0, sizeof(scale_kernel));
254
255     gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
256                                           sizeof(gen10_media_hevc_kernels),
257                                           GEN10_HEVC_ENC_SCALING_CONVERSION,
258                                           0,
259                                           &scale_kernel);
260
261     gen8_gpe_load_kernels(ctx,
262                           gpe_context,
263                           &scale_kernel,
264                           1);
265 }
266
267 static void
268 gen10_hevc_vme_init_me_context(VADriverContextP ctx,
269                                struct gen10_hevc_enc_context *vme_context,
270                                struct gen10_me_context *me_context)
271 {
272     struct gen10_hevc_enc_state *hevc_state;
273     struct i965_gpe_context *gpe_context = NULL;
274     struct gen10_hevc_enc_kernel_parameter kernel_param;
275     struct gen10_hevc_enc_scoreboard_parameter scoreboard_param;
276     struct i965_kernel me_kernel;
277
278     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
279
280     kernel_param.curbe_size = sizeof(gen10_hevc_me_curbe_data);
281     kernel_param.inline_data_size = sizeof(gen10_hevc_me_curbe_data);
282     kernel_param.sampler_size = 0;
283
284     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
285     scoreboard_param.mask = 0xFF;
286     scoreboard_param.enable = hevc_state->use_hw_scoreboard;
287     scoreboard_param.type = hevc_state->use_hw_non_stalling_scoreboard;
288     scoreboard_param.no_dependency = true;
289
290     gpe_context = &me_context->gpe_context;
291     gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
292     gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
293
294     memset(&me_kernel, 0, sizeof(me_kernel));
295
296     gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
297                                           sizeof(gen10_media_hevc_kernels),
298                                           GEN10_HEVC_ENC_ME,
299                                           0,
300                                           &me_kernel);
301
302     gen8_gpe_load_kernels(ctx,
303                           gpe_context,
304                           &me_kernel,
305                           1);
306 }
307
308 static void
309 gen10_hevc_vme_init_mbenc_context(VADriverContextP ctx,
310                                   struct gen10_hevc_enc_context *vme_context,
311                                   struct gen10_mbenc_context *mbenc_context)
312 {
313     struct gen10_hevc_enc_state *hevc_state;
314     struct i965_gpe_context *gpe_context = NULL;
315     struct gen10_hevc_enc_kernel_parameter kernel_param;
316     struct gen10_hevc_enc_scoreboard_parameter scoreboard_param;
317     struct i965_kernel mbenc_kernel;
318
319     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
320
321     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
322     scoreboard_param.mask = 0xFF;
323     scoreboard_param.enable = hevc_state->use_hw_scoreboard;
324     scoreboard_param.type = hevc_state->use_hw_non_stalling_scoreboard;
325
326     gpe_context = &mbenc_context->gpe_contexts[GEN10_HEVC_MBENC_I_KRNIDX_G10];
327     kernel_param.curbe_size = sizeof(gen10_hevc_mbenc_intra_curbe_data);
328     kernel_param.inline_data_size = sizeof(gen10_hevc_mbenc_intra_curbe_data);
329     kernel_param.sampler_size = 0;
330     scoreboard_param.no_dependency = false;
331     gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
332
333     gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
334
335     memset(&mbenc_kernel, 0, sizeof(mbenc_kernel));
336
337     gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
338                                           sizeof(gen10_media_hevc_kernels),
339                                           GEN10_HEVC_ENC_MBENC,
340                                           GEN10_HEVC_MBENC_I_KRNIDX_G10,
341                                           &  mbenc_kernel);
342
343     gen8_gpe_load_kernels(ctx,
344                           gpe_context,
345                           &mbenc_kernel,
346                           1);
347
348     gpe_context = &mbenc_context->gpe_contexts[GEN10_HEVC_MBENC_INTER_LCU32_KRNIDX_G10];
349     kernel_param.curbe_size = sizeof(gen10_hevc_mbenc_inter_curbe_data);
350     kernel_param.inline_data_size = sizeof(gen10_hevc_mbenc_inter_curbe_data);
351     kernel_param.sampler_size = 0;
352     scoreboard_param.no_dependency = false;
353     gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
354     gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
355
356     memset(&mbenc_kernel, 0, sizeof(mbenc_kernel));
357
358     gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
359                                           sizeof(gen10_media_hevc_kernels),
360                                           GEN10_HEVC_ENC_MBENC,
361                                           GEN10_HEVC_MBENC_INTER_LCU32_KRNIDX_G10,
362                                           &mbenc_kernel);
363     gen8_gpe_load_kernels(ctx,
364                           gpe_context,
365                           &mbenc_kernel,
366                           1);
367
368     gpe_context = &mbenc_context->gpe_contexts[GEN10_HEVC_MBENC_INTER_LCU64_KRNIDX_G10];
369     kernel_param.curbe_size = sizeof(gen10_hevc_mbenc_inter_curbe_data);
370     kernel_param.inline_data_size = sizeof(gen10_hevc_mbenc_inter_curbe_data);
371     kernel_param.sampler_size = 0;
372     scoreboard_param.no_dependency = false;
373     gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
374     gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
375
376     memset(&mbenc_kernel, 0, sizeof(mbenc_kernel));
377
378     gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
379                                           sizeof(gen10_media_hevc_kernels),
380                                           GEN10_HEVC_ENC_MBENC,
381                                           GEN10_HEVC_MBENC_INTER_LCU64_KRNIDX_G10,
382                                           &mbenc_kernel);
383
384     gen8_gpe_load_kernels(ctx,
385                           gpe_context,
386                           &mbenc_kernel,
387                           1);
388 }
389
390 static void
391 gen10_hevc_vme_init_brc_context(VADriverContextP ctx,
392                                 struct gen10_hevc_enc_context *vme_context,
393                                 struct gen10_brc_context *brc_context)
394 {
395     struct gen10_hevc_enc_state *hevc_state;
396     struct i965_gpe_context *gpe_context = NULL;
397     struct gen10_hevc_enc_kernel_parameter kernel_param;
398     struct gen10_hevc_enc_scoreboard_parameter scoreboard_param;
399     struct i965_kernel brc_kernel;
400
401     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
402
403     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
404     scoreboard_param.mask = 0xFF;
405     scoreboard_param.enable = hevc_state->use_hw_scoreboard;
406     scoreboard_param.type = hevc_state->use_hw_non_stalling_scoreboard;
407
408     gpe_context = &brc_context->gpe_contexts[GEN10_HEVC_BRC_INIT];
409     kernel_param.curbe_size = sizeof(gen10_hevc_brc_init_curbe_data);
410     kernel_param.inline_data_size = sizeof(gen10_hevc_brc_init_curbe_data);
411     kernel_param.sampler_size = 0;
412     scoreboard_param.no_dependency = true;
413     gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
414     gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
415
416     memset(&brc_kernel, 0, sizeof(brc_kernel));
417
418     gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
419                                           sizeof(gen10_media_hevc_kernels),
420                                           GEN10_HEVC_ENC_BRC,
421                                           GEN10_HEVC_BRC_INIT,
422                                           &brc_kernel);
423
424     gen8_gpe_load_kernels(ctx,
425                           gpe_context,
426                           &brc_kernel,
427                           1);
428
429     gpe_context = &brc_context->gpe_contexts[GEN10_HEVC_BRC_RESET];
430     kernel_param.curbe_size = sizeof(gen10_hevc_brc_init_curbe_data);
431     kernel_param.inline_data_size = sizeof(gen10_hevc_brc_init_curbe_data);
432     kernel_param.sampler_size = 0;
433     scoreboard_param.no_dependency = true;
434     gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
435     gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
436
437     memset(&brc_kernel, 0, sizeof(brc_kernel));
438
439     gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
440                                           sizeof(gen10_media_hevc_kernels),
441                                           GEN10_HEVC_ENC_BRC,
442                                           GEN10_HEVC_BRC_RESET,
443                                           &brc_kernel);
444
445     gen8_gpe_load_kernels(ctx,
446                           gpe_context,
447                           &brc_kernel,
448                           1);
449
450     gpe_context = &brc_context->gpe_contexts[GEN10_HEVC_BRC_FRAME_UPDATE];
451     kernel_param.curbe_size = sizeof(gen10_hevc_brc_update_curbe_data);
452     kernel_param.inline_data_size = sizeof(gen10_hevc_brc_update_curbe_data);
453     kernel_param.sampler_size = 0;
454     scoreboard_param.no_dependency = true;
455     gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
456     gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
457
458     memset(&brc_kernel, 0, sizeof(brc_kernel));
459
460     gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
461                                           sizeof(gen10_media_hevc_kernels),
462                                           GEN10_HEVC_ENC_BRC,
463                                           GEN10_HEVC_BRC_FRAME_UPDATE,
464                                           &brc_kernel);
465
466     gen8_gpe_load_kernels(ctx,
467                           gpe_context,
468                           &brc_kernel,
469                           1);
470
471     gpe_context = &brc_context->gpe_contexts[GEN10_HEVC_BRC_LCU_UPDATE];
472     kernel_param.curbe_size = sizeof(gen10_hevc_brc_update_curbe_data);
473     kernel_param.inline_data_size = sizeof(gen10_hevc_brc_update_curbe_data);
474     kernel_param.sampler_size = 0;
475     scoreboard_param.no_dependency = true;
476     gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
477     gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
478
479     memset(&brc_kernel, 0, sizeof(brc_kernel));
480
481     gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
482                                           sizeof(gen10_media_hevc_kernels),
483                                           GEN10_HEVC_ENC_BRC,
484                                           GEN10_HEVC_BRC_LCU_UPDATE,
485                                           &brc_kernel);
486
487     gen8_gpe_load_kernels(ctx,
488                           gpe_context,
489                           &brc_kernel,
490                           1);
491 }
492
493 static void
494 gen10_hevc_vme_init_kernels_context(VADriverContextP ctx,
495                                     struct intel_encoder_context *encoder_context,
496                                     struct gen10_hevc_enc_context *vme_context)
497 {
498     gen10_hevc_vme_init_scaling_context(ctx, vme_context, &vme_context->scaling_context);
499     gen10_hevc_vme_init_me_context(ctx, vme_context, &vme_context->me_context);
500     gen10_hevc_vme_init_mbenc_context(ctx, vme_context, &vme_context->mbenc_context);
501     gen10_hevc_vme_init_brc_context(ctx, vme_context, &vme_context->brc_context);
502 }
503
504 static void
505 gen10_hevc_free_surface(void **data)
506 {
507     struct gen10_hevc_surface_priv *surface_priv;
508
509     if (!data || !*data)
510         return;
511
512     surface_priv = *data;
513
514     if (surface_priv->scaled_4x_surface) {
515         i965_free_gpe_resource(&surface_priv->gpe_scaled_4x_surface);
516
517         i965_DestroySurfaces(surface_priv->ctx, &surface_priv->scaled_4x_surface_id, 1);
518         surface_priv->scaled_4x_surface_id = VA_INVALID_SURFACE;
519         surface_priv->scaled_4x_surface = NULL;
520     }
521
522     if (surface_priv->scaled_16x_surface) {
523         i965_free_gpe_resource(&surface_priv->gpe_scaled_16x_surface);
524
525         i965_DestroySurfaces(surface_priv->ctx, &surface_priv->scaled_16x_surface_id, 1);
526         surface_priv->scaled_16x_surface_id = VA_INVALID_SURFACE;
527         surface_priv->scaled_16x_surface = NULL;
528     }
529
530     if (surface_priv->scaled_2x_surface) {
531         i965_free_gpe_resource(&surface_priv->gpe_scaled_2x_surface);
532
533         i965_DestroySurfaces(surface_priv->ctx, &surface_priv->scaled_2x_surface_id, 1);
534         surface_priv->scaled_2x_surface_id = VA_INVALID_SURFACE;
535         surface_priv->scaled_2x_surface = NULL;
536     }
537
538     if (surface_priv->converted_surface) {
539         i965_free_gpe_resource(&surface_priv->gpe_converted_surface);
540
541         i965_DestroySurfaces(surface_priv->ctx, &surface_priv->converted_surface_id, 1);
542         surface_priv->converted_surface_id = VA_INVALID_SURFACE;
543         surface_priv->converted_surface = NULL;
544     }
545
546     i965_free_gpe_resource(&surface_priv->motion_vector_temporal);
547
548     free(surface_priv);
549
550     *data = NULL;
551
552     return;
553 }
554
555 static VAStatus
556 gen10_hevc_init_surface_priv(VADriverContextP ctx,
557                              struct encode_state *encode_state,
558                              struct intel_encoder_context *encoder_context,
559                              struct object_surface *obj_surface)
560 {
561     struct i965_driver_data *i965 = i965_driver_data(ctx);
562     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
563     struct gen10_hevc_enc_frame_info *frame_info;
564     struct gen10_hevc_enc_state *hevc_state;
565     struct gen10_hevc_surface_priv *surface_priv;
566     int downscaled_width_4x = 0, downscaled_height_4x = 0;
567     int downscaled_width_16x = 0, downscaled_height_16x = 0;
568     int frame_width = 0, frame_height = 0, size;
569
570     if (!obj_surface || !obj_surface->bo)
571         return VA_STATUS_ERROR_INVALID_SURFACE;
572
573     if (obj_surface->private_data &&
574         obj_surface->free_private_data != gen10_hevc_free_surface) {
575         obj_surface->free_private_data(&obj_surface->private_data);
576         obj_surface->private_data = NULL;
577     }
578
579     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
580     frame_info = &vme_context->frame_info;
581
582     if (obj_surface->private_data) {
583         surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
584
585         if ((surface_priv->frame_width == frame_info->frame_width) &&
586             (surface_priv->frame_height == frame_info->frame_height) &&
587             (surface_priv->width_ctb == frame_info->width_in_lcu) &&
588             (surface_priv->height_ctb == frame_info->height_in_lcu) &&
589             (surface_priv->is_10bit == hevc_state->is_10bit) &&
590             (surface_priv->is_64lcu == hevc_state->is_64lcu))
591             return VA_STATUS_SUCCESS;
592
593         obj_surface->free_private_data(&obj_surface->private_data);
594         obj_surface->private_data = NULL;
595         surface_priv = NULL;
596     }
597
598     surface_priv = calloc(1, sizeof(struct gen10_hevc_surface_priv));
599
600     if (!surface_priv)
601         return VA_STATUS_ERROR_ALLOCATION_FAILED;
602
603     surface_priv->ctx = ctx;
604
605     obj_surface->private_data = surface_priv;
606     obj_surface->free_private_data = gen10_hevc_free_surface;
607
608     if (hevc_state->is_64lcu) {
609         frame_width = ALIGN(frame_info->frame_width, 64) >> 1;
610         frame_height = ALIGN(frame_info->frame_height, 64) >> 1;
611
612         if (i965_CreateSurfaces(ctx,
613                                 frame_width,
614                                 frame_height,
615                                 VA_RT_FORMAT_YUV420,
616                                 1,
617                                 &surface_priv->scaled_2x_surface_id) != VA_STATUS_SUCCESS)
618             return VA_STATUS_ERROR_ALLOCATION_FAILED;
619
620         surface_priv->scaled_2x_surface = SURFACE(surface_priv->scaled_2x_surface_id);
621
622         if (!surface_priv->scaled_2x_surface)
623             return VA_STATUS_ERROR_ALLOCATION_FAILED;
624
625         i965_check_alloc_surface_bo(ctx, surface_priv->scaled_2x_surface, 1,
626                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
627
628         i965_object_surface_to_2d_gpe_resource(&surface_priv->gpe_scaled_2x_surface,
629                                                surface_priv->scaled_2x_surface);
630     }
631
632     if (hevc_state->is_10bit) {
633         if (i965_CreateSurfaces(ctx,
634                                 frame_info->frame_width,
635                                 frame_info->frame_height,
636                                 VA_RT_FORMAT_YUV420,
637                                 1,
638                                 &surface_priv->converted_surface_id) != VA_STATUS_SUCCESS)
639             return VA_STATUS_ERROR_ALLOCATION_FAILED;
640
641         surface_priv->converted_surface = SURFACE(surface_priv->converted_surface_id);
642
643         if (!surface_priv->converted_surface)
644             return VA_STATUS_ERROR_ALLOCATION_FAILED;
645
646         i965_check_alloc_surface_bo(ctx, surface_priv->converted_surface, 1,
647                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
648
649         i965_object_surface_to_2d_gpe_resource(&surface_priv->gpe_converted_surface,
650                                                surface_priv->converted_surface);
651     }
652
653     if (hevc_state->hme_supported) {
654         downscaled_width_4x = ALIGN(frame_info->frame_width / 4, 32);
655         downscaled_height_4x = ALIGN(frame_info->frame_height / 4, 32);
656
657         if (i965_CreateSurfaces(ctx,
658                                 downscaled_width_4x,
659                                 downscaled_height_4x,
660                                 VA_RT_FORMAT_YUV420,
661                                 1,
662                                 &surface_priv->scaled_4x_surface_id) != VA_STATUS_SUCCESS)
663             return VA_STATUS_ERROR_ALLOCATION_FAILED;
664
665         surface_priv->scaled_4x_surface = SURFACE(surface_priv->scaled_4x_surface_id);
666
667         if (!surface_priv->scaled_4x_surface)
668             return VA_STATUS_ERROR_ALLOCATION_FAILED;
669
670         i965_check_alloc_surface_bo(ctx, surface_priv->scaled_4x_surface, 1,
671                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
672
673         i965_object_surface_to_2d_gpe_resource(&surface_priv->gpe_scaled_4x_surface,
674                                                surface_priv->scaled_4x_surface);
675     }
676
677     if (hevc_state->hme_supported &&
678         hevc_state->b16xme_supported) {
679         downscaled_width_16x = ALIGN(downscaled_width_4x / 4, 32);
680         downscaled_height_16x = ALIGN(downscaled_height_4x / 4, 32);
681
682         if (i965_CreateSurfaces(ctx,
683                                 downscaled_width_16x,
684                                 downscaled_height_16x,
685                                 VA_RT_FORMAT_YUV420,
686                                 1,
687                                 &surface_priv->scaled_16x_surface_id) != VA_STATUS_SUCCESS)
688             return VA_STATUS_ERROR_ALLOCATION_FAILED;
689
690         surface_priv->scaled_16x_surface = SURFACE(surface_priv->scaled_16x_surface_id);
691
692         if (!surface_priv->scaled_16x_surface)
693             return VA_STATUS_ERROR_ALLOCATION_FAILED;
694
695         i965_check_alloc_surface_bo(ctx, surface_priv->scaled_16x_surface, 1,
696                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
697
698         i965_object_surface_to_2d_gpe_resource(&surface_priv->gpe_scaled_16x_surface,
699                                                surface_priv->scaled_16x_surface);
700     }
701
702     frame_width = frame_info->frame_width;
703     frame_height = frame_info->frame_height;
704
705     size = MAX(((frame_width + 63) >> 6) * ((frame_height + 15) >> 4),
706                ((frame_width + 31) >> 5) * ((frame_height + 31) >> 5));
707     size = ALIGN(size, 2) * 64;
708     if (!i965_allocate_gpe_resource(i965->intel.bufmgr,
709                                     &surface_priv->motion_vector_temporal,
710                                     size,
711                                     "Motion vector temporal buffer"))
712         return VA_STATUS_ERROR_ALLOCATION_FAILED;
713
714     surface_priv->is_10bit = hevc_state->is_10bit;
715     surface_priv->is_64lcu = hevc_state->is_64lcu;
716     surface_priv->frame_width = frame_info->frame_width;
717     surface_priv->frame_height = frame_info->frame_height;
718     surface_priv->width_ctb = frame_info->width_in_lcu;
719     surface_priv->height_ctb = frame_info->height_in_lcu;
720
721     return VA_STATUS_SUCCESS;
722 }
723
724 static void
725 gen10_hevc_free_enc_resources(void *context)
726 {
727     struct gen10_hevc_enc_context *vme_context = context;
728
729     if (!vme_context)
730         return;
731
732     i965_free_gpe_resource(&vme_context->res_mb_code_surface);
733
734     i965_free_gpe_resource(&vme_context->res_temp_curecord_lcu32_surface);
735     i965_free_gpe_resource(&vme_context->res_16x16_qp_data_surface);
736     i965_free_gpe_resource(&vme_context->res_lculevel_input_data_buffer);
737     i965_free_gpe_resource(&vme_context->res_concurrent_tg_data);
738     i965_free_gpe_resource(&vme_context->res_cu_split_surface);
739     i965_free_gpe_resource(&vme_context->res_kernel_trace_data);
740     i965_free_gpe_resource(&vme_context->res_enc_const_table_intra);
741     i965_free_gpe_resource(&vme_context->res_enc_const_table_inter);
742     i965_free_gpe_resource(&vme_context->res_enc_const_table_inter_lcu64);
743     i965_free_gpe_resource(&vme_context->res_scratch_surface);
744
745     i965_free_gpe_resource(&vme_context->res_temp2_curecord_lcu32_surface);
746     i965_free_gpe_resource(&vme_context->res_temp_curecord_surface_lcu64);
747     i965_free_gpe_resource(&vme_context->res_enc_scratch_buffer);
748     i965_free_gpe_resource(&vme_context->res_enc_scratch_lcu64_buffer);
749     i965_free_gpe_resource(&vme_context->res_64x64_dist_buffer);
750
751     i965_free_gpe_resource(&vme_context->res_jbq_header_buffer);
752     i965_free_gpe_resource(&vme_context->res_jbq_header_lcu64_buffer);
753     i965_free_gpe_resource(&vme_context->res_jbq_data_lcu32_surface);
754     i965_free_gpe_resource(&vme_context->res_jbq_data_lcu64_surface);
755     i965_free_gpe_resource(&vme_context->res_residual_scratch_lcu32_surface);
756
757     i965_free_gpe_resource(&vme_context->res_residual_scratch_lcu64_surface);
758     i965_free_gpe_resource(&vme_context->res_mb_stat_surface);
759     i965_free_gpe_resource(&vme_context->res_mb_split_surface);
760
761     i965_free_gpe_resource(&vme_context->res_s4x_memv_data_surface);
762     i965_free_gpe_resource(&vme_context->res_s4x_me_dist_surface);
763
764     i965_free_gpe_resource(&vme_context->res_s16x_memv_data_surface);
765     i965_free_gpe_resource(&vme_context->res_mv_dist_sum_buffer);
766
767     i965_free_gpe_resource(&vme_context->res_brc_me_dist_surface);
768     i965_free_gpe_resource(&vme_context->res_brc_input_enc_kernel_buffer);
769     i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
770     i965_free_gpe_resource(&vme_context->res_brc_intra_dist_surface);
771     i965_free_gpe_resource(&vme_context->res_brc_pak_statistics_buffer[0]);
772     i965_free_gpe_resource(&vme_context->res_brc_pak_statistics_buffer[1]);
773     i965_free_gpe_resource(&vme_context->res_brc_pic_image_state_write_buffer);
774     i965_free_gpe_resource(&vme_context->res_brc_pic_image_state_read_buffer);
775     i965_free_gpe_resource(&vme_context->res_brc_const_data_surface);
776     i965_free_gpe_resource(&vme_context->res_brc_lcu_const_data_buffer);
777     i965_free_gpe_resource(&vme_context->res_brc_mb_qp_surface);
778 }
779
780 static VAStatus
781 gen10_hevc_allocate_enc_resources(VADriverContextP ctx,
782                                   struct encode_state *encode_state,
783                                   struct intel_encoder_context *encoder_context)
784
785 {
786     struct i965_driver_data *i965 = i965_driver_data(ctx);
787     struct gen10_hevc_enc_context *vme_context;
788     struct gen10_hevc_enc_state *hevc_state;
789     struct gen10_hevc_enc_frame_info *frame_info;
790     int dw_width, dw_height;
791     int allocate_flag;
792     int res_size;
793     int i;
794
795     vme_context = (struct gen10_hevc_enc_context *)encoder_context->vme_context;
796     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
797     frame_info = &vme_context->frame_info;
798
799     i965_free_gpe_resource(&vme_context->res_mb_code_surface);
800     res_size = vme_context->frame_info.width_in_lcu * vme_context->frame_info.height_in_lcu;
801     if (hevc_state->is_64lcu)
802         res_size = res_size * 64 * 32;
803     else
804         res_size = res_size * 16 * 32;
805
806     res_size = res_size + hevc_state->cu_records_offset;
807     res_size = ALIGN(res_size, 4096);
808     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
809                                                &vme_context->res_mb_code_surface,
810                                                res_size,
811                                                "Mb Code_Surface");
812     if (!allocate_flag)
813         goto FAIL;
814
815     i965_free_gpe_resource(&vme_context->res_temp_curecord_lcu32_surface);
816     dw_width = ALIGN(hevc_state->frame_width, 64);
817     dw_height = ALIGN(hevc_state->frame_height, 64);
818     dw_width = ALIGN(dw_width, 64);
819     res_size = dw_width * dw_height * 64 + 1024;
820     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
821                                                   &vme_context->res_temp_curecord_lcu32_surface,
822                                                   dw_width, dw_height, dw_width,
823                                                   "Temp CURecord surfaces");
824     if (!allocate_flag)
825         goto FAIL;
826
827     i965_free_gpe_resource(&vme_context->res_16x16_qp_data_surface);
828     dw_width = ALIGN(hevc_state->frame_width, 64) >> 4;
829     dw_height = ALIGN(hevc_state->frame_height, 64) >> 4;
830     dw_width = ALIGN(dw_width, 64);
831     dw_height = ALIGN(dw_height, 64);
832     dw_width = ALIGN(dw_width, 64);
833     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
834                                                   &vme_context->res_16x16_qp_data_surface,
835                                                   dw_width, dw_height, dw_width,
836                                                   "CU 16x16 input surface");
837     if (!allocate_flag)
838         goto FAIL;
839
840     i965_free_gpe_resource(&vme_context->res_lculevel_input_data_buffer);
841     res_size = vme_context->frame_info.width_in_lcu * vme_context->frame_info.height_in_lcu * 16;
842     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
843                                                &vme_context->res_lculevel_input_data_buffer,
844                                                res_size,
845                                                "LCU Input data buffer");
846     if (!allocate_flag)
847         goto FAIL;
848
849     i965_free_gpe_resource(&vme_context->res_concurrent_tg_data);
850     res_size = 16 * 256;
851     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
852                                                &vme_context->res_concurrent_tg_data,
853                                                res_size,
854                                                "Concurrent Thread_group data");
855     if (!allocate_flag)
856         goto FAIL;
857
858     i965_free_gpe_resource(&vme_context->res_cu_split_surface);
859     dw_width = ALIGN(hevc_state->frame_width, 64) >> 4;
860     dw_height = ALIGN(hevc_state->frame_height, 64) >> 4;
861     dw_width = ALIGN(dw_width, 64);
862     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
863                                                   &vme_context->res_cu_split_surface,
864                                                   dw_width, dw_height, dw_width,
865                                                   "CU split surface");
866     if (!allocate_flag)
867         goto FAIL;
868
869     i965_free_gpe_resource(&vme_context->res_kernel_trace_data);
870     res_size = 4096;
871     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
872                                                &vme_context->res_kernel_trace_data,
873                                                res_size,
874                                                "Kernel trace");
875     if (!allocate_flag)
876         goto FAIL;
877
878     i965_free_gpe_resource(&vme_context->res_enc_const_table_intra);
879     res_size = GEN10_HEVC_ENC_INTRA_CONST_LUT_SIZE ;
880     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
881                                                &vme_context->res_enc_const_table_intra,
882                                                res_size,
883                                                "Constant data for Intra");
884     if (!allocate_flag)
885         goto FAIL;
886
887     i965_free_gpe_resource(&vme_context->res_enc_const_table_inter);
888     res_size = GEN10_HEVC_ENC_INTER_CONST_LUT32_SIZE ;
889
890     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
891                                                &vme_context->res_enc_const_table_inter,
892                                                res_size,
893                                                "Constant data for Inter");
894     if (!allocate_flag)
895         goto FAIL;
896
897     i965_free_gpe_resource(&vme_context->res_enc_const_table_inter_lcu64);
898     if (hevc_state->is_64lcu) {
899         res_size = GEN10_HEVC_ENC_INTER_CONST_LUT64_SIZE ;
900
901         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
902                                                    &vme_context->res_enc_const_table_inter_lcu64,
903                                                    res_size,
904                                                    "Constant data for LCU64_Inter");
905         if (!allocate_flag)
906             goto FAIL;
907     }
908
909     i965_free_gpe_resource(&vme_context->res_scratch_surface);
910     dw_width = ALIGN(hevc_state->frame_width, 64) >> 3;
911     dw_height = ALIGN(hevc_state->frame_height, 64) >> 5;
912     dw_width = ALIGN(dw_width, 64);
913     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
914                                                   &vme_context->res_scratch_surface,
915                                                   dw_width, dw_height, dw_width,
916                                                   "CU scratch surface");
917     if (!allocate_flag)
918         goto FAIL;
919
920     i965_free_gpe_resource(&vme_context->res_temp2_curecord_lcu32_surface);
921     dw_width = ALIGN(hevc_state->frame_width, 64);
922     dw_height = ALIGN(hevc_state->frame_height, 64);
923     dw_width = ALIGN(dw_width, 64);
924     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
925                                                   &vme_context->res_temp2_curecord_lcu32_surface,
926                                                   dw_width, dw_height, dw_width,
927                                                   "second temp CURecord surfaces");
928     if (!allocate_flag)
929         goto FAIL;
930
931     if (hevc_state->is_64lcu) {
932         i965_free_gpe_resource(&vme_context->res_temp_curecord_surface_lcu64);
933         /* the max number of CU based on 8x8. */
934         dw_width = ALIGN(hevc_state->frame_width, 64);
935         dw_height = ALIGN(hevc_state->frame_height, 64) / 2;
936         dw_width = ALIGN(dw_width, 64);
937         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
938                                                       &vme_context->res_temp_curecord_surface_lcu64,
939                                                       dw_width, dw_height, dw_width,
940                                                       "temp CURecord LCU64 surfaces");
941         if (!allocate_flag)
942             goto FAIL;
943     }
944
945     i965_free_gpe_resource(&vme_context->res_enc_scratch_buffer);
946     dw_width = ALIGN(hevc_state->frame_width, 64) >> 5;
947     dw_height = ALIGN(hevc_state->frame_height, 64) >> 5;
948     res_size = dw_width * dw_height * 13312 + 4096;
949     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
950                                                &vme_context->res_enc_scratch_buffer,
951                                                res_size,
952                                                "Enc Scratch data");
953     if (!allocate_flag)
954         goto FAIL;
955
956     i965_free_gpe_resource(&vme_context->res_enc_scratch_lcu64_buffer);
957     dw_width = vme_context->frame_info.width_in_lcu;
958     dw_height = vme_context->frame_info.height_in_lcu;
959     res_size = dw_width * dw_height * 13312;
960     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
961                                                &vme_context->res_enc_scratch_lcu64_buffer,
962                                                res_size,
963                                                "Enc Scratch data");
964     if (!allocate_flag)
965         goto FAIL;
966
967     i965_free_gpe_resource(&vme_context->res_64x64_dist_buffer);
968     dw_width = ALIGN(hevc_state->frame_width, 64) >> 6;
969     dw_height = ALIGN(hevc_state->frame_height, 64) >> 6;
970     res_size = dw_width * dw_height * 32;
971     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
972                                                &vme_context->res_64x64_dist_buffer,
973                                                res_size,
974                                                "Res 64x64 Distortion");
975     if (!allocate_flag)
976         goto FAIL;
977
978     i965_free_gpe_resource(&vme_context->res_jbq_header_buffer);
979     dw_width = ALIGN(hevc_state->frame_width, 64) >> 5;
980     dw_height = ALIGN(hevc_state->frame_height, 64) >> 5;
981     res_size = dw_width * dw_height * 2656;
982     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
983                                                &vme_context->res_jbq_header_buffer,
984                                                res_size,
985                                                "Job queue_header");
986     if (!allocate_flag)
987         goto FAIL;
988
989     i965_free_gpe_resource(&vme_context->res_jbq_header_lcu64_buffer);
990     dw_width = ALIGN(hevc_state->frame_width, 64) >> 5;
991     dw_height = ALIGN(hevc_state->frame_height, 64) >> 5;
992     res_size = dw_width * dw_height * 32;
993     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
994                                                &vme_context->res_jbq_header_lcu64_buffer,
995                                                res_size,
996                                                "Job queue_header for Multi-thread LCU");
997     if (!allocate_flag)
998         goto FAIL;
999
1000     i965_free_gpe_resource(&vme_context->res_jbq_data_lcu32_surface);
1001     dw_width = ALIGN(hevc_state->frame_width, 64);
1002     dw_height = (ALIGN(hevc_state->frame_height, 64) >> 5) * 58;
1003     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1004                                                   &vme_context->res_jbq_data_lcu32_surface,
1005                                                   dw_width, dw_height, dw_width,
1006                                                   "Job queue data surface for Multi-thread LCU32");
1007     if (!allocate_flag)
1008         goto FAIL;
1009
1010     i965_free_gpe_resource(&vme_context->res_jbq_data_lcu64_surface);
1011     dw_width = ALIGN(hevc_state->frame_width, 64) >> 1;
1012     dw_height = (ALIGN(hevc_state->frame_height, 64) >> 6) * 66;
1013     dw_width = ALIGN(dw_width, 64);
1014     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1015                                                   &vme_context->res_jbq_data_lcu64_surface,
1016                                                   dw_width, dw_height, dw_width,
1017                                                   "Job queue data surface for Multi-thread LCU64");
1018     if (!allocate_flag)
1019         goto FAIL;
1020
1021     i965_free_gpe_resource(&vme_context->res_residual_scratch_lcu32_surface);
1022     dw_width = ALIGN(hevc_state->frame_width, 64) << 1;
1023     dw_height = ALIGN(hevc_state->frame_height, 64) << 2;
1024     dw_width = ALIGN(dw_width, 64);
1025     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1026                                                   &vme_context->res_residual_scratch_lcu32_surface,
1027                                                   dw_width, dw_height, dw_width,
1028                                                   "Resiudal scratch for LCU32");
1029     if (!allocate_flag)
1030         goto FAIL;
1031
1032     i965_free_gpe_resource(&vme_context->res_residual_scratch_lcu64_surface);
1033     dw_width = ALIGN(hevc_state->frame_width, 64) << 1;
1034     dw_height = ALIGN(hevc_state->frame_height, 64) << 2;
1035     dw_width = ALIGN(dw_width, 64);
1036     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1037                                                   &vme_context->res_residual_scratch_lcu64_surface,
1038                                                   dw_width, dw_height, dw_width,
1039                                                   "Resiudal scratch for LCU64");
1040     if (!allocate_flag)
1041         goto FAIL;
1042
1043     i965_free_gpe_resource(&vme_context->res_mb_stat_surface);
1044     dw_width = ALIGN(frame_info->width_in_mb * 4, 64);
1045     dw_height = ALIGN(frame_info->height_in_mb, 8) * 2;
1046     dw_width = ALIGN(dw_width, 64);
1047     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1048                                                   &vme_context->res_mb_stat_surface,
1049                                                   dw_width, dw_height, dw_width,
1050                                                   "MB 16x16 stat");
1051     if (!allocate_flag)
1052         goto FAIL;
1053
1054     i965_free_gpe_resource(&vme_context->res_mb_split_surface);
1055     dw_width = ALIGN(hevc_state->frame_width, 64) >> 2;
1056     dw_height = ALIGN(hevc_state->frame_height, 64) >> 4;
1057     dw_width = ALIGN(dw_width, 64);
1058     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1059                                                   &vme_context->res_mb_split_surface,
1060                                                   dw_width, dw_height, dw_width,
1061                                                   "MB split surface");
1062     if (!allocate_flag)
1063         goto FAIL;
1064
1065     if (hevc_state->hme_supported) {
1066         i965_free_gpe_resource(&vme_context->res_s4x_memv_data_surface);
1067         dw_width = hevc_state->frame_width_4x * 4;
1068         dw_height = hevc_state->frame_height_4x >> 3;
1069         dw_width = ALIGN(dw_width, 64);
1070         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1071                                                       &vme_context->res_s4x_memv_data_surface,
1072                                                       dw_width, dw_height, dw_width,
1073                                                       "HME MEMV Data");
1074         if (!allocate_flag)
1075             goto FAIL;
1076
1077         i965_free_gpe_resource(&vme_context->res_s4x_me_dist_surface);
1078         dw_width = hevc_state->frame_width_4x;
1079         dw_height = hevc_state->frame_height_4x >> 1;
1080         dw_width = ALIGN(dw_width, 64);
1081         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1082                                                       &vme_context->res_s4x_me_dist_surface,
1083                                                       dw_width, dw_height, dw_width,
1084                                                       "HME Distorion");
1085         if (!allocate_flag)
1086             goto FAIL;
1087     }
1088
1089     if (hevc_state->hme_supported &&
1090         hevc_state->b16xme_supported) {
1091         i965_free_gpe_resource(&vme_context->res_s16x_memv_data_surface);
1092         dw_width = hevc_state->frame_width_16x * 4;
1093         dw_height = hevc_state->frame_height_16x >> 3;
1094         dw_width = ALIGN(dw_width, 64);
1095         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1096                                                       &vme_context->res_s16x_memv_data_surface,
1097                                                       dw_width, dw_height, dw_width,
1098                                                       "16xME MEMV Data");
1099         if (!allocate_flag)
1100             goto FAIL;
1101     }
1102
1103     i965_free_gpe_resource(&vme_context->res_mv_dist_sum_buffer);
1104     res_size = 64;
1105     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1106                                                &vme_context->res_mv_dist_sum_buffer,
1107                                                res_size,
1108                                                "MV_DIST_sum");
1109     if (!allocate_flag)
1110         goto FAIL;
1111
1112     i965_free_gpe_resource(&vme_context->res_brc_me_dist_surface);
1113     dw_width = ALIGN(hevc_state->frame_width, 64) >> 4;
1114     dw_width = ALIGN(dw_width, 64);
1115     dw_height = ALIGN(hevc_state->frame_height, 64) >> 4;
1116     dw_height = ALIGN(dw_height, 64);
1117     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1118                                                   &vme_context->res_brc_me_dist_surface,
1119                                                   dw_width, dw_height, dw_width,
1120                                                   "ME BRC distortion");
1121     if (!allocate_flag)
1122         goto FAIL;
1123
1124     i965_free_gpe_resource(&vme_context->res_brc_input_enc_kernel_buffer);
1125     res_size = 1024;
1126     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1127                                                &vme_context->res_brc_input_enc_kernel_buffer,
1128                                                res_size,
1129                                                "Brc Input for Enc Kernel");
1130     if (!allocate_flag)
1131         goto FAIL;
1132
1133     i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
1134     res_size = GEN10_HEVC_BRC_HISTORY_BUFFER_SIZE;
1135     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1136                                                &vme_context->res_brc_history_buffer,
1137                                                res_size,
1138                                                "Brc History buffer");
1139     if (!allocate_flag)
1140         goto FAIL;
1141
1142     i965_zero_gpe_resource(&vme_context->res_brc_history_buffer);
1143
1144     i965_free_gpe_resource(&vme_context->res_brc_intra_dist_surface);
1145     dw_width = ALIGN(hevc_state->frame_width_4x / 2, 64);
1146     dw_height = ALIGN(hevc_state->frame_height_4x / 4, 8) * 2;
1147     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1148                                                   &vme_context->res_brc_intra_dist_surface,
1149                                                   dw_width, dw_height, dw_width,
1150                                                   "Brc Intra distortion buffer");
1151     if (!allocate_flag)
1152         goto FAIL;
1153
1154     i965_zero_gpe_resource(&vme_context->res_brc_intra_dist_surface);
1155
1156     for (i = 0; i < 2; i++) {
1157         i965_free_gpe_resource(&vme_context->res_brc_pak_statistics_buffer[i]);
1158         res_size = 64;
1159         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1160                                                    &vme_context->res_brc_pak_statistics_buffer[i],
1161                                                    res_size,
1162                                                    "Brc Pak statistics buffer");
1163         if (!allocate_flag)
1164             goto FAIL;
1165     }
1166
1167     i965_free_gpe_resource(&vme_context->res_brc_pic_image_state_write_buffer);
1168     res_size = GEN10_HEVC_BRC_IMG_STATE_SIZE_PER_PASS * 8;
1169     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1170                                                &vme_context->res_brc_pic_image_state_write_buffer,
1171                                                res_size,
1172                                                "Brc Pic State Write buffer");
1173     if (!allocate_flag)
1174         goto FAIL;
1175
1176     i965_free_gpe_resource(&vme_context->res_brc_pic_image_state_read_buffer);
1177     res_size = GEN10_HEVC_BRC_IMG_STATE_SIZE_PER_PASS * 8;
1178     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1179                                                &vme_context->res_brc_pic_image_state_read_buffer,
1180                                                res_size,
1181                                                "Brc Pic State Read buffer");
1182     if (!allocate_flag)
1183         goto FAIL;
1184
1185     i965_free_gpe_resource(&vme_context->res_brc_const_data_surface);
1186     dw_width = ALIGN(GEN10_HEVC_BRC_CONST_SURFACE_WIDTH, 64);
1187     dw_height = ALIGN(GEN10_HEVC_BRC_CONST_SURFACE_HEIGHT, 32);
1188     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1189                                                   &vme_context->res_brc_const_data_surface,
1190                                                   dw_width, dw_height, dw_width,
1191                                                   "Brc Const data buffer");
1192     if (!allocate_flag)
1193         goto FAIL;
1194
1195     i965_free_gpe_resource(&vme_context->res_brc_lcu_const_data_buffer);
1196     res_size = 4096;
1197     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1198                                                &vme_context->res_brc_lcu_const_data_buffer,
1199                                                res_size,
1200                                                "BRC LCU Const_data buffer");
1201     if (!allocate_flag)
1202         goto FAIL;
1203
1204     i965_zero_gpe_resource(&vme_context->res_brc_lcu_const_data_buffer);
1205
1206     i965_free_gpe_resource(&vme_context->res_brc_mb_qp_surface);
1207     dw_width = ALIGN(hevc_state->frame_width_4x * 4, 64) >> 4;
1208     dw_height = ALIGN(hevc_state->frame_height_4x * 4, 64) >> 5;
1209
1210     dw_width = ALIGN(dw_width, 64);
1211     dw_height = ALIGN(dw_height, 8);
1212     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1213                                                   &vme_context->res_brc_mb_qp_surface,
1214                                                   dw_width, dw_height, dw_width,
1215                                                   "Brc LCU qp data buffer");
1216     if (!allocate_flag)
1217         goto FAIL;
1218
1219     i965_zero_gpe_resource(&vme_context->res_brc_mb_qp_surface);
1220
1221     return VA_STATUS_SUCCESS;
1222
1223 FAIL:
1224     return VA_STATUS_ERROR_ALLOCATION_FAILED;
1225 }
1226
1227 static VAStatus
1228 gen10_hevc_enc_init_const_resources(VADriverContextP ctx,
1229                                     struct encode_state *encode_state,
1230                                     struct intel_encoder_context *encoder_context)
1231 {
1232     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
1233     struct gen10_hevc_enc_state *hevc_state;
1234     char *buffer_ptr;
1235
1236     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
1237
1238     buffer_ptr = i965_map_gpe_resource(&vme_context->res_enc_const_table_intra);
1239     if (!buffer_ptr)
1240         return VA_STATUS_ERROR_OPERATION_FAILED;
1241
1242     memcpy(buffer_ptr, gen10_hevc_enc_intra_const_lut,
1243            GEN10_HEVC_ENC_INTRA_CONST_LUT_SIZE);
1244
1245     i965_unmap_gpe_resource(&vme_context->res_enc_const_table_intra);
1246
1247     buffer_ptr = i965_map_gpe_resource(&vme_context->res_enc_const_table_inter);
1248     if (!buffer_ptr)
1249         return VA_STATUS_ERROR_OPERATION_FAILED;
1250
1251     memcpy(buffer_ptr, gen10_hevc_enc_inter_const_lut32,
1252            GEN10_HEVC_ENC_INTER_CONST_LUT32_SIZE);
1253
1254     i965_unmap_gpe_resource(&vme_context->res_enc_const_table_inter);
1255
1256     if (hevc_state->is_64lcu) {
1257         buffer_ptr = i965_map_gpe_resource(&vme_context->res_enc_const_table_inter_lcu64);
1258         if (!buffer_ptr)
1259             return VA_STATUS_ERROR_OPERATION_FAILED;
1260
1261         memcpy(buffer_ptr, gen10_hevc_enc_inter_const_lut64,
1262                GEN10_HEVC_ENC_INTER_CONST_LUT64_SIZE);
1263
1264         i965_unmap_gpe_resource(&vme_context->res_enc_const_table_inter_lcu64);
1265     }
1266
1267     buffer_ptr = i965_map_gpe_resource(&vme_context->res_brc_const_data_surface);
1268     if (!buffer_ptr)
1269         return VA_STATUS_ERROR_OPERATION_FAILED;
1270
1271     memcpy(buffer_ptr, gen10_hevc_brc_qp_adjust_data, GEN10_HEVC_BRC_QP_ADJUST_SIZE);
1272
1273     buffer_ptr += GEN10_HEVC_BRC_QP_ADJUST_SIZE;
1274
1275     if (hevc_state->is_64lcu)
1276         memcpy(buffer_ptr, gen10_hevc_brc_lcu64_lambda_cost, GEN10_HEVC_BRC_LCU_LAMBDA_COST);
1277     else
1278         memcpy(buffer_ptr, gen10_hevc_brc_lcu32_lambda_cost, GEN10_HEVC_BRC_LCU_LAMBDA_COST);
1279
1280     i965_unmap_gpe_resource(&vme_context->res_brc_const_data_surface);
1281
1282     return VA_STATUS_SUCCESS;
1283 }
1284
1285 static VAStatus
1286 gen10_hevc_enc_check_parameters(VADriverContextP ctx,
1287                                 struct encode_state *encode_state,
1288                                 struct intel_encoder_context *encoder_context)
1289 {
1290     VAEncSequenceParameterBufferHEVC *seq_param;
1291     VAEncPictureParameterBufferHEVC *pic_param;
1292     VAEncSliceParameterBufferHEVC *slice_param;
1293     int i = 0, j = 0;
1294
1295     seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1296     pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1297     slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1298
1299     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1300         slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[i]->buffer;
1301
1302         if (slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag &&
1303             slice_param->slice_fields.bits.collocated_from_l0_flag &&
1304             (pic_param->collocated_ref_pic_index == 0xff ||
1305              pic_param->collocated_ref_pic_index > GEN10_MAX_REF_SURFACES))
1306             slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag = 0;
1307
1308         if (slice_param->num_ref_idx_l0_active_minus1 > GEN10_HEVC_NUM_MAX_REF_L0 - 1 ||
1309             slice_param->num_ref_idx_l1_active_minus1 > GEN10_HEVC_NUM_MAX_REF_L1 - 1)
1310             return VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
1311
1312         if (slice_param->slice_type == HEVC_SLICE_P)
1313             return VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
1314     }
1315
1316     i = seq_param->log2_diff_max_min_luma_coding_block_size +
1317         seq_param->log2_min_luma_coding_block_size_minus3 + 3;
1318     if (i < GEN10_HEVC_LOG2_MIN_HEVC_LCU ||
1319         i > GEN10_HEVC_LOG2_MAX_HEVC_LCU)
1320         return VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
1321
1322     i = seq_param->log2_min_transform_block_size_minus2 +
1323         seq_param->log2_diff_max_min_transform_block_size + 2;
1324     j = seq_param->log2_min_luma_coding_block_size_minus3 +
1325         seq_param->log2_diff_max_min_luma_coding_block_size + 3;
1326
1327     if (i != j)
1328         return VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
1329
1330     slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1331     i = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1332     j = -seq_param->seq_fields.bits.bit_depth_luma_minus8 * 6;
1333     if (i < j || i > 51)
1334         return VA_STATUS_ERROR_INVALID_PARAMETER;
1335
1336     if (seq_param->seq_fields.bits.chroma_format_idc != 1)
1337         return VA_STATUS_ERROR_INVALID_PARAMETER;
1338
1339     return VA_STATUS_SUCCESS;
1340 }
1341
1342 static VAStatus
1343 gen10_hevc_enc_init_misc_paramers(VADriverContextP ctx,
1344                                   struct encode_state *encode_state,
1345                                   struct intel_encoder_context *encoder_context)
1346 {
1347     struct gen10_hevc_enc_context *vme_context = NULL;
1348     struct gen10_hevc_enc_state *hevc_state;
1349     struct gen10_hevc_enc_frame_info *frame_info;
1350     VAEncSequenceParameterBufferHEVC *seq_param;
1351     VAEncSliceParameterBufferHEVC *slice_param;
1352     uint32_t brc_method, brc_reset;
1353
1354     vme_context = (struct gen10_hevc_enc_context *) encoder_context->vme_context;
1355     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
1356     frame_info = &vme_context->frame_info;
1357     seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1358     slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1359
1360     hevc_state->low_delay = frame_info->low_delay;
1361
1362     hevc_state->frame_width = frame_info->frame_width;
1363     hevc_state->frame_height = frame_info->frame_height;
1364
1365     hevc_state->frame_width_2x = ALIGN(frame_info->frame_width / 2, 32);
1366     hevc_state->frame_height_2x = ALIGN(frame_info->frame_height / 2, 32);
1367
1368     hevc_state->frame_width_4x = ALIGN(frame_info->frame_width / 4, 32);
1369     hevc_state->frame_height_4x = ALIGN(frame_info->frame_height / 4, 32);
1370
1371     hevc_state->frame_width_16x = ALIGN(hevc_state->frame_width_4x / 4, 32);
1372     hevc_state->frame_height_16x = ALIGN(hevc_state->frame_height_4x / 4, 32);
1373
1374     hevc_state->cu_records_offset = ALIGN(frame_info->width_in_lcu *
1375                                           frame_info->height_in_lcu *
1376                                           32, 4096);
1377
1378     hevc_state->hme_supported = 1;
1379     hevc_state->b16xme_supported = 1;
1380
1381     if (hevc_state->frame_width_4x <= GEN10_HEVC_VME_REF_WIN ||
1382         hevc_state->frame_height_4x <= GEN10_HEVC_VME_REF_WIN) {
1383         hevc_state->b16xme_supported = 0;
1384
1385         hevc_state->frame_width_4x = GEN10_HEVC_VME_REF_WIN;
1386         hevc_state->frame_height_4x = GEN10_HEVC_VME_REF_WIN;
1387     } else if (hevc_state->frame_width_16x <= GEN10_HEVC_VME_REF_WIN ||
1388                hevc_state->frame_height_16x <= GEN10_HEVC_VME_REF_WIN) {
1389         hevc_state->frame_width_16x = GEN10_HEVC_VME_REF_WIN;
1390         hevc_state->frame_height_16x = GEN10_HEVC_VME_REF_WIN;
1391     }
1392
1393     if (slice_param->slice_type == HEVC_SLICE_I) {
1394         hevc_state->hme_enabled = 0;
1395         hevc_state->b16xme_enabled = 0;
1396     } else {
1397         hevc_state->hme_enabled = hevc_state->hme_supported;
1398         hevc_state->b16xme_enabled = hevc_state->b16xme_supported;
1399     }
1400
1401     if (frame_info->lcu_size == 64)
1402         hevc_state->is_64lcu = 1;
1403     else
1404         hevc_state->is_64lcu = 0;
1405
1406     if (frame_info->bit_depth_luma_minus8 ||
1407         frame_info->bit_depth_chroma_minus8)
1408         hevc_state->is_10bit = 1;
1409     else
1410         hevc_state->is_10bit = 0;
1411
1412     brc_method = GEN10_HEVC_BRC_CQP;
1413     if (encoder_context->rate_control_mode & VA_RC_CBR)
1414         brc_method = GEN10_HEVC_BRC_CBR;
1415     else if (encoder_context->rate_control_mode & VA_RC_VBR)
1416         brc_method = GEN10_HEVC_BRC_VBR;
1417
1418     brc_reset = hevc_state->brc.brc_method != brc_method ||
1419                 frame_info->reallocate_flag;
1420
1421     if (!hevc_state->brc.brc_inited ||
1422         encoder_context->brc.need_reset ||
1423         brc_reset) {
1424         if (brc_method == GEN10_HEVC_BRC_CQP) {
1425             hevc_state->brc.brc_enabled = 0;
1426             hevc_state->num_pak_passes = 1;
1427         } else {
1428             hevc_state->brc.brc_enabled = 1;
1429             hevc_state->num_pak_passes = 1;//2;
1430
1431             if (brc_method == GEN10_HEVC_BRC_CBR) {
1432                 hevc_state->brc.target_bit_rate = encoder_context->brc.bits_per_second[0];
1433                 hevc_state->brc.max_bit_rate = encoder_context->brc.bits_per_second[0];
1434                 hevc_state->brc.min_bit_rate = encoder_context->brc.bits_per_second[0];
1435                 hevc_state->brc.window_size = encoder_context->brc.window_size;
1436             } else {
1437                 hevc_state->brc.max_bit_rate = encoder_context->brc.bits_per_second[0];
1438                 hevc_state->brc.target_bit_rate = encoder_context->brc.bits_per_second[0] *
1439                                                   encoder_context->brc.target_percentage[0] /
1440                                                   100;
1441
1442                 if (2 * hevc_state->brc.target_bit_rate < hevc_state->brc.max_bit_rate)
1443                     hevc_state->brc.min_bit_rate = 0;
1444                 else
1445                     hevc_state->brc.min_bit_rate = 2 * hevc_state->brc.target_bit_rate -
1446                                                    hevc_state->brc.max_bit_rate;
1447             }
1448         }
1449
1450         if (encoder_context->brc.hrd_buffer_size)
1451             hevc_state->brc.vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
1452         else if (encoder_context->brc.window_size)
1453             hevc_state->brc.vbv_buffer_size_in_bit = hevc_state->brc.max_bit_rate *
1454                                                      encoder_context->brc.window_size /
1455                                                      1000;
1456         else
1457             hevc_state->brc.vbv_buffer_size_in_bit = hevc_state->brc.max_bit_rate;
1458
1459         if (encoder_context->brc.hrd_initial_buffer_fullness)
1460             hevc_state->brc.init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
1461         else
1462             hevc_state->brc.init_vbv_buffer_fullness_in_bit = hevc_state->brc.vbv_buffer_size_in_bit / 2;
1463
1464         hevc_state->brc.gop_size = encoder_context->brc.gop_size;
1465         hevc_state->brc.gop_p = encoder_context->brc.num_pframes_in_gop;
1466         hevc_state->brc.gop_b = encoder_context->brc.num_bframes_in_gop;
1467
1468         hevc_state->brc.frame_rate_m = encoder_context->brc.framerate[0].num;
1469         hevc_state->brc.frame_rate_d = encoder_context->brc.framerate[0].den;
1470
1471         hevc_state->brc.brc_method = brc_method;
1472         hevc_state->brc.brc_reset = brc_reset || encoder_context->brc.need_reset;
1473
1474         if (brc_method == GEN10_HEVC_BRC_CQP && !hevc_state->brc.brc_inited) {
1475             hevc_state->brc.frame_rate_m = 30;
1476             hevc_state->brc.frame_rate_d = 1;
1477
1478             hevc_state->brc.target_bit_rate = (hevc_state->frame_width >> 4) * (hevc_state->frame_height >> 4)
1479                                               * 30 * 384 / 10 * 8;
1480             hevc_state->brc.max_bit_rate = hevc_state->brc.target_bit_rate;
1481             hevc_state->brc.min_bit_rate = hevc_state->brc.target_bit_rate;
1482             hevc_state->brc.window_size = 1500;
1483             hevc_state->brc.vbv_buffer_size_in_bit = (hevc_state->brc.target_bit_rate / 1000) * 1500;
1484             hevc_state->brc.init_vbv_buffer_fullness_in_bit = hevc_state->brc.vbv_buffer_size_in_bit / 2;
1485
1486             hevc_state->brc.gop_size = seq_param->intra_period < 2 ? 30 : seq_param->intra_period;
1487             hevc_state->brc.gop_p = (hevc_state->brc.gop_size - 1) /
1488                                     (!seq_param->ip_period ? 1 : seq_param->ip_period);
1489             hevc_state->brc.gop_b =  hevc_state->brc.gop_size - 1 - hevc_state->brc.gop_p;
1490         }
1491
1492         hevc_state->profile_level_max_frame =
1493             gen10_hevc_enc_get_profile_level_max_frame(seq_param, 0,
1494                                                        hevc_state->brc.frame_rate_m /
1495                                                        hevc_state->brc.frame_rate_d);
1496     }
1497
1498     hevc_state->sao_2nd_needed = 0;
1499     hevc_state->sao_first_pass_flag = 0;
1500     hevc_state->num_sao_passes = hevc_state->num_pak_passes;
1501     if (seq_param->seq_fields.bits.sample_adaptive_offset_enabled_flag &&
1502         (slice_param->slice_fields.bits.slice_sao_luma_flag ||
1503          slice_param->slice_fields.bits.slice_sao_chroma_flag)) {
1504         hevc_state->sao_2nd_needed = 1;
1505         hevc_state->sao_first_pass_flag = 1;
1506         hevc_state->num_sao_passes = hevc_state->num_pak_passes + 1;
1507     }
1508
1509     hevc_state->brc.target_usage = encoder_context->quality_level;
1510     hevc_state->thread_num_per_ctb = gen10_hevc_tu_settings[GEN10_TOTAL_THREAD_NUM_PER_LCU_TU_PARAM]
1511                                      [(hevc_state->brc.target_usage + 1) >> 2];
1512
1513     hevc_state->is_same_ref_list = frame_info->is_same_ref_list;
1514
1515     return VA_STATUS_SUCCESS;
1516 }
1517
1518 static VAStatus
1519 gen10_hevc_enc_init_parameters(VADriverContextP ctx,
1520                                struct encode_state *encode_state,
1521                                struct intel_encoder_context *encoder_context)
1522 {
1523     struct gen10_hevc_enc_context *vme_context;
1524     struct gen10_hevc_enc_state *hevc_state;
1525     struct gen10_hevc_enc_frame_info *frame_info;
1526     struct gen10_hevc_enc_common_res *common_res;
1527     VAStatus va_status = VA_STATUS_SUCCESS;
1528
1529     va_status = gen10_hevc_enc_check_parameters(ctx, encode_state, encoder_context);
1530     if (va_status != VA_STATUS_SUCCESS)
1531         goto EXIT;
1532
1533     vme_context = (struct gen10_hevc_enc_context *) encoder_context->vme_context;
1534     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
1535     frame_info = &vme_context->frame_info;
1536     common_res = &vme_context->common_res;
1537
1538     gen10_hevc_enc_init_frame_info(ctx, encode_state, encoder_context, frame_info);
1539     gen10_hevc_enc_init_status_buffer(ctx, encode_state, encoder_context,
1540                                       &vme_context->status_buffer);
1541
1542     if (!hevc_state->lambda_init ||
1543         frame_info->reallocate_flag) {
1544         gen10_hevc_enc_init_lambda_param(&vme_context->lambda_param, frame_info->bit_depth_luma_minus8,
1545                                          frame_info->bit_depth_chroma_minus8);
1546
1547         hevc_state->lambda_init = 1;
1548     }
1549
1550     if (gen10_hevc_enc_init_common_resource(ctx, encode_state, encoder_context,
1551                                             common_res,
1552                                             frame_info,
1553                                             frame_info->picture_coding_type != HEVC_SLICE_I,
1554                                             0) < 0) {
1555         va_status = VA_STATUS_ERROR_ALLOCATION_FAILED;
1556         goto EXIT;
1557     }
1558
1559     va_status = gen10_hevc_enc_init_misc_paramers(ctx, encode_state, encoder_context);
1560     if (va_status != VA_STATUS_SUCCESS)
1561         goto EXIT;
1562
1563     va_status = gen10_hevc_enc_ensure_surface(ctx,
1564                                               common_res->uncompressed_pic.obj_surface,
1565                                               frame_info->bit_depth_luma_minus8,
1566                                               0);
1567     if (va_status != VA_STATUS_SUCCESS)
1568         goto EXIT;
1569
1570     va_status = gen10_hevc_enc_ensure_surface(ctx,
1571                                               common_res->reconstructed_pic.obj_surface,
1572                                               frame_info->bit_depth_luma_minus8,
1573                                               1);
1574     if (va_status != VA_STATUS_SUCCESS)
1575         goto EXIT;
1576
1577     va_status = gen10_hevc_init_surface_priv(ctx, encode_state, encoder_context,
1578                                              common_res->reconstructed_pic.obj_surface);
1579     if (va_status != VA_STATUS_SUCCESS)
1580         goto EXIT;
1581
1582     if (frame_info->reallocate_flag) {
1583         va_status = gen10_hevc_allocate_enc_resources(ctx, encode_state,
1584                                                       encoder_context);
1585         if (va_status != VA_STATUS_SUCCESS)
1586             goto EXIT;
1587
1588         hevc_state->frame_number = 0;
1589     }
1590
1591     va_status = gen10_hevc_enc_init_const_resources(ctx, encode_state, encoder_context);
1592     if (va_status != VA_STATUS_SUCCESS)
1593         goto EXIT;
1594
1595 EXIT:
1596     return va_status;
1597 }
1598
/*
 * Media walker pattern identifiers.
 * NOTE(review): not referenced in the nearby code; presumably they select
 * the dependency pattern used by the walker kernels - confirm against the
 * callers.
 */
#define GEN10_WALKER_26_DEGREE       0
#define GEN10_WALKER_26Z_DEGREE      1
#define GEN10_WALKER_26X_DEGREE      2
#define GEN10_WALKER_26ZX_DEGREE     3
1603
1604 static void
1605 gen10_init_media_object_walker_parameter(struct gen10_hevc_enc_kernel_walker_parameter *kernel_walker_param,
1606                                          struct gpe_media_object_walker_parameter *walker_param)
1607 {
1608     memset(walker_param, 0, sizeof(*walker_param));
1609
1610     walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
1611
1612     walker_param->block_resolution.x = kernel_walker_param->resolution_x;
1613     walker_param->block_resolution.y = kernel_walker_param->resolution_y;
1614
1615     walker_param->global_resolution.x = kernel_walker_param->resolution_x;
1616     walker_param->global_resolution.y = kernel_walker_param->resolution_y;
1617
1618     walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
1619     walker_param->global_outer_loop_stride.y = 0;
1620
1621     walker_param->global_inner_loop_unit.x = 0;
1622     walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
1623
1624     walker_param->local_loop_exec_count = 0xFFFF;
1625     walker_param->global_loop_exec_count = 0xFFFF;
1626
1627     if (kernel_walker_param->no_dependency) {
1628         walker_param->scoreboard_mask = 0;
1629         walker_param->use_scoreboard = 0;
1630         walker_param->local_outer_loop_stride.x = 0;
1631         walker_param->local_outer_loop_stride.y = 1;
1632         walker_param->local_inner_loop_unit.x = 1;
1633         walker_param->local_inner_loop_unit.y = 0;
1634         walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
1635         walker_param->local_end.y = 0;
1636     } else if (kernel_walker_param->use_vertical_scan) {
1637         walker_param->scoreboard_mask            = 0x1;
1638         walker_param->local_outer_loop_stride.x   = 1;
1639         walker_param->local_outer_loop_stride.y   = 0;
1640         walker_param->local_inner_loop_unit.x   = 0;
1641         walker_param->local_inner_loop_unit.y   = 1;
1642         walker_param->local_end.x             = 0;
1643         walker_param->local_end.y             = kernel_walker_param->resolution_y - 1;
1644     } else {
1645         walker_param->local_end.x = 0;
1646         walker_param->local_end.y = 0;
1647     }
1648 }
1649
1650 static void
1651 gen10_run_kernel_media_object(VADriverContextP ctx,
1652                               struct intel_encoder_context *encoder_context,
1653                               struct i965_gpe_context *gpe_context,
1654                               int media_function,
1655                               struct gpe_media_object_parameter *param)
1656 {
1657     struct intel_batchbuffer *batch = encoder_context->base.batch;
1658     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
1659     struct gen10_hevc_enc_status_buffer *status_buffer;
1660     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1661
1662     status_buffer = &vme_context->status_buffer;
1663
1664     intel_batchbuffer_start_atomic(batch, 0x1000);
1665
1666     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1667     mi_store_data_imm.bo = status_buffer->gpe_res.bo;
1668     mi_store_data_imm.offset = status_buffer->status_media_state_offset;
1669     mi_store_data_imm.dw0 = media_function;
1670     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1671
1672     intel_batchbuffer_emit_mi_flush(batch);
1673
1674     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
1675     gen8_gpe_media_object(ctx, gpe_context, batch, param);
1676     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
1677
1678     gen9_gpe_pipeline_end(ctx, gpe_context, batch);
1679
1680     intel_batchbuffer_end_atomic(batch);
1681
1682     intel_batchbuffer_flush(batch);
1683 }
1684
1685 static void
1686 gen10_run_kernel_media_object_walker(VADriverContextP ctx,
1687                                      struct intel_encoder_context *encoder_context,
1688                                      struct i965_gpe_context *gpe_context,
1689                                      int media_function,
1690                                      struct gpe_media_object_walker_parameter *param)
1691 {
1692     struct intel_batchbuffer *batch = encoder_context->base.batch;
1693     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
1694     struct gen10_hevc_enc_status_buffer *status_buffer;
1695     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1696
1697     status_buffer = &vme_context->status_buffer;
1698
1699     intel_batchbuffer_start_atomic(batch, 0x1000);
1700
1701     intel_batchbuffer_emit_mi_flush(batch);
1702
1703     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1704     mi_store_data_imm.bo = status_buffer->gpe_res.bo;
1705     mi_store_data_imm.offset = status_buffer->status_media_state_offset;
1706     mi_store_data_imm.dw0 = media_function;
1707     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1708
1709     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
1710     gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
1711     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
1712
1713     gen9_gpe_pipeline_end(ctx, gpe_context, batch);
1714
1715     intel_batchbuffer_end_atomic(batch);
1716
1717     intel_batchbuffer_flush(batch);
1718 }
1719
/*
 * Clamp the lvalue x in place to the closed range [min, max].
 *
 * The body is wrapped in do { } while (0) so that "BRC_CLIP(...);" is a
 * single statement and can be used as the unbraced body of an if/else.
 * Every argument is parenthesized so expression arguments expand safely.
 * x, min and max may each be evaluated more than once: do not pass
 * expressions with side effects.
 */
#define BRC_CLIP(x, min, max)                                               \
    do {                                                                    \
        (x) = (((x) > (max)) ? (max) : (((x) < (min)) ? (min) : (x)));      \
    } while (0)
1724
/* NOTE(review): presumably the upper bound on BRC passes - confirm at the users. */
#define GEN10_HEVC_MAX_BRC_PASSES           4

/* Single-bit flags for the brc_flag field of the BRC init/reset CURBE. */
#define GEN10_HEVC_BRCINIT_ISCBR            (1 << 4)
#define GEN10_HEVC_BRCINIT_ISVBR            (1 << 5)
#define GEN10_HEVC_BRCINIT_ISCQP            (1 << 14)
#define GEN10_HEVC_BRCINIT_DISABLE_MBBRC    (1 << 15)
1731
1732 static void
1733 gen10_hevc_enc_brc_init_set_curbe(VADriverContextP ctx,
1734                                   struct encode_state *encode_state,
1735                                   struct intel_encoder_context *encoder_context,
1736                                   struct i965_gpe_context *gpe_context)
1737 {
1738     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
1739     struct gen10_hevc_enc_state *hevc_state;
1740     gen10_hevc_brc_init_curbe_data *brc_curbe;
1741     double input_bits_per_frame, bps_ratio;
1742
1743     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
1744
1745     brc_curbe = i965_gpe_context_map_curbe(gpe_context);
1746
1747     if (!brc_curbe)
1748         return;
1749
1750     memset(brc_curbe, 0, sizeof(gen10_hevc_brc_init_curbe_data));
1751
1752     brc_curbe->dw0.profile_level_max_frame = hevc_state->profile_level_max_frame;
1753     brc_curbe->dw1.init_buf_full           = hevc_state->brc.init_vbv_buffer_fullness_in_bit;
1754     brc_curbe->dw2.buf_size                = hevc_state->brc.vbv_buffer_size_in_bit;
1755     brc_curbe->dw3.target_bit_rate         = hevc_state->brc.target_bit_rate;
1756     brc_curbe->dw4.maximum_bit_rate        = hevc_state->brc.max_bit_rate;
1757     brc_curbe->dw5.minimum_bit_rate        = 0;
1758     brc_curbe->dw6.frame_ratem             = hevc_state->brc.frame_rate_m;
1759     brc_curbe->dw7.frame_rated             = hevc_state->brc.frame_rate_d;
1760     if (hevc_state->brc.lcu_brc_enabled)
1761         brc_curbe->dw8.brc_flag            = 0;
1762     else
1763         brc_curbe->dw8.brc_flag            = GEN10_HEVC_BRCINIT_DISABLE_MBBRC;
1764
1765     brc_curbe->dw25.ac_qp_buffer = 1;
1766     brc_curbe->dw25.log2_max_cu_size = hevc_state->is_64lcu ? 6 : 5;
1767     brc_curbe->dw25.sliding_wind_size = 30;
1768
1769     if (hevc_state->brc.brc_method == GEN10_HEVC_BRC_CQP) {
1770         brc_curbe->dw8.brc_flag               = GEN10_HEVC_BRCINIT_ISCQP;
1771     } else if (hevc_state->brc.brc_method == GEN10_HEVC_BRC_CBR) {
1772         brc_curbe->dw8.brc_flag               |= GEN10_HEVC_BRCINIT_ISCBR;
1773     } else if (hevc_state->brc.brc_method == GEN10_HEVC_BRC_VBR) {
1774         brc_curbe->dw8.brc_flag               |= GEN10_HEVC_BRCINIT_ISVBR;
1775     }
1776
1777     brc_curbe->dw9.frame_width   = hevc_state->frame_width;
1778     brc_curbe->dw10.frame_height = hevc_state->frame_height;
1779     brc_curbe->dw10.avbr_accuracy = 30;
1780     brc_curbe->dw11.avbr_convergence = 150;
1781
1782     brc_curbe->dw14.max_brc_level = 1;
1783     brc_curbe->dw8.brc_gopp                  = hevc_state->brc.gop_p;
1784     brc_curbe->dw9.brc_gopb                  = hevc_state->brc.gop_b;
1785
1786     brc_curbe->dw11.minimum_qp = 1;
1787     brc_curbe->dw12.maximum_qp = 51;
1788
1789     brc_curbe->dw16.instant_rate_thr0_pframe      = 40;
1790     brc_curbe->dw16.instant_rate_thr1_pframe      = 60;
1791     brc_curbe->dw16.instant_rate_thr2_pframe      = 80;
1792     brc_curbe->dw16.instant_rate_thr3_pframe      = 120;
1793     brc_curbe->dw17.instant_rate_thr0_bframe      = 35;
1794     brc_curbe->dw17.instant_rate_thr1_bframe      = 60;
1795     brc_curbe->dw17.instant_rate_thr2_bframe      = 80;
1796     brc_curbe->dw17.instant_rate_thr3_bframe      = 120;
1797     brc_curbe->dw18.instant_rate_thr0_iframe      = 40;
1798     brc_curbe->dw18.instant_rate_thr1_iframe      = 60;
1799     brc_curbe->dw18.instant_rate_thr2_iframe      = 90;
1800     brc_curbe->dw18.instant_rate_thr3_iframe      = 115;
1801
1802     input_bits_per_frame = (double)(brc_curbe->dw4.maximum_bit_rate) * ((double)(hevc_state->brc.frame_rate_d)) /
1803                            ((double)(hevc_state->brc.frame_rate_m));
1804
1805     if (brc_curbe->dw2.buf_size < (uint32_t)input_bits_per_frame * 4)
1806         brc_curbe->dw2.buf_size = (uint32_t)input_bits_per_frame * 4;
1807
1808     if (!brc_curbe->dw1.init_buf_full)
1809         brc_curbe->dw1.init_buf_full = 7 * brc_curbe->dw2.buf_size / 8;
1810     else if (brc_curbe->dw1.init_buf_full < (uint32_t)input_bits_per_frame * 2)
1811         brc_curbe->dw1.init_buf_full = (uint32_t)input_bits_per_frame * 2;
1812     else if (brc_curbe->dw1.init_buf_full > brc_curbe->dw2.buf_size)
1813         brc_curbe->dw1.init_buf_full = brc_curbe->dw2.buf_size;
1814
1815     bps_ratio = input_bits_per_frame / ((double)(hevc_state->brc.vbv_buffer_size_in_bit) / 30);
1816
1817     BRC_CLIP(bps_ratio, 0.1, 3.5);
1818
1819     brc_curbe->dw19.deviation_thr0_pbframe      = (uint32_t)(-50 * pow(0.90, bps_ratio));
1820     brc_curbe->dw19.deviation_thr1_pbframe      = (uint32_t)(-50 * pow(0.66, bps_ratio));
1821     brc_curbe->dw19.deviation_thr2_pbframe      = (uint32_t)(-50 * pow(0.46, bps_ratio));
1822     brc_curbe->dw19.deviation_thr3_pbframe      = (uint32_t)(-50 * pow(0.3, bps_ratio));
1823
1824     brc_curbe->dw20.deviation_thr4_pbframe      = (uint32_t)(50 * pow(0.3, bps_ratio));
1825     brc_curbe->dw20.deviation_thr5_pbframe      = (uint32_t)(50 * pow(0.46, bps_ratio));
1826     brc_curbe->dw20.deviation_thr6_pbframe      = (uint32_t)(50 * pow(0.7, bps_ratio));
1827     brc_curbe->dw20.deviation_thr7_pbframe      = (uint32_t)(50 * pow(0.9, bps_ratio));
1828
1829     brc_curbe->dw21.deviation_thr0_vbrctrl   = (uint32_t)(-50 * pow(0.9, bps_ratio));
1830     brc_curbe->dw21.deviation_thr1_vbrctrl   = (uint32_t)(-50 * pow(0.7, bps_ratio));
1831     brc_curbe->dw21.deviation_thr2_vbrctrl   = (uint32_t)(-50 * pow(0.5, bps_ratio));
1832     brc_curbe->dw21.deviation_thr3_vbrctrl   = (uint32_t)(-50 * pow(0.3, bps_ratio));
1833
1834     brc_curbe->dw22.deviation_thr4_vbrctrl   = (uint32_t)(100 * pow(0.4, bps_ratio));
1835     brc_curbe->dw22.deviation_thr5_vbrctrl   = (uint32_t)(100 * pow(0.5, bps_ratio));
1836     brc_curbe->dw22.deviation_thr6_vbrctrl   = (uint32_t)(100 * pow(0.75, bps_ratio));
1837     brc_curbe->dw22.deviation_thr7_vbrctrl   = (uint32_t)(100 * pow(0.9, bps_ratio));
1838
1839     brc_curbe->dw23.deviation_thr0_iframe       = (uint32_t)(-50 * pow(0.8, bps_ratio));
1840     brc_curbe->dw23.deviation_thr1_iframe       = (uint32_t)(-50 * pow(0.6, bps_ratio));
1841     brc_curbe->dw23.deviation_thr2_iframe       = (uint32_t)(-50 * pow(0.34, bps_ratio));
1842     brc_curbe->dw23.deviation_thr3_iframe       = (uint32_t)(-50 * pow(0.2, bps_ratio));
1843
1844     brc_curbe->dw24.deviation_thr4_iframe       = (uint32_t)(50 * pow(0.2, bps_ratio));
1845     brc_curbe->dw24.deviation_thr5_iframe       = (uint32_t)(50 * pow(0.4, bps_ratio));
1846     brc_curbe->dw24.deviation_thr6_iframe       = (uint32_t)(50 * pow(0.66, bps_ratio));
1847     brc_curbe->dw24.deviation_thr7_iframe       = (uint32_t)(50 * pow(0.9, bps_ratio));
1848
1849     if (!hevc_state->brc.brc_inited)
1850         hevc_state->brc.brc_init_current_target_buf_full_in_bits = brc_curbe->dw1.init_buf_full;
1851
1852     hevc_state->brc.brc_init_reset_buf_size_in_bits    = (double)brc_curbe->dw2.buf_size;
1853     hevc_state->brc.brc_init_reset_input_bits_per_frame  = input_bits_per_frame;
1854
1855     i965_gpe_context_unmap_curbe(gpe_context);
1856 }
1857
1858 static void
1859 gen10_hevc_enc_brc_init_add_surfaces(VADriverContextP ctx,
1860                                      struct encode_state *encode_state,
1861                                      struct intel_encoder_context *encoder_context,
1862                                      struct i965_gpe_context *gpe_context)
1863 {
1864     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
1865
1866     i965_add_buffer_gpe_surface(ctx,
1867                                 gpe_context,
1868                                 &vme_context->res_brc_history_buffer,
1869                                 0,
1870                                 BYTES2UINT32(vme_context->res_brc_history_buffer.size),
1871                                 0,
1872                                 0);
1873
1874     i965_add_buffer_2d_gpe_surface(ctx,
1875                                    gpe_context,
1876                                    &vme_context->res_brc_me_dist_surface,
1877                                    1,
1878                                    I965_SURFACEFORMAT_R8_UNORM,
1879                                    1);
1880 }
1881
1882 static void
1883 gen10_hevc_enc_brc_init_reset(VADriverContextP ctx,
1884                               struct encode_state *encode_state,
1885                               struct intel_encoder_context *encoder_context)
1886 {
1887     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
1888     struct gen10_hevc_enc_state *hevc_state;
1889     struct gpe_media_object_parameter media_object_param;
1890     struct i965_gpe_context *gpe_context;
1891     int gpe_index = GEN10_HEVC_BRC_INIT;
1892     int media_function = GEN10_HEVC_MEDIA_STATE_BRC_INIT_RESET;
1893
1894     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
1895
1896     if (hevc_state->brc.brc_inited)
1897         gpe_index = GEN10_HEVC_BRC_RESET;
1898
1899     gpe_context = &(vme_context->brc_context.gpe_contexts[gpe_index]);
1900
1901     gen8_gpe_context_init(ctx, gpe_context);
1902     gen9_gpe_reset_binding_table(ctx, gpe_context);
1903
1904     gen10_hevc_enc_brc_init_set_curbe(ctx, encode_state, encoder_context, gpe_context);
1905     gen10_hevc_enc_brc_init_add_surfaces(ctx, encode_state, encoder_context, gpe_context);
1906
1907     gen8_gpe_setup_interface_data(ctx, gpe_context);
1908
1909     memset(&media_object_param, 0, sizeof(media_object_param));
1910     gen10_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
1911 }
1912
1913 static void
1914 gen10_hevc_brc_add_pic_img_state(VADriverContextP ctx,
1915                                  struct encode_state *encode_state,
1916                                  struct intel_encoder_context *encoder_context)
1917 {
1918     struct gen10_hevc_enc_context *pak_context = encoder_context->mfc_context;
1919     struct gen10_hevc_enc_state *hevc_state;
1920     VAEncPictureParameterBufferHEVC  *pic_param;
1921     VAEncSequenceParameterBufferHEVC *seq_param;
1922     VAEncSliceParameterBufferHEVC *slice_param;
1923     unsigned int batch_value = 0, tmp_value, i;
1924     uint32_t *batch_ptr, *buffer_ptr;
1925
1926     hevc_state = (struct gen10_hevc_enc_state *) pak_context->enc_priv_state;
1927
1928     buffer_ptr = (uint32_t *)i965_map_gpe_resource(&pak_context->res_brc_pic_image_state_read_buffer);
1929
1930     if (!buffer_ptr)
1931         return;
1932
1933     pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1934     seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1935     slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1936
1937     for (i = 0; i < 4; i++) {
1938         batch_ptr = buffer_ptr + 32 * i;
1939
1940
1941         /* DW 0 */
1942         *(batch_ptr++) = HCP_PIC_STATE | (31 - 2);
1943
1944         /* DW 1 */
1945         batch_value = (pak_context->frame_info.width_in_cu - 1) |
1946                       ((pak_context->frame_info.height_in_cu - 1) << 16);
1947         batch_value |= pic_param->pic_fields.bits.transform_skip_enabled_flag << 15;
1948         *(batch_ptr++) = batch_value;
1949
1950         batch_value = (seq_param->log2_min_pcm_luma_coding_block_size_minus3 << 8) |
1951                       (seq_param->log2_max_pcm_luma_coding_block_size_minus3 << 10) |
1952                       (seq_param->log2_min_transform_block_size_minus2  << 4) |
1953                       ((seq_param->log2_min_transform_block_size_minus2 +
1954                         seq_param->log2_diff_max_min_transform_block_size) << 6) |
1955                       ((seq_param->log2_min_luma_coding_block_size_minus3 +
1956                         seq_param->log2_diff_max_min_luma_coding_block_size) << 2) |
1957                       (seq_param->log2_min_luma_coding_block_size_minus3 << 0);
1958
1959         /* DW 2 */
1960         *(batch_ptr++) = batch_value;
1961
1962         /* DW 3 */
1963         *(batch_ptr++) = 0;
1964
1965         /* DW 4 */
1966         batch_value = 0;
1967         if ((slice_param->slice_fields.bits.slice_sao_luma_flag ||
1968              slice_param->slice_fields.bits.slice_sao_chroma_flag) &&
1969             !hevc_state->is_10bit)
1970             batch_value |= (1 << 3);
1971
1972         if (pic_param->pic_fields.bits.cu_qp_delta_enabled_flag) {
1973             tmp_value = pic_param->diff_cu_qp_delta_depth;
1974             batch_value |= (1 << 5) | (tmp_value << 6);
1975         }
1976         batch_value |= (0 << 4) |
1977                        (seq_param->seq_fields.bits.pcm_loop_filter_disabled_flag << 8) |
1978                        (0 << 9) |
1979                        (0 << 10) | //(pic_param->log2_parallel_merge_level_minus2
1980                        (0 << 13) |
1981                        (0 << 15) |
1982                        (0 << 17) | //tile is disabled.
1983                        (pic_param->pic_fields.bits.weighted_bipred_flag << 18) |
1984                        (pic_param->pic_fields.bits.weighted_pred_flag << 19) |
1985                        (0 << 20) | //20/21 is reserved.
1986                        (pic_param->pic_fields.bits.transform_skip_enabled_flag << 22) |
1987                        (seq_param->seq_fields.bits.amp_enabled_flag << 23) |
1988                        (pic_param->pic_fields.bits.transquant_bypass_enabled_flag << 25) |
1989                        (seq_param->seq_fields.bits.strong_intra_smoothing_enabled_flag << 26) |
1990                        (0 << 27); // VME CU packet
1991
1992         *(batch_ptr++) = batch_value;
1993
1994         /* DW 5 */
1995         batch_value = (pic_param->pps_cr_qp_offset & 0x1f) << 5 |
1996                       (pic_param->pps_cb_qp_offset & 0x1f);
1997         batch_value |= (seq_param->max_transform_hierarchy_depth_inter << 13) |
1998                        (seq_param->max_transform_hierarchy_depth_intra << 10) |
1999                        (seq_param->pcm_sample_bit_depth_luma_minus1 << 20) |
2000                        (seq_param->pcm_sample_bit_depth_chroma_minus1 << 16) |
2001                        (seq_param->seq_fields.bits.bit_depth_luma_minus8 << 27) |
2002                        (seq_param->seq_fields.bits.bit_depth_chroma_minus8 << 24);
2003         *(batch_ptr++) = batch_value;
2004
2005         /* DW6 */
2006         batch_value = pic_param->ctu_max_bitsize_allowed;
2007         batch_value |= (0 << 24 |
2008                         1 << 25 |
2009                         1 << 26 |
2010                         0 << 29); // bit 29 reload slice_pointer_flag.
2011
2012         if (i == 0)
2013             batch_value |= (0 << 16); // Initial pass
2014         else
2015             batch_value |= (1 << 16); // subsequent pass
2016         *(batch_ptr++) = batch_value;
2017
2018         /* DW 7. Frame_rate Max */
2019         *(batch_ptr++) = 0;
2020
2021         /* Dw 8. Frame_rate Min */
2022         *(batch_ptr++) = 0;
2023
2024         /* DW 9. Frame_rate Min/MAX slice_delta */
2025         *(batch_ptr++) = 0;
2026
2027         /* DW 10..17 */
2028         *(batch_ptr++) = 0;
2029         *(batch_ptr++) = 0;
2030         *(batch_ptr++) = 0;
2031         *(batch_ptr++) = 0;
2032         *(batch_ptr++) = 0;
2033         *(batch_ptr++) = 0;
2034         *(batch_ptr++) = 0;
2035         *(batch_ptr++) = 0;
2036
2037         /* DW 18 */
2038         *(batch_ptr++) = 0;
2039
2040         /* DW 19..20 */
2041         *(batch_ptr++) = 0;
2042         *(batch_ptr++) = 0;
2043
2044         /* DW 21..30 */
2045         *(batch_ptr++) = 0;
2046         *(batch_ptr++) = 0;
2047         *(batch_ptr++) = 0;
2048         *(batch_ptr++) = 0;
2049         *(batch_ptr++) = 0;
2050         *(batch_ptr++) = 0;
2051         *(batch_ptr++) = 0;
2052         *(batch_ptr++) = 0;
2053         *(batch_ptr++) = 0;
2054         *(batch_ptr++) = 0;
2055
2056         /* DW 31 */
2057         *(batch_ptr++) = MI_BATCH_BUFFER_END;
2058     }
2059
2060     i965_unmap_gpe_resource(&pak_context->res_brc_pic_image_state_read_buffer);
2061 }
2062
2063 static VAStatus
2064 gen10_hevc_enc_brc_frame_update_add_surfaces(VADriverContextP ctx,
2065                                              struct encode_state *encode_state,
2066                                              struct intel_encoder_context *encoder_context,
2067                                              struct i965_gpe_context *gpe_context)
2068 {
2069     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2070     struct gen10_hevc_enc_state *hevc_state;
2071     int pak_read_idx;
2072
2073     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2074
2075     i965_add_buffer_gpe_surface(ctx,
2076                                 gpe_context,
2077                                 &vme_context->res_brc_history_buffer,
2078                                 0,
2079                                 BYTES2UINT32(vme_context->res_brc_history_buffer.size),
2080                                 0,
2081                                 0);
2082
2083     pak_read_idx = !hevc_state->curr_pak_stat_index;
2084     i965_add_buffer_gpe_surface(ctx,
2085                                 gpe_context,
2086                                 &vme_context->res_brc_pak_statistics_buffer[pak_read_idx],
2087                                 0,
2088                                 BYTES2UINT32(vme_context->res_brc_pak_statistics_buffer[pak_read_idx].size),
2089                                 0,
2090                                 1);
2091
2092     i965_add_buffer_gpe_surface(ctx,
2093                                 gpe_context,
2094                                 &vme_context->res_brc_pic_image_state_read_buffer,
2095                                 0,
2096                                 BYTES2UINT32(vme_context->res_brc_pic_image_state_read_buffer.size),
2097                                 0,
2098                                 2);
2099
2100     i965_add_buffer_gpe_surface(ctx,
2101                                 gpe_context,
2102                                 &vme_context->res_brc_pic_image_state_write_buffer,
2103                                 0,
2104                                 BYTES2UINT32(vme_context->res_brc_pic_image_state_write_buffer.size),
2105                                 0,
2106                                 3);
2107
2108     i965_add_buffer_gpe_surface(ctx,
2109                                 gpe_context,
2110                                 &vme_context->res_brc_input_enc_kernel_buffer,
2111                                 0,
2112                                 BYTES2UINT32(vme_context->res_brc_input_enc_kernel_buffer.size),
2113                                 0,
2114                                 4);
2115
2116     i965_add_buffer_2d_gpe_surface(ctx,
2117                                    gpe_context,
2118                                    &vme_context->res_brc_me_dist_surface,
2119                                    1,
2120                                    I965_SURFACEFORMAT_R8_UNORM,
2121                                    5);
2122
2123     i965_add_buffer_2d_gpe_surface(ctx,
2124                                    gpe_context,
2125                                    &vme_context->res_brc_const_data_surface,
2126                                    1,
2127                                    I965_SURFACEFORMAT_R8_UNORM,
2128                                    6);
2129
2130     i965_add_buffer_2d_gpe_surface(ctx,
2131                                    gpe_context,
2132                                    &vme_context->res_mb_stat_surface,
2133                                    1,
2134                                    I965_SURFACEFORMAT_R8_UNORM,
2135                                    7);
2136
2137     i965_add_buffer_gpe_surface(ctx,
2138                                 gpe_context,
2139                                 &vme_context->res_mv_dist_sum_buffer,
2140                                 0,
2141                                 BYTES2UINT32(vme_context->res_mv_dist_sum_buffer.size),
2142                                 0,
2143                                 8);
2144
2145     return VA_STATUS_SUCCESS;
2146 }
2147
2148 static VAStatus
2149 gen10_hevc_enc_brc_update_set_curbe(VADriverContextP ctx,
2150                                     struct encode_state *encode_state,
2151                                     struct intel_encoder_context *encoder_context,
2152                                     struct i965_gpe_context *gpe_context,
2153                                     int lcu_update)
2154 {
2155     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2156     struct gen10_hevc_enc_state *hevc_state;
2157     gen10_hevc_brc_update_curbe_data      *brc_update;
2158     VAEncSliceParameterBufferHEVC *slice_param;
2159     VAEncPictureParameterBufferHEVC  *pic_param;
2160
2161     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2162
2163     slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2164     pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2165
2166     brc_update = i965_gpe_context_map_curbe(gpe_context);
2167
2168     if (!brc_update)
2169         return VA_STATUS_ERROR_OPERATION_FAILED;
2170
2171     memset(brc_update, 0, sizeof(gen10_hevc_brc_update_curbe_data));
2172
2173     if (hevc_state->brc.brc_init_current_target_buf_full_in_bits >
2174         (double)hevc_state->brc.brc_init_reset_buf_size_in_bits) {
2175         hevc_state->brc.brc_init_current_target_buf_full_in_bits -=
2176             (double)hevc_state->brc.brc_init_reset_buf_size_in_bits;
2177         brc_update->dw5.target_size_flag  = 1;
2178     }
2179
2180     brc_update->dw0.target_size    = (uint32_t)(hevc_state->brc.brc_init_current_target_buf_full_in_bits);
2181     brc_update->dw1.frame_num      = hevc_state->frame_number;
2182
2183     brc_update->dw2.picture_header_size = gen10_hevc_enc_get_pic_header_size(encode_state);
2184
2185     if (slice_param->slice_type == HEVC_SLICE_I)
2186         brc_update->dw5.curr_frame_brclevel = 2;
2187     else if (slice_param->slice_type == HEVC_SLICE_P ||
2188              hevc_state->low_delay)
2189         brc_update->dw5.curr_frame_brclevel = 0;
2190     else
2191         brc_update->dw5.curr_frame_brclevel = 1;
2192
2193     brc_update->dw5.max_num_paks = GEN10_HEVC_MAX_BRC_PASSES;
2194
2195     if (hevc_state->brc.brc_method == GEN10_HEVC_BRC_CQP) {
2196         int qp_value;
2197
2198         qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2199         BRC_CLIP(qp_value, 1, 51);
2200         brc_update->dw6.cqp_value = qp_value;
2201     }
2202
2203     brc_update->dw14.parallel_mode = 0;
2204
2205     if (lcu_update == 1)
2206         hevc_state->brc.brc_init_current_target_buf_full_in_bits +=
2207             hevc_state->brc.brc_init_reset_input_bits_per_frame;
2208
2209     brc_update->dw3.start_gadj_frame0 = 10;
2210     brc_update->dw3.start_gadj_frame1 = 50;
2211     brc_update->dw4.start_gadj_frame2 = 100;
2212     brc_update->dw4.start_gadj_frame3 = 150;
2213
2214     brc_update->dw8.start_gadj_mult0 = 1;
2215     brc_update->dw8.start_gadj_mult1 = 1;
2216     brc_update->dw8.start_gadj_mult2 = 3;
2217     brc_update->dw8.start_gadj_mult3 = 2;
2218     brc_update->dw9.start_gadj_mult4 = 1;
2219
2220     brc_update->dw9.start_gadj_divd0 = 40;
2221     brc_update->dw9.start_gadj_divd1 = 5;
2222     brc_update->dw9.start_gadj_divd2 = 5;
2223     brc_update->dw10.start_gadj_divd3 = 3;
2224     brc_update->dw10.start_gadj_divd4 = 1;
2225
2226     brc_update->dw10.qp_threshold0 = 7;
2227     brc_update->dw10.qp_threshold1 = 18;
2228     brc_update->dw11.qp_threshold2 = 25;
2229     brc_update->dw11.qp_threshold3 = 37;
2230
2231     brc_update->dw11.grate_ratio_thr0 = 40;
2232     brc_update->dw11.grate_ratio_thr1 = 75;
2233     brc_update->dw12.grate_ratio_thr2 = 97;
2234     brc_update->dw12.grate_ratio_thr3 = 103;
2235     brc_update->dw12.grate_ratio_thr4 = 125;
2236     brc_update->dw12.grate_ratio_thr5 = 160;
2237
2238     brc_update->dw13.grate_ratio_thr6 = -3;
2239     brc_update->dw13.grate_ratio_thr7 = -2;
2240     brc_update->dw13.grate_ratio_thr8 = -1;
2241     brc_update->dw13.grate_ratio_thr9 = 0;
2242
2243     brc_update->dw14.grate_ratio_thr10 = 1;
2244     brc_update->dw14.grate_ratio_thr11 = 2;
2245     brc_update->dw14.grate_ratio_thr12 = 3;
2246
2247     i965_gpe_context_unmap_curbe(gpe_context);
2248     return VA_STATUS_SUCCESS;
2249 }
2250
2251 static void
2252 gen10_hevc_enc_brc_frame_update_kernel(VADriverContextP ctx,
2253                                        struct encode_state *encode_state,
2254                                        struct intel_encoder_context *encoder_context)
2255 {
2256     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2257     struct i965_gpe_context *gpe_context;
2258     int gpe_index = GEN10_HEVC_BRC_FRAME_UPDATE;
2259     int media_function = GEN10_HEVC_MEDIA_STATE_BRC_UPDATE;
2260     struct gpe_media_object_parameter media_object_param;
2261
2262     gpe_context = &(vme_context->brc_context.gpe_contexts[gpe_index]);
2263
2264     gen8_gpe_context_init(ctx, gpe_context);
2265     gen9_gpe_reset_binding_table(ctx, gpe_context);
2266
2267     gen10_hevc_brc_add_pic_img_state(ctx, encode_state, encoder_context);
2268     gen10_hevc_enc_brc_update_set_curbe(ctx, encode_state, encoder_context, gpe_context, 0);
2269     gen10_hevc_enc_brc_frame_update_add_surfaces(ctx, encode_state, encoder_context, gpe_context);
2270     gen8_gpe_setup_interface_data(ctx, gpe_context);
2271
2272     memset(&media_object_param, 0, sizeof(media_object_param));
2273     gen10_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
2274 }
2275
2276 static void
2277 gen10_hevc_enc_brc_lcu_update_add_surfaces(VADriverContextP ctx,
2278                                            struct encode_state *encode_state,
2279                                            struct intel_encoder_context *encoder_context,
2280                                            struct i965_gpe_context *gpe_context)
2281 {
2282     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2283
2284     i965_add_buffer_gpe_surface(ctx,
2285                                 gpe_context,
2286                                 &vme_context->res_brc_history_buffer,
2287                                 0,
2288                                 BYTES2UINT32(vme_context->res_brc_history_buffer.size),
2289                                 0,
2290                                 0);
2291
2292     i965_add_buffer_2d_gpe_surface(ctx,
2293                                    gpe_context,
2294                                    &vme_context->res_brc_me_dist_surface,
2295                                    1,
2296                                    I965_SURFACEFORMAT_R8_UNORM,
2297                                    1);
2298
2299     i965_add_buffer_2d_gpe_surface(ctx,
2300                                    gpe_context,
2301                                    &vme_context->res_mb_stat_surface,
2302                                    1,
2303                                    I965_SURFACEFORMAT_R8_UNORM,
2304                                    2);
2305
2306     i965_add_buffer_2d_gpe_surface(ctx,
2307                                    gpe_context,
2308                                    &vme_context->res_brc_mb_qp_surface,
2309                                    1,
2310                                    I965_SURFACEFORMAT_R8_UNORM,
2311                                    3);
2312
2313     i965_add_buffer_2d_gpe_surface(ctx,
2314                                    gpe_context,
2315                                    &vme_context->res_mb_split_surface,
2316                                    1,
2317                                    I965_SURFACEFORMAT_R8_UNORM,
2318                                    4);
2319
2320     i965_add_buffer_2d_gpe_surface(ctx,
2321                                    gpe_context,
2322                                    &vme_context->res_brc_intra_dist_surface,
2323                                    1,
2324                                    I965_SURFACEFORMAT_R8_UNORM,
2325                                    5);
2326
2327     i965_add_buffer_2d_gpe_surface(ctx,
2328                                    gpe_context,
2329                                    &vme_context->res_cu_split_surface,
2330                                    1,
2331                                    I965_SURFACEFORMAT_R8_UNORM,
2332                                    6);
2333 }
2334
2335 static void
2336 gen10_hevc_enc_brc_lcu_update_kernel(VADriverContextP ctx,
2337                                      struct encode_state *encode_state,
2338                                      struct intel_encoder_context *encoder_context)
2339 {
2340     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2341     struct gen10_hevc_enc_state *hevc_state;
2342     struct i965_gpe_context *gpe_context;
2343     int gpe_index = GEN10_HEVC_BRC_LCU_UPDATE;
2344     int media_function = GEN10_HEVC_MEDIA_STATE_BRC_LCU_UPDATE;
2345     uint32_t resolution_x, resolution_y;
2346     struct gpe_media_object_walker_parameter media_object_walker_param;
2347     struct gen10_hevc_enc_kernel_walker_parameter kernel_walker_param;
2348
2349     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2350
2351     gpe_context = &(vme_context->brc_context.gpe_contexts[gpe_index]);
2352
2353     gen8_gpe_context_init(ctx, gpe_context);
2354     gen9_gpe_reset_binding_table(ctx, gpe_context);
2355
2356     gen10_hevc_enc_brc_update_set_curbe(ctx, encode_state, encoder_context, gpe_context, 1);
2357     gen10_hevc_enc_brc_lcu_update_add_surfaces(ctx, encode_state, encoder_context, gpe_context);
2358
2359     gen8_gpe_setup_interface_data(ctx, gpe_context);
2360
2361     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2362
2363     resolution_x = ALIGN(hevc_state->frame_width, 16) >> 4;
2364     resolution_x = ALIGN(resolution_x, 16) >> 4;
2365     resolution_y = ALIGN(hevc_state->frame_height, 16) >> 4;
2366     resolution_y = ALIGN(resolution_y, 8) >> 3;
2367     kernel_walker_param.resolution_x = resolution_x;
2368     kernel_walker_param.resolution_y = resolution_y;
2369     kernel_walker_param.no_dependency = 1;
2370
2371     gen10_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
2372
2373     gen10_run_kernel_media_object_walker(ctx, encoder_context,
2374                                          gpe_context,
2375                                          media_function,
2376                                          &media_object_walker_param);
2377 }
2378
2379 static void
2380 gen10_hevc_enc_scaling_curbe(VADriverContextP ctx,
2381                              struct encode_state *encode_state,
2382                              struct intel_encoder_context *encoder_context,
2383                              struct i965_gpe_context *gpe_context,
2384                              struct gen10_hevc_scaling_conversion_param *scale_param)
2385 {
2386     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2387     struct gen10_hevc_enc_state *hevc_state;
2388     gen10_hevc_scaling_curbe_data      *scaling_curbe;
2389
2390     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2391     scaling_curbe = i965_gpe_context_map_curbe(gpe_context);
2392
2393     if (!scaling_curbe)
2394         return;
2395
2396     memset(scaling_curbe, 0, sizeof(gen10_hevc_scaling_curbe_data));
2397
2398     scaling_curbe->dw0.input_bit_depth_for_chroma = 10;
2399     scaling_curbe->dw0.input_bit_depth_for_luma   = 10;
2400     scaling_curbe->dw0.output_bit_depth_for_chroma = 8;
2401     scaling_curbe->dw0.output_bit_depth_for_luma   = 8;
2402     scaling_curbe->dw0.rounding_enabled   = 1;
2403
2404     scaling_curbe->dw1.convert_flag                = scale_param->scale_flag.conv_enable;
2405     scaling_curbe->dw1.downscale_stage             = scale_param->scale_flag.ds_type;
2406     scaling_curbe->dw1.mb_statistics_dump_flag     = scale_param->scale_flag.dump_enable;
2407     if (scale_param->scale_flag.is_64lcu) {
2408         scaling_curbe->dw1.lcu_size                 = 0;
2409         scaling_curbe->dw1.job_queue_size           = 32;
2410     } else {
2411         scaling_curbe->dw1.lcu_size                 = 1;
2412         scaling_curbe->dw1.job_queue_size           = 2656;
2413     }
2414
2415     scaling_curbe->dw2.orig_pic_width_in_pixel   = hevc_state->frame_width;
2416     scaling_curbe->dw2.orig_pic_height_in_pixel   = hevc_state->frame_height;
2417
2418     scaling_curbe->dw3.bti_input_conversion_surface    = GEN10_HEVC_SCALING_10BIT_Y;
2419     scaling_curbe->dw4.bti_input_ds_surface            = GEN10_HEVC_SCALING_8BIT_Y;
2420     scaling_curbe->dw5.bti_4x_ds_surface               = GEN10_HEVC_SCALING_4xDS;
2421     scaling_curbe->dw6.bti_mbstat_surface              = GEN10_HEVC_SCALING_MB_STATS;
2422     scaling_curbe->dw7.bti_2x_ds_surface               = GEN10_HEVC_SCALING_2xDS;
2423     scaling_curbe->dw8.bti_mb_split_surface            = GEN10_HEVC_SCALING_MB_SPLIT_SURFACE;
2424     scaling_curbe->dw9.bti_lcu32_jobqueue_buffer_surface  = GEN10_HEVC_SCALING_LCU32_JOB_QUEUE_SCRATCH_SURFACE;
2425     scaling_curbe->dw10.bti_lcu64_lcu32_jobqueue_buffer_surface = GEN10_HEVC_SCALING_LCU64_JOB_QUEUE_SCRATCH_SURFACE;
2426     scaling_curbe->dw11.bti_lcu64_cu32_distortion_surface  = GEN10_HEVC_SCALING_LCU64_64x64_DISTORTION_SURFACE;
2427
2428     i965_gpe_context_unmap_curbe(gpe_context);
2429 }
2430
2431 static void
2432 gen10_hevc_enc_scaling_surfaces(VADriverContextP ctx,
2433                                 struct encode_state *encode_state,
2434                                 struct intel_encoder_context *encoder_context,
2435                                 struct i965_gpe_context *gpe_context,
2436                                 struct gen10_hevc_scaling_conversion_param *scale_param)
2437 {
2438     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2439     int input_bti = 0;
2440     struct object_surface *obj_surface;
2441
2442     if (scale_param->scale_flag.conv_enable) {
2443         obj_surface = scale_param->input_surface;
2444         i965_add_2d_gpe_surface(ctx,
2445                                 gpe_context,
2446                                 obj_surface,
2447                                 0,
2448                                 1,
2449                                 I965_SURFACEFORMAT_R32_UNORM,
2450                                 input_bti);
2451         input_bti++;
2452
2453         i965_add_2d_gpe_surface(ctx,
2454                                 gpe_context,
2455                                 obj_surface,
2456                                 1,
2457                                 1,
2458                                 I965_SURFACEFORMAT_R16G16_UNORM,
2459                                 input_bti);
2460         input_bti++;
2461
2462         obj_surface = scale_param->converted_output_surface;
2463         i965_add_2d_gpe_surface(ctx,
2464                                 gpe_context,
2465                                 obj_surface,
2466                                 0,
2467                                 1,
2468                                 I965_SURFACEFORMAT_R8_UNORM,
2469                                 input_bti);
2470         input_bti++;
2471         i965_add_2d_gpe_surface(ctx,
2472                                 gpe_context,
2473                                 obj_surface,
2474                                 1,
2475                                 1,
2476                                 I965_SURFACEFORMAT_R16_UINT,
2477                                 input_bti);
2478         input_bti++;
2479     } else {
2480         input_bti = 2;
2481         obj_surface = scale_param->input_surface;
2482         i965_add_2d_gpe_surface(ctx,
2483                                 gpe_context,
2484                                 obj_surface,
2485                                 0,
2486                                 1,
2487                                 I965_SURFACEFORMAT_R32_UNORM,
2488                                 input_bti);
2489         input_bti++;
2490
2491         i965_add_2d_gpe_surface(ctx,
2492                                 gpe_context,
2493                                 obj_surface,
2494                                 1,
2495                                 1,
2496                                 I965_SURFACEFORMAT_R16_UINT,
2497                                 input_bti);
2498         input_bti++;
2499     }
2500
2501     if (scale_param->scale_flag.ds_type == GEN10_4X_DS ||
2502         scale_param->scale_flag.ds_type == GEN10_16X_DS ||
2503         scale_param->scale_flag.ds_type == GEN10_2X_4X_DS) {
2504         obj_surface = scale_param->scaled_4x_surface;
2505
2506         i965_add_2d_gpe_surface(ctx,
2507                                 gpe_context,
2508                                 obj_surface,
2509                                 0,
2510                                 1,
2511                                 I965_SURFACEFORMAT_R32_UNORM,
2512                                 input_bti);
2513         input_bti++;
2514     } else
2515         input_bti++;
2516
2517     i965_add_buffer_2d_gpe_surface(ctx,
2518                                    gpe_context,
2519                                    &vme_context->res_mb_stat_surface,
2520                                    1,
2521                                    I965_SURFACEFORMAT_R8_UNORM,
2522                                    input_bti);
2523     input_bti++;
2524
2525     if (scale_param->scale_flag.ds_type == GEN10_2X_DS ||
2526         scale_param->scale_flag.ds_type == GEN10_2X_4X_DS) {
2527         obj_surface = scale_param->scaled_2x_surface;
2528
2529         i965_add_2d_gpe_surface(ctx,
2530                                 gpe_context,
2531                                 obj_surface,
2532                                 0,
2533                                 1,
2534                                 I965_SURFACEFORMAT_R32_UNORM,
2535                                 input_bti);
2536         input_bti++;
2537     } else
2538         input_bti++;
2539
2540     i965_add_buffer_2d_gpe_surface(ctx,
2541                                    gpe_context,
2542                                    &vme_context->res_mb_split_surface,
2543                                    1,
2544                                    I965_SURFACEFORMAT_R8_UNORM,
2545                                    input_bti);
2546     input_bti++;
2547
2548     i965_add_buffer_gpe_surface(ctx,
2549                                 gpe_context,
2550                                 &vme_context->res_jbq_header_buffer,
2551                                 0,
2552                                 BYTES2UINT32(vme_context->res_jbq_header_buffer.size),
2553                                 0,
2554                                 input_bti);
2555     input_bti++;
2556
2557     i965_add_buffer_gpe_surface(ctx,
2558                                 gpe_context,
2559                                 &vme_context->res_jbq_header_lcu64_buffer,
2560                                 0,
2561                                 BYTES2UINT32(vme_context->res_jbq_header_lcu64_buffer.size),
2562                                 0,
2563                                 input_bti);
2564     input_bti++;
2565
2566     i965_add_buffer_gpe_surface(ctx,
2567                                 gpe_context,
2568                                 &vme_context->res_64x64_dist_buffer,
2569                                 0,
2570                                 BYTES2UINT32(vme_context->res_64x64_dist_buffer.size),
2571                                 0,
2572                                 input_bti);
2573     input_bti++;
2574 }
2575
2576 static void
2577 gen10_hevc_enc_scaling_kernel(VADriverContextP ctx,
2578                               struct encode_state *encode_state,
2579                               struct intel_encoder_context *encoder_context,
2580                               struct gen10_hevc_scaling_conversion_param *scale_param)
2581 {
2582     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2583     struct gen10_hevc_enc_state *hevc_state;
2584     struct i965_gpe_context *gpe_context;
2585     int media_function;
2586     struct gpe_media_object_walker_parameter media_object_walker_param;
2587     struct gen10_hevc_enc_kernel_walker_parameter kernel_walker_param;
2588
2589     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2590
2591     gpe_context = &(vme_context->scaling_context.gpe_context);
2592
2593     gen8_gpe_context_init(ctx, gpe_context);
2594     gen9_gpe_reset_binding_table(ctx, gpe_context);
2595
2596     gen10_hevc_enc_scaling_curbe(ctx, encode_state, encoder_context, gpe_context, scale_param);
2597     gen10_hevc_enc_scaling_surfaces(ctx, encode_state, encoder_context, gpe_context, scale_param);
2598
2599     gen8_gpe_setup_interface_data(ctx, gpe_context);
2600
2601     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2602     if (scale_param->scale_flag.ds_type == GEN10_NONE_DS) {
2603         kernel_walker_param.resolution_x = hevc_state->frame_width >> 3;
2604         kernel_walker_param.resolution_y = hevc_state->frame_height >> 3;
2605         media_function = GEN10_HEVC_MEDIA_STATE_NO_SCALING;
2606     } else if (scale_param->scale_flag.ds_type == GEN10_2X_DS) {
2607         kernel_walker_param.resolution_x = ALIGN(hevc_state->frame_width >> 1, 64) >> 3;
2608         kernel_walker_param.resolution_y = ALIGN(hevc_state->frame_height >> 1, 64) >> 3;
2609         media_function = GEN10_HEVC_MEDIA_STATE_2X_SCALING;
2610     } else if (scale_param->scale_flag.ds_type == GEN10_4X_DS ||
2611                scale_param->scale_flag.ds_type == GEN10_2X_4X_DS) {
2612         kernel_walker_param.resolution_x = hevc_state->frame_width_4x >> 3;
2613         kernel_walker_param.resolution_y = hevc_state->frame_height_4x >> 3;
2614
2615         if (scale_param->scale_flag.ds_type == GEN10_4X_DS)
2616             media_function = GEN10_HEVC_MEDIA_STATE_4X_SCALING;
2617         else
2618             media_function = GEN10_HEVC_MEDIA_STATE_2X_4X_SCALING;
2619     } else {
2620         kernel_walker_param.resolution_x = hevc_state->frame_width_16x >> 3;
2621         kernel_walker_param.resolution_y = hevc_state->frame_height_16x >> 3;
2622
2623         media_function = GEN10_HEVC_MEDIA_STATE_16X_SCALING;
2624     }
2625     kernel_walker_param.no_dependency = 1;
2626
2627     gen10_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
2628
2629     gen10_run_kernel_media_object_walker(ctx, encoder_context,
2630                                          gpe_context,
2631                                          media_function,
2632                                          &media_object_walker_param);
2633 }
2634
2635 static void
2636 gen10_hevc_enc_conv_scaling_surface(VADriverContextP ctx,
2637                                     struct encode_state *encode_state,
2638                                     struct intel_encoder_context *encoder_context,
2639                                     struct object_surface *input_surface,
2640                                     struct object_surface *obj_surface,
2641                                     int only_for_reference)
2642 {
2643     struct gen10_hevc_enc_context *vme_context;
2644     struct gen10_hevc_enc_state *hevc_state;
2645     struct gen10_hevc_scaling_conversion_param scale_param;
2646     struct gen10_hevc_surface_priv *surface_priv;
2647
2648     vme_context = encoder_context->vme_context;
2649     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2650     surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
2651
2652     if (!hevc_state->is_10bit &&
2653         !hevc_state->is_64lcu &&
2654         !hevc_state->hme_supported)
2655         return;
2656
2657     memset(&scale_param, 0, sizeof(scale_param));
2658
2659     scale_param.input_surface = input_surface ? input_surface : obj_surface;
2660     scale_param.input_width = hevc_state->frame_width;
2661     scale_param.input_height = hevc_state->frame_height;
2662     scale_param.output_4x_width = hevc_state->frame_width_4x;
2663     scale_param.output_4x_height = hevc_state->frame_height_4x;
2664     scale_param.scaled_2x_surface = surface_priv->scaled_2x_surface;
2665     scale_param.scaled_4x_surface = surface_priv->scaled_4x_surface;
2666     scale_param.converted_output_surface = surface_priv->converted_surface;
2667
2668     if (hevc_state->is_10bit)
2669         scale_param.scale_flag.conv_enable = GEN10_DEPTH_CONV_ENABLE;
2670
2671     scale_param.scale_flag.is_64lcu = hevc_state->is_64lcu;
2672
2673     scale_param.scale_flag.dump_enable = 0;
2674     if (hevc_state->is_64lcu && hevc_state->hme_supported) {
2675         scale_param.scale_flag.ds_type = GEN10_2X_4X_DS;
2676         scale_param.scale_flag.dump_enable = hevc_state->brc.brc_enabled ? 1 : 0;
2677     } else if (hevc_state->is_64lcu)
2678         scale_param.scale_flag.ds_type = GEN10_2X_DS;
2679     else if (hevc_state->hme_supported) {
2680         scale_param.scale_flag.ds_type = GEN10_4X_DS;
2681         scale_param.scale_flag.dump_enable = hevc_state->brc.brc_enabled ? 1 : 0;
2682     } else
2683         scale_param.scale_flag.ds_type = GEN10_NONE_DS;
2684
2685     gen10_hevc_enc_scaling_kernel(ctx, encode_state,
2686                                   encoder_context,
2687                                   &scale_param);
2688
2689     if (only_for_reference)
2690         surface_priv->conv_scaling_done = 1;
2691
2692     if (!hevc_state->b16xme_supported ||
2693         only_for_reference)
2694         return;
2695
2696     memset(&scale_param, 0, sizeof(scale_param));
2697
2698     scale_param.input_surface = surface_priv->scaled_4x_surface;
2699     scale_param.scaled_4x_surface = surface_priv->scaled_16x_surface;
2700     scale_param.input_width = hevc_state->frame_width_4x;
2701     scale_param.input_height = hevc_state->frame_height_4x;
2702     scale_param.output_4x_width = hevc_state->frame_width_16x;
2703     scale_param.output_4x_height = hevc_state->frame_height_16x;
2704
2705     scale_param.scale_flag.ds_type = GEN10_16X_DS;
2706
2707     gen10_hevc_enc_scaling_kernel(ctx, encode_state,
2708                                   encoder_context,
2709                                   &scale_param);
2710 }
2711
/* Values written to the ME curbe's dw6.hme_stage field. */
#define GEN10_HEVC_HME_STAGE_4X_NO_16X 0    /* 4x ME, no 16x ME result used */
#define GEN10_HEVC_HME_STAGE_4X_AFTER_16X 1 /* 4x ME following a 16x ME pass */
#define GEN10_HEVC_HME_STAGE_16X 2          /* 16x ME pass */
2715
2716 static void
2717 gen10_hevc_enc_me_curbe(VADriverContextP ctx,
2718                         struct encode_state *encode_state,
2719                         struct intel_encoder_context *encoder_context,
2720                         struct i965_gpe_context *gpe_context,
2721                         uint32_t hme_level,
2722                         int dist_type)
2723 {
2724     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2725     struct gen10_hevc_enc_state *hevc_state;
2726     gen10_hevc_me_curbe_data      *me_curbe;
2727     VAEncSliceParameterBufferHEVC *slice_param;
2728     VAEncSequenceParameterBufferHEVC *seq_param;
2729
2730     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2731
2732     seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2733     slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2734
2735     me_curbe = i965_gpe_context_map_curbe(gpe_context);
2736
2737     if (!me_curbe)
2738         return;
2739
2740     memset(me_curbe, 0, sizeof(gen10_hevc_me_curbe_data));
2741
2742     me_curbe->dw0.rounded_frame_width_in_mv_for4x       = hevc_state->frame_width_4x >> 3;
2743     me_curbe->dw0.rounded_frame_height_in_mv_for4x      = hevc_state->frame_height_4x >> 3;
2744
2745     me_curbe->dw2.sub_pel_mode                          = 3;
2746     me_curbe->dw2.bme_disable_fbr                       = 1;
2747     me_curbe->dw2.inter_sad_adj                         = 2;
2748
2749     me_curbe->dw3.adaptive_search_en                    = 1;
2750     me_curbe->dw3.ime_ref_window_size                   = 1; // From the HW-spec
2751
2752     me_curbe->dw4.quarter_quad_tree_cand                = 1; // 32x32 split is enabled.
2753     me_curbe->dw4.bi_weight                             = 32; // default weight.
2754
2755     me_curbe->dw5.len_sp                                = 0x3F;
2756     me_curbe->dw5.max_num_su                            = 0x3F;
2757     me_curbe->dw5.start_center0_x                       = ((gen10_hevc_ime_ref_window_size[1][0] - 32) >> 3) & 0xF;
2758     me_curbe->dw5.start_center0_y                       = ((gen10_hevc_ime_ref_window_size[1][1] - 32) >> 3) & 0xF;
2759
2760     me_curbe->dw6.slice_type                            = (dist_type == GEN10_HEVC_ME_DIST_TYPE_INTER_BRC) ? 1 : 0;
2761     if (dist_type == GEN10_HEVC_ME_DIST_TYPE_INTER_BRC) {
2762         if (hme_level == GEN10_HEVC_HME_LEVEL_4X)
2763             me_curbe->dw6.hme_stage =
2764                 (hevc_state->b16xme_enabled) ? GEN10_HEVC_HME_STAGE_4X_AFTER_16X :
2765                 GEN10_HEVC_HME_STAGE_4X_NO_16X;
2766         else
2767             me_curbe->dw6.hme_stage = GEN10_HEVC_HME_STAGE_16X;
2768     } else
2769         me_curbe->dw6.hme_stage = GEN10_HEVC_HME_STAGE_4X_NO_16X;
2770
2771     if (slice_param->slice_type == HEVC_SLICE_I) {
2772         me_curbe->dw6.num_ref_l0 = 0;
2773         me_curbe->dw6.num_ref_l1 = 0;
2774     } else if (slice_param->slice_type == HEVC_SLICE_P) {
2775         me_curbe->dw6.num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
2776         me_curbe->dw6.num_ref_l1 = 0;
2777     } else {
2778         me_curbe->dw6.num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
2779         me_curbe->dw6.num_ref_l1 = hevc_state->low_delay ? 0 : slice_param->num_ref_idx_l1_active_minus1 + 1;
2780     }
2781
2782     me_curbe->dw7.rounded_frame_width_in_mv_for16x = hevc_state->frame_width_16x >> 3;
2783     me_curbe->dw7.rounded_frame_height_in_mv_for16x = hevc_state->frame_height_16x >> 3;
2784
2785     /* Search path */
2786     memcpy(&me_curbe->ime_search_path_03, gen10_hevc_me_search_path,
2787            sizeof(gen10_hevc_me_search_path));
2788
2789     me_curbe->dw24.coding_unit_size = 1;
2790     me_curbe->dw24.coding_unit_partition_mode = 0;
2791     me_curbe->dw24.coding_unit_prediction_mode = 1;
2792
2793     if (hme_level == GEN10_HEVC_HME_LEVEL_4X) {
2794         me_curbe->dw25.frame_width_in_pixel_cs = hevc_state->frame_width >> 2;
2795         me_curbe->dw25.frame_height_in_pixel_cs = hevc_state->frame_height >> 2;
2796     } else {
2797         me_curbe->dw25.frame_width_in_pixel_cs = hevc_state->frame_width >> 4;
2798         me_curbe->dw25.frame_height_in_pixel_cs = hevc_state->frame_height >> 4;
2799     }
2800
2801     me_curbe->dw27.intra_compute_type = 1;
2802
2803     me_curbe->dw28.penalty_intra32x32_nondc = 36;
2804     me_curbe->dw28.penalty_intra16x16_nondc = 12;
2805     me_curbe->dw28.penalty_intra8x8_nondc = 4;
2806
2807     me_curbe->dw30.mode4_cost = 13;
2808     me_curbe->dw30.mode5_cost = 9;
2809     me_curbe->dw30.mode6_cost = 13;
2810     me_curbe->dw30.mode7_cost = 3;
2811     me_curbe->dw31.mode8_cost = 9;
2812
2813     me_curbe->dw32.sicintra_neighbor_avail_flag         = 0x3F;
2814     me_curbe->dw32.sic_inter_sad_measure                = 0x02;
2815     me_curbe->dw32.sic_intra_sad_measure                = 0x02;
2816
2817     me_curbe->dw33.sic_log2_min_cu_size                 = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
2818
2819     me_curbe->dw34.bti_hme_output_mv_data_surface       = GEN10_HEVC_HME_OUTPUT_MV_DATA;
2820     me_curbe->dw35.bti_16xinput_mv_data_surface         = GEN10_HEVC_HME_16xINPUT_MV_DATA;
2821     me_curbe->dw36.bti_4x_output_distortion_surface     = GEN10_HEVC_HME_4xOUTPUT_DISTORTION;
2822     me_curbe->dw37.bti_vme_input_surface                = GEN10_HEVC_HME_VME_PRED_CURR_PIC_IDX0;
2823     me_curbe->dw38.bti_4xds_surface                     = GEN10_HEVC_HME_4xDS_INPUT;
2824     me_curbe->dw39.bti_brc_distortion_surface           = GEN10_HEVC_HME_BRC_DISTORTION;
2825     me_curbe->dw40.bti_mv_and_distortion_sum_surface    = GEN10_HEVC_HME_MV_AND_DISTORTION_SUM;
2826
2827     i965_gpe_context_unmap_curbe(gpe_context);
2828 }
2829
2830 static void
2831 gen10_hevc_enc_me_surfaces(VADriverContextP ctx,
2832                            struct encode_state *encode_state,
2833                            struct intel_encoder_context *encoder_context,
2834                            struct i965_gpe_context *gpe_context,
2835                            uint32_t hme_level,
2836                            int dist_type)
2837 {
2838     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2839     struct gen10_hevc_enc_state *hevc_state;
2840     struct gen10_hevc_enc_frame_info *frame_info;
2841     struct gen10_hevc_enc_common_res *common_res;
2842     struct object_surface *obj_surface, *vme_surface;
2843     struct gen10_hevc_surface_priv *surface_priv;
2844     struct i965_gpe_resource *res_source;
2845     int input_bti, i;
2846
2847     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2848     frame_info = &vme_context->frame_info;
2849     common_res = &vme_context->common_res;
2850
2851     obj_surface = encode_state->reconstructed_object;
2852
2853     surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
2854
2855     if (hme_level == GEN10_HEVC_HME_LEVEL_4X) {
2856         vme_surface = surface_priv->scaled_4x_surface;
2857         res_source = &vme_context->res_s4x_memv_data_surface;
2858     } else {
2859         vme_surface = surface_priv->scaled_16x_surface;
2860         res_source = &vme_context->res_s16x_memv_data_surface;
2861     }
2862
2863     input_bti = 0;
2864     i965_add_buffer_2d_gpe_surface(ctx, gpe_context, res_source,
2865                                    1, I965_SURFACEFORMAT_R8_UNORM,
2866                                    GEN10_HEVC_HME_OUTPUT_MV_DATA);
2867
2868     if (hme_level == GEN10_HEVC_HME_LEVEL_4X && hevc_state->b16xme_enabled)
2869         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
2870                                        &vme_context->res_s16x_memv_data_surface,
2871                                        1, I965_SURFACEFORMAT_R8_UNORM,
2872                                        GEN10_HEVC_HME_16xINPUT_MV_DATA);
2873
2874     if (hme_level == GEN10_HEVC_HME_LEVEL_4X)
2875         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
2876                                        &vme_context->res_s4x_me_dist_surface,
2877                                        1, I965_SURFACEFORMAT_R8_UNORM,
2878                                        GEN10_HEVC_HME_4xOUTPUT_DISTORTION);
2879
2880     input_bti = GEN10_HEVC_HME_VME_PRED_CURR_PIC_IDX0;
2881
2882     i965_add_adv_gpe_surface(ctx, gpe_context,
2883                              vme_surface,
2884                              input_bti);
2885     input_bti++;
2886
2887     for (i = 0; i < 4; i++) {
2888         struct object_surface *tmp_surface, *input_surface;
2889         struct gen10_hevc_surface_priv *tmp_hevc_surface;
2890
2891         if (frame_info->mapped_ref_idx_list0[i] >= 0)
2892             tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[i]].obj_surface;
2893         else
2894             tmp_surface = NULL;
2895
2896         if (tmp_surface && tmp_surface->private_data) {
2897             tmp_hevc_surface = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
2898
2899             if (hme_level == GEN10_HEVC_HME_LEVEL_4X)
2900                 input_surface = tmp_hevc_surface->scaled_4x_surface;
2901             else
2902                 input_surface = tmp_hevc_surface->scaled_16x_surface;
2903
2904             i965_add_adv_gpe_surface(ctx, gpe_context,
2905                                      input_surface,
2906                                      input_bti + 2 * i);
2907         } else
2908             i965_add_adv_gpe_surface(ctx, gpe_context,
2909                                      vme_surface,
2910                                      input_bti + 2 * i);
2911
2912         if (frame_info->mapped_ref_idx_list1[i] >= 0)
2913             tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[i]].obj_surface;
2914         else
2915             tmp_surface = NULL;
2916
2917         if (tmp_surface && tmp_surface->private_data) {
2918             tmp_hevc_surface = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
2919
2920             if (hme_level == GEN10_HEVC_HME_LEVEL_4X)
2921                 input_surface = tmp_hevc_surface->scaled_4x_surface;
2922             else
2923                 input_surface = tmp_hevc_surface->scaled_16x_surface;
2924
2925             i965_add_adv_gpe_surface(ctx, gpe_context,
2926                                      input_surface,
2927                                      input_bti + 2 * i + 1);
2928         } else
2929             i965_add_adv_gpe_surface(ctx, gpe_context,
2930                                      vme_surface,
2931                                      input_bti + 2 * i + 1);
2932     }
2933
2934     if (hme_level == GEN10_HEVC_HME_LEVEL_4X) {
2935         i965_add_2d_gpe_surface(ctx,
2936                                 gpe_context,
2937                                 vme_surface,
2938                                 0,
2939                                 1,
2940                                 I965_SURFACEFORMAT_R8_UNORM,
2941                                 GEN10_HEVC_HME_4xDS_INPUT);
2942
2943         if (dist_type != GEN10_HEVC_ME_DIST_TYPE_INTRA)
2944             res_source = &vme_context->res_brc_me_dist_surface;
2945         else
2946             res_source = &vme_context->res_brc_intra_dist_surface;
2947
2948         i965_add_buffer_2d_gpe_surface(ctx, gpe_context, res_source,
2949                                        1, I965_SURFACEFORMAT_R8_UNORM,
2950                                        GEN10_HEVC_HME_BRC_DISTORTION);
2951     }
2952
2953     i965_add_buffer_gpe_surface(ctx, gpe_context,
2954                                 &vme_context->res_mv_dist_sum_buffer,
2955                                 1,
2956                                 vme_context->res_mv_dist_sum_buffer.size,
2957                                 0,
2958                                 GEN10_HEVC_HME_MV_AND_DISTORTION_SUM);
2959 }
2960
2961 static void
2962 gen10_hevc_enc_me_kernel(VADriverContextP ctx,
2963                          struct encode_state *encode_state,
2964                          struct intel_encoder_context *encoder_context,
2965                          int hme_level,
2966                          int dist_type)
2967 {
2968     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2969     struct gen10_hevc_enc_state *hevc_state;
2970     struct i965_gpe_context *gpe_context;
2971     int media_function;
2972     struct gpe_media_object_walker_parameter media_object_walker_param;
2973     struct gen10_hevc_enc_kernel_walker_parameter kernel_walker_param;
2974
2975     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2976
2977     gpe_context = &(vme_context->me_context.gpe_context);
2978
2979     gen8_gpe_context_init(ctx, gpe_context);
2980     gen9_gpe_reset_binding_table(ctx, gpe_context);
2981
2982     gen10_hevc_enc_me_curbe(ctx, encode_state, encoder_context, gpe_context, hme_level, dist_type);
2983     gen10_hevc_enc_me_surfaces(ctx, encode_state, encoder_context, gpe_context, hme_level, dist_type);
2984
2985     gen8_gpe_setup_interface_data(ctx, gpe_context);
2986
2987     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2988
2989     if (hme_level == GEN10_HEVC_HME_LEVEL_4X) {
2990         kernel_walker_param.resolution_x = hevc_state->frame_width_4x >> 5;
2991         kernel_walker_param.resolution_y = hevc_state->frame_height_4x >> 5;
2992
2993         media_function = GEN10_HEVC_MEDIA_STATE_4XME;
2994     } else {
2995         kernel_walker_param.resolution_x = hevc_state->frame_width_16x >> 5;
2996         kernel_walker_param.resolution_y = hevc_state->frame_height_16x >> 5;
2997
2998         media_function = GEN10_HEVC_MEDIA_STATE_16XME;
2999     }
3000
3001     kernel_walker_param.no_dependency = 1;
3002
3003     gen10_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
3004
3005     gen10_run_kernel_media_object_walker(ctx, encoder_context,
3006                                          gpe_context,
3007                                          media_function,
3008                                          &media_object_walker_param);
3009 }
3010
/*
 * Indices into the mode_cost[] table filled by gen10_hevc_calc_costs().
 * Several inter partition names intentionally share one slot.
 */
#define LUTMODE_INTRA_NONPRED_HEVC 0x00
#define LUTMODE_INTRA_32x32_HEVC 0x01
#define LUTMODE_INTRA_16x16_HEVC 0x02
#define LUTMODE_INTRA_8x8_HEVC 0x03
#define LUTMODE_INTER_32x16_HEVC 0x04 /* shared with 16x32 and AMP */
#define LUTMODE_INTER_16x32_HEVC 0x04
#define LUTMODE_INTER_AMP_HEVC 0x04
#define LUTMODE_INTER_16x16_HEVC 0x05
#define LUTMODE_INTER_16x8_HEVC 0x06 /* shared with 8x16 */
#define LUTMODE_INTER_8x16_HEVC 0x06
#define LUTMODE_INTER_8x8_HEVC 0x07
#define LUTMODE_INTER_32x32_HEVC 0x08
#define LUTMODE_INTER_BIDIR_HEVC 0x09
#define LUTMODE_REF_ID_HEVC 0x0A
#define LUTMODE_INTRA_CHROMA_HEVC 0x0B

/* Slots of mode_cost[] that hold the lambda values and the TU SAD threshold. */
#define LAMBDA_RD_IDX 0x10
#define LAMBDA_MD_IDX 0x11
#define TUSAD_THR_IDX 0x12

#define MAX_MODE_COST 0x20
3032
/*
 * Encode a cost as a 4.4 "shift.base" byte: the high nibble is a shift
 * count and the low nibble a base, so that the encoded cost is
 * (base << shift).
 *
 * value: cost to encode; 0 encodes to 0.
 * max:   largest encoding allowed, in the same 4.4 format.  Any value at or
 *        above the cost that max decodes to is clamped to max.
 *
 * Returns the 4.4 encoding of value, rounded to the nearest representable
 * cost.
 */
static uint8_t
map_44_lut_value(uint32_t value,
                 uint8_t max)
{
    uint32_t max_cost;
    uint32_t remaining;
    int log2_floor = 0;
    int shift;
    uint8_t ret;

    if (value == 0)
        return 0;

    max_cost = (uint32_t)(max & 15) << (max >> 4);
    if (value >= max_cost)
        return max;

    /*
     * floor(log2(value)) in pure integer arithmetic.  This avoids relying on
     * the rounding of log(x) / log(2.) at exact powers of two and needs no
     * libm support.
     */
    for (remaining = value; remaining > 1; remaining >>= 1)
        log2_floor++;

    /* Keep four significant bits in the base; smaller values need no shift. */
    shift = log2_floor - 3;
    if (shift < 0)
        shift = 0;

    /* Round to nearest by adding half of the discarded range before shifting. */
    ret = (uint8_t)((shift << 4) +
                    (int)((value + (shift == 0 ? 0 : (1u << (shift - 1)))) >> shift));

    /*
     * Rounding can carry the base up to 16, which lands in the next shift
     * value with a zero base; express that as base 8 at the higher shift.
     */
    if ((ret & 0xf) == 0)
        ret |= 8;

    return ret;
}
3058
3059 static void
3060 gen10_hevc_calc_costs(uint32_t *mode_cost, int slice_type, int qp, bool b_lcu64)
3061 {
3062     unsigned short lambda_md;
3063     unsigned int lambda_rd;
3064     unsigned int tu_sad_thres;
3065     float qp_value;
3066     double lambda;
3067     double intra_weigh_factor;
3068     double inter_weigh_factor;
3069     double qp_scale, cost_scale;
3070     int lcu_idx;
3071
3072     if (!mode_cost)
3073         return;
3074
3075     if (slice_type == HEVC_SLICE_I) {
3076         qp_scale = 5.0;
3077         cost_scale = 1.0;
3078     } else {
3079         qp_scale = 0.55;
3080         cost_scale = 2.0;
3081     }
3082
3083     if (b_lcu64)
3084         lcu_idx = 1;
3085     else
3086         lcu_idx = 0;
3087
3088     qp_value = qp - 12;
3089     if (qp_value < 0)
3090         qp_value = 0;
3091
3092     lambda     = sqrt(qp_scale * pow(2.0, qp_value / 3.0));
3093     lambda_rd  = (unsigned int)(qp_scale * pow(2.0, qp_value / 3.0) * 256 + 0.5);
3094     lambda_md  = (unsigned short)(lambda * 256 + 0.5);
3095     tu_sad_thres = (unsigned int)(sqrt(0.85 * pow(2.0, qp_value / 3.0)) * 0.4 * 256 + 0.5);
3096
3097     inter_weigh_factor = cost_scale * lambda;
3098     intra_weigh_factor = inter_weigh_factor * gen10_hevc_lambda_factor[slice_type][qp];
3099
3100     mode_cost[LAMBDA_RD_IDX]   = lambda_rd;
3101     mode_cost[LAMBDA_MD_IDX]   = lambda_md;
3102     mode_cost[TUSAD_THR_IDX]   = tu_sad_thres;
3103
3104     mode_cost[LUTMODE_INTRA_NONPRED_HEVC] = map_44_lut_value((uint32_t)(intra_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTRA_NONPRED_HEVC]), 0x6f);
3105     mode_cost[LUTMODE_INTRA_32x32_HEVC]   = map_44_lut_value((uint32_t)(intra_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTRA_32x32_HEVC]), 0x8f);
3106     mode_cost[LUTMODE_INTRA_16x16_HEVC]   = map_44_lut_value((uint32_t)(intra_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTRA_16x16_HEVC]), 0x8f);
3107     mode_cost[LUTMODE_INTRA_8x8_HEVC]     = map_44_lut_value((uint32_t)(intra_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTRA_8x8_HEVC]), 0x8f);
3108     mode_cost[LUTMODE_INTRA_CHROMA_HEVC]  = map_44_lut_value((uint32_t)(intra_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTRA_CHROMA_HEVC]), 0x6f);
3109
3110     mode_cost[LUTMODE_INTER_32x32_HEVC]   = map_44_lut_value((uint32_t)(inter_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTER_32x32_HEVC]), 0x8f);
3111     mode_cost[LUTMODE_INTER_32x16_HEVC]   = map_44_lut_value((uint32_t)(inter_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTER_32x16_HEVC]), 0x8f);
3112     mode_cost[LUTMODE_INTER_16x16_HEVC]   = map_44_lut_value((uint32_t)(inter_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTER_16x16_HEVC]), 0x6f);
3113     mode_cost[LUTMODE_INTER_16x8_HEVC]    = map_44_lut_value((uint32_t)(inter_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTER_16x8_HEVC]), 0x6f);
3114     mode_cost[LUTMODE_INTER_8x8_HEVC]     = map_44_lut_value((uint32_t)(0.45 * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTER_8x8_HEVC]), 0x6f);
3115
3116     mode_cost[LUTMODE_INTER_BIDIR_HEVC]   = map_44_lut_value((uint32_t)(inter_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTER_BIDIR_HEVC]), 0x6f);
3117     if (slice_type != HEVC_SLICE_I)
3118         mode_cost[LUTMODE_REF_ID_HEVC]    = map_44_lut_value((uint32_t)(inter_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_REF_ID_HEVC]), 0x6f);
3119     else
3120         mode_cost[LUTMODE_REF_ID_HEVC]    = 0;
3121 }
3122
3123 static void
3124 gen10_hevc_enc_generate_regions_in_slice_control(VADriverContextP ctx,
3125                                                  struct encode_state *encode_state,
3126                                                  struct intel_encoder_context *encoder_context)
3127 {
3128     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
3129     struct gen10_hevc_enc_state *hevc_state;
3130     VAEncSliceParameterBufferHEVC *slice_param;
3131     gen10_hevc_concurrent_tg_data *pregion;
3132     int i, k, slice, num_regions, height, num_slices;
3133     int num_wf_in_region;
3134     uint32_t  frame_width_in_ctb, frame_height_in_ctb;
3135     bool is_arbitary_slices;
3136     int slice_starty[I965_MAX_NUM_SLICE + 1];
3137     int regions_start_table[64];
3138     uint32_t start_offset_to_region[16];
3139     int16_t data_tmp[32][32];
3140     int max_height;
3141     int log2_lcu_size;
3142     int copy_blk_size = 0;
3143
3144     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
3145
3146     memset(slice_starty, 0, sizeof(slice_starty));
3147     memset(regions_start_table, 0, sizeof(regions_start_table));
3148     memset(data_tmp, 0, sizeof(data_tmp));
3149     memset(&hevc_state->hevc_wf_param, 0, sizeof(hevc_state->hevc_wf_param));
3150     memset(start_offset_to_region, 0, sizeof(start_offset_to_region));
3151
3152     frame_width_in_ctb = vme_context->frame_info.width_in_lcu;
3153     frame_height_in_ctb = vme_context->frame_info.height_in_lcu;
3154     if (hevc_state->is_64lcu) {
3155         log2_lcu_size = 6;
3156         copy_blk_size = 22;
3157     } else {
3158         log2_lcu_size = 5;
3159         copy_blk_size = 18;
3160     }
3161
3162     is_arbitary_slices = false;
3163     for (slice = 0; slice < encode_state->num_slice_params_ext; slice++) {
3164         slice_param = NULL;
3165         if (encode_state->slice_params_ext[slice] &&
3166             encode_state->slice_params_ext[slice]->buffer)
3167             slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice]->buffer;
3168
3169         if (!slice_param)
3170             continue;
3171
3172         if (slice_param->slice_segment_address % frame_width_in_ctb) {
3173             is_arbitary_slices = true;
3174         } else {
3175             slice_starty[slice] = slice_param->slice_segment_address / frame_width_in_ctb;
3176         }
3177     }
3178
3179     slice_starty[encode_state->num_slice_params_ext] = frame_height_in_ctb;
3180
3181     regions_start_table[0] = 0;
3182     regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + 0] = 0;
3183     num_regions = 1;
3184
3185     if (is_arbitary_slices) {
3186         height = frame_height_in_ctb;
3187         num_slices = 1;
3188         max_height = height;
3189         if (hevc_state->num_regions_in_slice > 1) {
3190             num_wf_in_region = (frame_width_in_ctb + 2 * (frame_height_in_ctb - 1) + hevc_state->num_regions_in_slice - 1) /
3191                                hevc_state->num_regions_in_slice;
3192
3193             num_regions = hevc_state->num_regions_in_slice;
3194
3195             for (i = 1; i < hevc_state->num_regions_in_slice; i++) {
3196                 int front = i * num_wf_in_region;
3197
3198                 if (front < frame_width_in_ctb) {
3199                     regions_start_table[i] = front;
3200                 } else if (((front - frame_width_in_ctb + 1) & 1) == 0) {
3201                     regions_start_table[i] = frame_width_in_ctb - 1;
3202                 } else {
3203                     regions_start_table[i] = frame_width_in_ctb - 2;
3204                 }
3205
3206                 regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + i] = (front - regions_start_table[i]) >> 1;
3207             }
3208         }
3209     } else {
3210         int start_y = 0, slice_height;
3211         int slice_is_merged = 0;
3212
3213         max_height = 0;
3214         num_slices = encode_state->num_slice_params_ext;
3215
3216         for (slice = 0; slice < num_slices; slice++) {
3217             slice_height = slice_starty[slice + 1] - slice_starty[slice];
3218
3219             if (slice_height > max_height)
3220                 max_height = slice_height;
3221         }
3222
3223         while (!slice_is_merged) {
3224             int new_num_slices = 1;
3225
3226             start_y = 0;
3227
3228             for (slice = 1; slice < num_slices; slice++) {
3229                 if ((slice_starty[slice + 1] - start_y) <= max_height) {
3230                     slice_starty[slice] = -1;
3231                 } else {
3232                     start_y = slice_starty[slice];
3233                 }
3234             }
3235
3236             for (slice = 1; slice < num_slices; slice++) {
3237                 if (slice_starty[slice] > 0) {
3238                     slice_starty[new_num_slices] = slice_starty[slice];
3239                     new_num_slices++;
3240                 }
3241             }
3242
3243             num_slices = new_num_slices;
3244             slice_starty[num_slices] = frame_height_in_ctb;
3245
3246             if (num_slices * hevc_state->num_regions_in_slice <= 16) {
3247                 slice_is_merged = 1;
3248             } else {
3249                 int num = 1;
3250
3251                 max_height = frame_height_in_ctb;
3252
3253                 for (slice = 0; slice < num_slices - 1; slice++) {
3254                     if ((slice_starty[slice + 2] - slice_starty[slice]) <= max_height) {
3255                         max_height = slice_starty[slice + 2] - slice_starty[slice];
3256                         num = slice + 1;
3257                     }
3258                 }
3259
3260                 for (slice = num; slice < num_slices; slice++)
3261                     slice_starty[slice] = slice_starty[slice + 1];
3262
3263                 num_slices--;
3264             }
3265         }
3266
3267         num_wf_in_region = (frame_width_in_ctb + 2 * (max_height - 1) + hevc_state->num_regions_in_slice - 1) /
3268                            hevc_state->num_regions_in_slice;
3269         num_regions = num_slices * hevc_state->num_regions_in_slice;
3270
3271         for (slice = 0; slice < num_slices; slice++) {
3272             regions_start_table[slice * hevc_state->num_regions_in_slice] = 0;
3273             regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + (slice * hevc_state->num_regions_in_slice)] = slice_starty[slice];
3274
3275             for (i = 1; i < hevc_state->num_regions_in_slice; i++) {
3276                 int front = i * num_wf_in_region;
3277
3278                 if (front < frame_width_in_ctb)
3279                     regions_start_table[slice * hevc_state->num_regions_in_slice + i] = front;
3280                 else if (((front - frame_width_in_ctb + 1) & 1) == 0)
3281                     regions_start_table[slice * hevc_state->num_regions_in_slice + i] = frame_width_in_ctb - 1;
3282                 else
3283                     regions_start_table[slice * hevc_state->num_regions_in_slice + i] = frame_width_in_ctb - 2;
3284
3285                 regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + (slice * hevc_state->num_regions_in_slice + i)] = slice_starty[slice] +
3286                                                                                                                          ((front - regions_start_table[i]) >> 1);
3287             }
3288         }
3289         height = max_height;
3290     }
3291
3292     for (k = 0; k < num_slices; k++) {
3293         int nearest_reg = 0, delta, tmp_y;
3294         int min_delta = hevc_state->frame_height;
3295         int cur_lcu_pel_y = regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + (k * hevc_state->num_regions_in_slice)] << log2_lcu_size;
3296         int ts_width   = frame_width_in_ctb;
3297         int ts_height  = height;
3298         int offset_y   = -((ts_width + 1) >> 1);
3299         int offset_delta = ((ts_width + ((ts_height - 1) << 1)) + (hevc_state->num_regions_in_slice - 1)) / (hevc_state->num_regions_in_slice);
3300
3301         for (i = 0; i < num_regions; i++) {
3302             if (regions_start_table[i] == 0) {
3303                 delta = cur_lcu_pel_y - (regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + i] << log2_lcu_size);
3304
3305                 if (delta >= 0) {
3306                     if (delta < min_delta) {
3307                         min_delta = delta;
3308                         nearest_reg = i;
3309                     }
3310                 }
3311             }
3312
3313             start_offset_to_region[k] = 2 * regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + nearest_reg];
3314         }
3315         for (i = 0; i < hevc_state->num_regions_in_slice; i++) {
3316             data_tmp[k * hevc_state->num_regions_in_slice + i][0] = slice_starty[k] * frame_width_in_ctb;
3317             data_tmp[k * hevc_state->num_regions_in_slice + i][1] = (k == (num_slices - 1)) ?
3318                                                                     frame_width_in_ctb * frame_height_in_ctb : slice_starty[k + 1] * frame_width_in_ctb;
3319             data_tmp[k * hevc_state->num_regions_in_slice + i][2] = k * hevc_state->num_regions_in_slice + i;
3320             if (!hevc_state->is_64lcu && hevc_state->num_regions_in_slice == 1) {
3321                 continue;
3322             }
3323
3324             data_tmp[k * hevc_state->num_regions_in_slice + i][3] = height;
3325             data_tmp[k * hevc_state->num_regions_in_slice + i][4] = regions_start_table[nearest_reg + i];
3326             data_tmp[k * hevc_state->num_regions_in_slice + i][5] = regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + (nearest_reg + i)];
3327             data_tmp[k * hevc_state->num_regions_in_slice + i][6] = regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + nearest_reg];
3328             tmp_y = regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + (nearest_reg + hevc_state->num_regions_in_slice)];
3329             data_tmp[k * hevc_state->num_regions_in_slice + i][7] = (tmp_y != 0) ? tmp_y : frame_height_in_ctb;
3330             data_tmp[k * hevc_state->num_regions_in_slice + i][8] = offset_y + regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + nearest_reg] + ((i * offset_delta) >> 1);
3331             if (hevc_state->is_64lcu) {
3332                 data_tmp[k * hevc_state->num_regions_in_slice + i][9] = (frame_width_in_ctb + 2 * (max_height - 1) + hevc_state->num_regions_in_slice - 1) / hevc_state->num_regions_in_slice;
3333                 data_tmp[k * hevc_state->num_regions_in_slice + i][10] = num_regions;
3334             }
3335         }
3336     }
3337
3338
3339     pregion = (gen10_hevc_concurrent_tg_data *) i965_map_gpe_resource(&vme_context->res_concurrent_tg_data);
3340     if (!pregion)
3341         return;
3342
3343     memset(pregion, 0, vme_context->res_concurrent_tg_data.size);
3344
3345     for (i = 0; i < 16; i++) {
3346         memcpy(pregion, data_tmp[i], copy_blk_size);
3347         pregion++;
3348     }
3349
3350     hevc_state->hevc_wf_param.max_height_in_region = max_height;
3351     hevc_state->hevc_wf_param.num_regions = num_regions;
3352     hevc_state->hevc_wf_param.num_unit_in_wf = (frame_width_in_ctb + 2 * (max_height - 1) + hevc_state->num_regions_in_slice - 1) / hevc_state->num_regions_in_slice;
3353
3354     i965_unmap_gpe_resource(&vme_context->res_concurrent_tg_data);
3355 }
3356
3357 static void
3358 gen10_hevc_enc_generate_lculevel_data(VADriverContextP ctx,
3359                                       struct encode_state *encode_state,
3360                                       struct intel_encoder_context *encoder_context)
3361 {
3362     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
3363     VAEncPictureParameterBufferHEVC *pic_param;
3364     VAEncSliceParameterBufferHEVC *slice_param;
3365     gen10_hevc_lcu_level_data *plcu_level_data;
3366     int ui_start_lcu, slice_idx, i;
3367
3368     pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
3369
3370     plcu_level_data = (gen10_hevc_lcu_level_data *)
3371                       i965_map_gpe_resource(&vme_context->res_lculevel_input_data_buffer);
3372
3373     if (!plcu_level_data)
3374         return;
3375
3376     slice_idx = 0;
3377     for (ui_start_lcu = 0, slice_idx = 0; slice_idx < encode_state->num_slice_params_ext; slice_idx++) {
3378
3379         slice_param = NULL;
3380         if (encode_state->slice_params_ext[slice_idx] &&
3381             encode_state->slice_params_ext[slice_idx]->buffer)
3382             slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_idx]->buffer;
3383
3384         if (!slice_param)
3385             continue;
3386
3387         for (i = 0; i < slice_param->num_ctu_in_slice; i++, plcu_level_data++) {
3388             plcu_level_data->slice_start_lcu_idx = ui_start_lcu;
3389             plcu_level_data->slice_end_lcu_idx   = ui_start_lcu + slice_param->num_ctu_in_slice;
3390             plcu_level_data->slice_id            = slice_idx + 1;
3391             plcu_level_data->slice_qp            = pic_param->pic_init_qp + slice_param->slice_qp_delta;
3392         }
3393
3394         ui_start_lcu += slice_param->num_ctu_in_slice;
3395     }
3396
3397     i965_unmap_gpe_resource(&vme_context->res_lculevel_input_data_buffer);
3398 }
3399
3400 static void
3401 gen10_hevc_enc_mbenc_intra_curbe(VADriverContextP ctx,
3402                                  struct encode_state *encode_state,
3403                                  struct intel_encoder_context *encoder_context,
3404                                  struct i965_gpe_context *gpe_context)
3405 {
3406     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
3407     struct gen10_hevc_enc_state *hevc_state;
3408     gen10_hevc_mbenc_intra_curbe_data *mbenc_curbe;
3409     VAEncSliceParameterBufferHEVC *slice_param;
3410     VAEncPictureParameterBufferHEVC *pic_param;
3411     VAEncSequenceParameterBufferHEVC *seq_param;
3412     int slice_qp;
3413     unsigned int mode_cost[MAX_MODE_COST];
3414     int tu_idx;
3415
3416     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
3417
3418     seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
3419     slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
3420     pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
3421     mbenc_curbe = i965_gpe_context_map_curbe(gpe_context);
3422
3423     if (!mbenc_curbe)
3424         return;
3425
3426     memset(mbenc_curbe, 0, sizeof(gen10_hevc_mbenc_intra_curbe_data));
3427
3428     if (hevc_state->brc.target_usage < 3)
3429         tu_idx = 0;
3430     else if (hevc_state->brc.target_usage < 7)
3431         tu_idx = 1;
3432     else
3433         tu_idx = 2;
3434
3435     mbenc_curbe->dw0.frame_width_in_pixel  = hevc_state->frame_width;
3436     mbenc_curbe->dw0.frame_height_in_pixel = hevc_state->frame_height;
3437
3438     mbenc_curbe->dw1.penalty_intra32x32_nondc_pred = 36;
3439     mbenc_curbe->dw1.penalty_intra16x16_nondc_pred = 12;
3440     mbenc_curbe->dw1.penalty_intra8x8_nondc_pred   = 4;
3441
3442     mbenc_curbe->dw2.intra_sad_measure_adj =    2;
3443     slice_qp = slice_param->slice_qp_delta + pic_param->pic_init_qp;
3444     gen10_hevc_calc_costs(mode_cost, HEVC_SLICE_I, slice_qp, hevc_state->is_64lcu);
3445
3446     mbenc_curbe->dw3.mode0_cost             = mode_cost[0];
3447     mbenc_curbe->dw3.mode1_cost             = mode_cost[1];
3448     mbenc_curbe->dw3.mode2_cost             = mode_cost[2];
3449     mbenc_curbe->dw3.mode3_cost             = mode_cost[3];
3450
3451     mbenc_curbe->dw4.mode4_cost             = mode_cost[4];
3452     mbenc_curbe->dw4.mode5_cost             = mode_cost[5];
3453     mbenc_curbe->dw4.mode6_cost             = mode_cost[6];
3454     mbenc_curbe->dw4.mode7_cost             = mode_cost[7];
3455
3456     mbenc_curbe->dw5.mode8_cost             = mode_cost[8];
3457     mbenc_curbe->dw5.mode9_cost             = mode_cost[9];
3458     mbenc_curbe->dw5.ref_id_cost             = mode_cost[10];
3459     mbenc_curbe->dw5.chroma_intra_mode_cost  = mode_cost[11];
3460
3461     mbenc_curbe->dw6.log2_min_cu_size        = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
3462     mbenc_curbe->dw6.log2_max_cu_size        = seq_param->log2_diff_max_min_luma_coding_block_size +
3463                                                seq_param->log2_min_luma_coding_block_size_minus3 + 3;
3464     mbenc_curbe->dw6.log2_max_tu_size        = seq_param->log2_diff_max_min_transform_block_size +
3465                                                seq_param->log2_min_transform_block_size_minus2 + 2;
3466     mbenc_curbe->dw6.log2_min_tu_size        = seq_param->log2_min_transform_block_size_minus2 + 2;
3467     if (seq_param->max_transform_hierarchy_depth_intra)
3468         mbenc_curbe->dw6.max_tr_depth_intra = gen10_hevc_tu_settings[GEN10_LOG2_TU_MAX_DEPTH_INTRA_TU_PARAM][tu_idx];
3469     else
3470         mbenc_curbe->dw6.max_tr_depth_intra = 0;
3471
3472     mbenc_curbe->dw6.tu_split_flag          = 1;
3473
3474     mbenc_curbe->dw7.concurrent_group_num   = 1;
3475     mbenc_curbe->dw7.slice_qp               = slice_qp;
3476     mbenc_curbe->dw7.enc_tu_decision_mode   = gen10_hevc_tu_settings[GEN10_ENC_TU_DECISION_MODE_TU_PARAM][tu_idx];
3477
3478     mbenc_curbe->dw8.lambda_rd              = mode_cost[LAMBDA_RD_IDX];
3479     mbenc_curbe->dw9.lambda_md              = mode_cost[LAMBDA_MD_IDX];
3480     mbenc_curbe->dw10.intra_tusad_thr       = mode_cost[TUSAD_THR_IDX];
3481
3482     mbenc_curbe->dw11.slice_type             = HEVC_SLICE_I;
3483
3484     if (hevc_state->brc.brc_method == GEN10_HEVC_BRC_CQP)
3485         mbenc_curbe->dw11.qp_type           = GEN10_HEVC_QP_TYPE_CONSTANT;
3486     else
3487         mbenc_curbe->dw11.qp_type           = hevc_state->brc.lcu_brc_enabled ? GEN10_HEVC_QP_TYPE_CU_LEVEL : GEN10_HEVC_QP_TYPE_FRAME;
3488
3489     mbenc_curbe->dw11.enc_qt_decision_mode  = gen10_hevc_tu_settings[GEN10_ENC_QT_DECISION_MODE_TU_PARAM][tu_idx];
3490
3491     mbenc_curbe->dw12.pcm_8x8_sad_threshold = 4700;
3492
3493     mbenc_curbe->dw16.bti_vme_intra_pred_surface           = GEN10_HEVC_MBENC_INTRA_VME_PRED_CURR_PIC_IDX0;
3494     mbenc_curbe->dw17.bti_curr_picture_y                   = GEN10_HEVC_MBENC_INTRA_CURR_Y;
3495     mbenc_curbe->dw18.bti_enc_curecord_surface             = GEN10_HEVC_MBENC_INTRA_INTERMEDIATE_CU_RECORD;
3496     mbenc_curbe->dw19.bti_pak_obj_cmd_surface              = GEN10_HEVC_MBENC_INTRA_PAK_OBJ0;
3497     mbenc_curbe->dw20.bti_cu_packet_for_pak_surface        = GEN10_HEVC_MBENC_INTRA_PAK_CU_RECORD;
3498     mbenc_curbe->dw21.bti_internal_scratch_surface         = GEN10_HEVC_MBENC_INTRA_SCRATCH_SURFACE;
3499     mbenc_curbe->dw22.bti_cu_based_qp_surface              = GEN10_HEVC_MBENC_INTRA_CU_QP_DATA;
3500     mbenc_curbe->dw23.bti_const_data_lut_surface           = GEN10_HEVC_MBENC_INTRA_CONST_DATA_LUT;
3501     mbenc_curbe->dw24.bti_lcu_level_data_input_surface     = GEN10_HEVC_MBENC_INTRA_LCU_LEVEL_DATA_INPUT;
3502     mbenc_curbe->dw25.bti_concurrent_tg_data_surface       = GEN10_HEVC_MBENC_INTRA_CONCURRENT_TG_DATA;
3503     mbenc_curbe->dw26.bti_brc_combined_enc_param_surface   = GEN10_HEVC_MBENC_INTRA_BRC_COMBINED_ENC_PARAMETER_SURFACE;
3504     mbenc_curbe->dw27.bti_cu_split_surface                 = GEN10_HEVC_MBENC_INTRA_CU_SPLIT_SURFACE,
3505                       mbenc_curbe->dw28.bti_debug_surface                    = GEN10_HEVC_MBENC_INTRA_DEBUG_DUMP;
3506
3507     i965_gpe_context_unmap_curbe(gpe_context);
3508 }
3509
3510 static int
3511 gen10_hevc_compute_diff_poc(VADriverContextP ctx,
3512                             VAPictureHEVC *curr_pic,
3513                             VAPictureHEVC *ref_pic)
3514 {
3515     struct i965_driver_data *i965 = i965_driver_data(ctx);
3516     struct object_surface *obj_surface = NULL;
3517     int diff_poc = 0;
3518
3519     if (ref_pic->picture_id != VA_INVALID_SURFACE)
3520         obj_surface = SURFACE(ref_pic->picture_id);
3521
3522     if (!obj_surface || (ref_pic->flags & VA_PICTURE_HEVC_INVALID))
3523         return diff_poc;
3524
3525     diff_poc = curr_pic->pic_order_cnt - ref_pic->pic_order_cnt;
3526
3527     if (diff_poc < -128)
3528         diff_poc = -128;
3529     else if (diff_poc > 127)
3530         diff_poc = 127;
3531
3532     return diff_poc;
3533 }
3534
3535 static void
3536 gen10_hevc_enc_mbenc_inter_curbe(VADriverContextP ctx,
3537                                  struct encode_state *encode_state,
3538                                  struct intel_encoder_context *encoder_context,
3539                                  struct i965_gpe_context *gpe_context)
3540 {
3541     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
3542     struct gen10_hevc_enc_state *hevc_state;
3543     gen10_hevc_mbenc_inter_curbe_data *mbenc_curbe;
3544     VAEncSliceParameterBufferHEVC *slice_param;
3545     VAEncPictureParameterBufferHEVC *pic_param;
3546     VAEncSequenceParameterBufferHEVC *seq_param;
3547     int slice_qp;
3548     int tu_idx;
3549
3550     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
3551
3552     seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
3553     slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
3554     pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
3555     mbenc_curbe = i965_gpe_context_map_curbe(gpe_context);
3556
3557     if (!mbenc_curbe)
3558         return;
3559
3560     memset(mbenc_curbe, 0, sizeof(gen10_hevc_mbenc_inter_curbe_data));
3561
3562     if (hevc_state->brc.target_usage < 3)
3563         tu_idx = 0;
3564     else if (hevc_state->brc.target_usage < 7)
3565         tu_idx = 1;
3566     else
3567         tu_idx = 2;
3568
3569     slice_qp = slice_param->slice_qp_delta + pic_param->pic_init_qp;
3570     mbenc_curbe->dw0.frame_width_in_pixel  = hevc_state->frame_width;
3571     mbenc_curbe->dw0.frame_height_in_pixel = hevc_state->frame_height;
3572
3573     mbenc_curbe->dw1.log2_min_cu_size        = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
3574     mbenc_curbe->dw1.log2_max_cu_size        = seq_param->log2_diff_max_min_luma_coding_block_size +
3575                                                seq_param->log2_min_luma_coding_block_size_minus3 + 3;
3576     mbenc_curbe->dw1.log2_max_tu_size        = seq_param->log2_diff_max_min_transform_block_size +
3577                                                seq_param->log2_min_transform_block_size_minus2 + 2;
3578     mbenc_curbe->dw1.log2_min_tu_size        = seq_param->log2_min_transform_block_size_minus2 + 2;
3579
3580     if (seq_param->max_transform_hierarchy_depth_intra)
3581         mbenc_curbe->dw1.max_tr_depth_intra = gen10_hevc_tu_settings[GEN10_LOG2_TU_MAX_DEPTH_INTRA_TU_PARAM][tu_idx];
3582     else
3583         mbenc_curbe->dw1.max_tr_depth_intra = 0;
3584
3585     if (seq_param->max_transform_hierarchy_depth_inter)
3586         mbenc_curbe->dw1.max_tr_depth_inter = gen10_hevc_tu_settings[GEN10_LOG2_TU_MAX_DEPTH_INTER_TU_PARAM][tu_idx];
3587     else
3588         mbenc_curbe->dw1.max_tr_depth_inter = 0;
3589     mbenc_curbe->dw1.log2_para_merge_level = 2;
3590     mbenc_curbe->dw1.max_num_ime_search_center = 6;
3591
3592     mbenc_curbe->dw2.hme_flag                  = hevc_state->hme_enabled ? 3 : 0;
3593     mbenc_curbe->dw2.super_hme_enable          = hevc_state->b16xme_enabled ? 1 : 0;
3594     mbenc_curbe->dw2.hme_coarse_stage          = 1;
3595     mbenc_curbe->dw2.hme_subpel_mode           = 3;
3596     if (hevc_state->brc.brc_method == GEN10_HEVC_BRC_CQP)
3597         mbenc_curbe->dw2.qp_type           = GEN10_HEVC_QP_TYPE_CONSTANT;
3598     else
3599         mbenc_curbe->dw2.qp_type           = hevc_state->brc.lcu_brc_enabled ? GEN10_HEVC_QP_TYPE_CU_LEVEL : GEN10_HEVC_QP_TYPE_FRAME;
3600
3601     if (hevc_state->num_regions_in_slice > 1)
3602         mbenc_curbe->dw2.regions_in_slice_splits_enable = 1;
3603     else
3604         mbenc_curbe->dw2.regions_in_slice_splits_enable = 0;
3605
3606     mbenc_curbe->dw3.active_num_child_threads_cu64   = 0;
3607     mbenc_curbe->dw3.active_num_child_threads_cu32_0 = 0;
3608     mbenc_curbe->dw3.active_num_child_threads_cu32_1 = 0;
3609     mbenc_curbe->dw3.active_num_child_threads_cu32_2 = 0;
3610     mbenc_curbe->dw3.active_num_child_threads_cu32_3 = 0;
3611     mbenc_curbe->dw3.slice_qp               = slice_qp;
3612
3613     mbenc_curbe->dw4.skip_mode_enable       = 1;
3614     mbenc_curbe->dw4.adaptive_enable        = 1;
3615     mbenc_curbe->dw4.ime_ref_window_size    = 1;
3616     mbenc_curbe->dw4.hevc_min_cu_ctrl    = seq_param->log2_min_luma_coding_block_size_minus3;
3617
3618     mbenc_curbe->dw5.subpel_mode            = 3;
3619     mbenc_curbe->dw5.inter_sad_measure            = 2;
3620     mbenc_curbe->dw5.intra_sad_measure            = 2;
3621     mbenc_curbe->dw5.len_sp            = 63;
3622     mbenc_curbe->dw5.max_num_su        = 63;
3623     mbenc_curbe->dw5.refid_cost_mode        = 1;
3624
3625     mbenc_curbe->dw7.max_num_merge_cand     = slice_param->max_num_merge_cand;
3626     mbenc_curbe->dw7.slice_type     = slice_param->slice_type;
3627     mbenc_curbe->dw7.temporal_mvp_enable     = seq_param->seq_fields.bits.sps_temporal_mvp_enabled_flag;
3628     mbenc_curbe->dw7.mvp_collocated_from_l0  = slice_param->slice_fields.bits.collocated_from_l0_flag;
3629     mbenc_curbe->dw7.same_ref_list           = hevc_state->is_same_ref_list;
3630     if (slice_param->slice_type == HEVC_SLICE_B)
3631         mbenc_curbe->dw7.is_low_delay            = hevc_state->low_delay;
3632     else
3633         mbenc_curbe->dw7.is_low_delay            = 1;
3634
3635     mbenc_curbe->dw7.num_ref_idx_l0          = slice_param->num_ref_idx_l0_active_minus1 + 1;
3636     if (slice_param->slice_type == HEVC_SLICE_B)
3637         mbenc_curbe->dw7.num_ref_idx_l1          = slice_param->num_ref_idx_l1_active_minus1 + 1;
3638     else
3639         mbenc_curbe->dw7.num_ref_idx_l1          = 0;
3640
3641     mbenc_curbe->dw8.fwd_poc_num_l0_mtb_0   = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3642                                                                           &slice_param->ref_pic_list0[0]);
3643     mbenc_curbe->dw8.fwd_poc_num_l0_mtb_1   = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3644                                                                           &slice_param->ref_pic_list0[1]);
3645     mbenc_curbe->dw9.fwd_poc_num_l0_mtb_2   = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3646                                                                           &slice_param->ref_pic_list0[2]);
3647     mbenc_curbe->dw9.fwd_poc_num_l0_mtb_3   = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3648                                                                           &slice_param->ref_pic_list0[3]);
3649     if (slice_param->slice_type == HEVC_SLICE_B) {
3650         mbenc_curbe->dw8.bwd_poc_num_l1_mtb_0   = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3651                                                                               &slice_param->ref_pic_list1[0]);
3652         mbenc_curbe->dw8.bwd_poc_num_l1_mtb_1   = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3653                                                                               &slice_param->ref_pic_list1[1]);
3654         mbenc_curbe->dw9.bwd_poc_num_l1_mtb_2   = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3655                                                                               &slice_param->ref_pic_list1[2]);
3656         mbenc_curbe->dw9.bwd_poc_num_l1_mtb_3   = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3657                                                                               &slice_param->ref_pic_list1[3]);
3658     }
3659
3660     mbenc_curbe->dw13.ref_frame_hor_size = hevc_state->frame_width;
3661     mbenc_curbe->dw13.ref_frame_ver_size = hevc_state->frame_height;
3662
3663     mbenc_curbe->dw15.concurrent_gop_num = hevc_state->hevc_wf_param.num_regions;
3664     mbenc_curbe->dw15.total_thread_num_per_lcu = gen10_hevc_tu_settings[GEN10_TOTAL_THREAD_NUM_PER_LCU_TU_PARAM][tu_idx];
3665     mbenc_curbe->dw15.regions_in_slice_split_count = hevc_state->num_regions_in_slice;
3666
3667     mbenc_curbe->dw1.max_num_ime_search_center               = gen10_hevc_tu_settings[GEN10_MAX_NUM_IME_SEARCH_CENTER_TU_PARAM][tu_idx];
3668
3669     if (hevc_state->is_64lcu)
3670         mbenc_curbe->dw2.enable_cu64_check       = gen10_hevc_tu_settings[GEN10_ENABLE_CU64_CHECK_TU_PARAM][tu_idx];
3671     else
3672         mbenc_curbe->dw2.enable_cu64_check       = 0;
3673
3674     mbenc_curbe->dw2.enc_trans_simplify          = gen10_hevc_tu_settings[GEN10_ENC_TRANSFORM_SIMPLIFY_TU_PARAM][tu_idx];
3675     mbenc_curbe->dw2.enc_tu_dec_mode             = gen10_hevc_tu_settings[GEN10_ENC_TU_DECISION_MODE_TU_PARAM][tu_idx];
3676     mbenc_curbe->dw2.enc_tu_dec_for_all_qt       = gen10_hevc_tu_settings[GEN10_ENC_TU_DECISION_FOR_ALL_QT_TU_PARAM][tu_idx];
3677     mbenc_curbe->dw2.coef_bit_est_mode           = gen10_hevc_tu_settings[GEN10_COEF_BIT_EST_MODE_TU_PARAM][tu_idx];
3678     mbenc_curbe->dw2.enc_skip_dec_mode           = gen10_hevc_tu_settings[GEN10_ENC_SKIP_DECISION_MODE_TU_PARAM][tu_idx];
3679     mbenc_curbe->dw2.enc_qt_dec_mode             = gen10_hevc_tu_settings[GEN10_ENC_QT_DECISION_MODE_TU_PARAM][tu_idx];
3680     mbenc_curbe->dw2.lcu32_enc_rd_dec_mode_for_all_qt   = gen10_hevc_tu_settings[GEN10_ENC_RD_DECISION_MODE_FOR_ALL_QT_TU_PARAM][tu_idx];
3681     mbenc_curbe->dw2.lcu64_cu64_skip_check_only  = (tu_idx == 1);
3682     mbenc_curbe->dw2.sic_dys_run_path_mode       = gen10_hevc_tu_settings[GEN10_SIC_DYNAMIC_RUN_PATH_MODE][tu_idx];
3683
3684     if (hevc_state->is_64lcu) {
3685         mbenc_curbe->dw16.bti_curr_picture_y          =
3686             GEN10_HEVC_MBENC_INTER_LCU64_CURR_Y;
3687         mbenc_curbe->dw17.bti_enc_curecord_surface    =
3688             GEN10_HEVC_MBENC_INTER_LCU64_CU32_ENC_CU_RECORD;
3689         mbenc_curbe->dw18.bti_lcu64_enc_curecord2_surface  =
3690             GEN10_HEVC_MBENC_INTER_LCU64_SECOND_CU32_ENC_CU_RECORD;
3691         mbenc_curbe->dw19.bti_lcu64_pak_objcmd_surface =
3692             GEN10_HEVC_MBENC_INTER_LCU64_PAK_OBJ0;
3693         mbenc_curbe->dw20.bti_lcu64_pak_curecord_surface  =
3694             GEN10_HEVC_MBENC_INTER_LCU64_PAK_CU_RECORD;
3695         mbenc_curbe->dw21.bti_lcu64_vme_intra_inter_pred_surface =
3696             GEN10_HEVC_MBENC_INTER_LCU64_VME_PRED_CURR_PIC_IDX0;
3697         mbenc_curbe->dw22.bti_lcu64_cu16_qpdata_input_surface =
3698             GEN10_HEVC_MBENC_INTER_LCU64_CU16x16_QP_DATA;
3699         mbenc_curbe->dw23.bti_lcu64_cu32_enc_const_table_surface =
3700             GEN10_HEVC_MBENC_INTER_LCU64_CU32_ENC_CONST_TABLE;
3701         mbenc_curbe->dw24.bti_lcu64_colocated_mvdata_surface =
3702             GEN10_HEVC_MBENC_INTER_LCU64_COLOCATED_CU_MV_DATA;
3703         mbenc_curbe->dw25.bti_lcu64_hme_pred_surface         =
3704             GEN10_HEVC_MBENC_INTER_LCU64_HME_MOTION_PREDICTOR_DATA;
3705         mbenc_curbe->dw26.bti_lcu64_lculevel_data_input_surface      =
3706             GEN10_HEVC_MBENC_INTER_LCU64_LCU_LEVEL_DATA_INPUT;
3707         mbenc_curbe->dw27.bti_lcu64_cu32_enc_scratch_surface  =
3708             GEN10_HEVC_MBENC_INTER_LCU64_CU32_LCU_ENC_SCRATCH_SURFACE;
3709         mbenc_curbe->dw28.bti_lcu64_64x64_dist_surface        =
3710             GEN10_HEVC_MBENC_INTER_LCU64_64X64_DISTORTION_SURFACE;
3711         mbenc_curbe->dw29.bti_lcu64_concurrent_tg_data_surface =
3712             GEN10_HEVC_MBENC_INTER_LCU64_CONCURRENT_TG_DATA;
3713         mbenc_curbe->dw30.bti_lcu64_brc_combined_enc_param_surface  =
3714             GEN10_HEVC_MBENC_INTER_LCU64_BRC_COMBINED_ENC_PARAMETER_SURFACE;
3715         mbenc_curbe->dw31.bti_lcu64_cu32_jbq1d_buf_surface  =
3716             GEN10_HEVC_MBENC_INTER_LCU64_CU32_JOB_QUEUE_1D_SURFACE;
3717         mbenc_curbe->dw32.bti_lcu64_cu32_jbq2d_buf_surface  =
3718             GEN10_HEVC_MBENC_INTER_LCU64_CU32_JOB_QUEUE_2D_SURFACE;
3719         mbenc_curbe->dw33.bti_lcu64_cu32_residual_scratch_surface  =
3720             GEN10_HEVC_MBENC_INTER_LCU64_CU32_RESIDUAL_DATA_SCRATCH_SURFACE;
3721         mbenc_curbe->dw34.bti_lcu64_cusplit_surface =
3722             GEN10_HEVC_MBENC_INTER_LCU64_CU_SPLIT_DATA_SURFACE;
3723         mbenc_curbe->dw35.bti_lcu64_curr_picture_y_2xds   =
3724             GEN10_HEVC_MBENC_INTER_LCU64_CURR_Y_2xDS;
3725         mbenc_curbe->dw36.bti_lcu64_intermediate_curecord_surface =
3726             GEN10_HEVC_MBENC_INTER_LCU64_INTERMEDIATE_CU_RECORD;
3727         mbenc_curbe->dw37.bti_lcu64_const_data_lut_surface        =
3728             GEN10_HEVC_MBENC_INTER_LCU64_CONST64_DATA_LUT;
3729         mbenc_curbe->dw38.bti_lcu64_lcu_storage_surface           =
3730             GEN10_HEVC_MBENC_INTER_LCU64_LCU_STORAGE_SURFACE;
3731         mbenc_curbe->dw39.bti_lcu64_vme_inter_pred_2xds_surface   =
3732             GEN10_HEVC_MBENC_INTER_LCU64_VME_PRED_CURR_PIC_2xDS_IDX0;
3733         mbenc_curbe->dw40.bti_lcu64_cu64_jbq1d_surface            =
3734             GEN10_HEVC_MBENC_INTER_LCU64_JOB_QUEUE_1D_SURFACE;
3735         mbenc_curbe->dw41.bti_lcu64_cu64_jbq2d_surface            =
3736             GEN10_HEVC_MBENC_INTER_LCU64_JOB_QUEUE_2D_SURFACE;
3737         mbenc_curbe->dw42.bti_lcu64_cu64_residual_scratch_surface =
3738             GEN10_HEVC_MBENC_INTER_LCU64_RESIDUAL_DATA_SCRATCH_SURFACE;
3739         mbenc_curbe->dw43.bti_lcu64_debug_surface                 =
3740             GEN10_HEVC_MBENC_INTER_LCU64_DEBUG_SURFACE;
3741     } else {
3742         mbenc_curbe->dw16.bti_curr_picture_y                        =
3743             GEN10_HEVC_MBENC_INTER_LCU32_CURR_Y;
3744         mbenc_curbe->dw17.bti_enc_curecord_surface                  =
3745             GEN10_HEVC_MBENC_INTER_LCU32_ENC_CU_RECORD;
3746         mbenc_curbe->dw18.bti_lcu32_pak_objcmd_surface              =
3747             GEN10_HEVC_MBENC_INTER_LCU32_PAK_OBJ0;
3748         mbenc_curbe->dw19.bti_lcu32_pak_curecord_surface            =
3749             GEN10_HEVC_MBENC_INTER_LCU32_PAK_CU_RECORD;
3750         mbenc_curbe->dw20.bti_lcu32_vme_intra_inter_pred_surface    =
3751             GEN10_HEVC_MBENC_INTER_LCU32_VME_PRED_CURR_PIC_IDX0;
3752         mbenc_curbe->dw21.bti_lcu32_cu16_qpdata_input_surface   =
3753             GEN10_HEVC_MBENC_INTER_LCU32_CU16x16_QP_DATA;
3754         mbenc_curbe->dw22.bti_lcu32_enc_const_table_surface =
3755             GEN10_HEVC_MBENC_INTER_LCU32_ENC_CONST_TABLE;
3756         mbenc_curbe->dw23.bti_lcu32_colocated_mvdata_surface =
3757             GEN10_HEVC_MBENC_INTER_LCU32_COLOCATED_CU_MV_DATA;
3758         mbenc_curbe->dw24.bti_lcu32_hme_pred_data_surface    =
3759             GEN10_HEVC_MBENC_INTER_LCU32_HME_MOTION_PREDICTOR_DATA;
3760         mbenc_curbe->dw25.bti_lcu32_lculevel_data_input_surface   =
3761             GEN10_HEVC_MBENC_INTER_LCU32_LCU_LEVEL_DATA_INPUT;
3762         mbenc_curbe->dw26.bti_lcu32_enc_scratch_surface =
3763             GEN10_HEVC_MBENC_INTER_LCU32_LCU_ENC_SCRATCH_SURFACE;
3764         mbenc_curbe->dw27.bti_lcu32_concurrent_tg_data_surface =
3765             GEN10_HEVC_MBENC_INTER_LCU32_CONCURRENT_TG_DATA;
3766         mbenc_curbe->dw28.bti_lcu32_brc_combined_enc_param_surface  =
3767             GEN10_HEVC_MBENC_INTER_LCU32_BRC_COMBINED_ENC_PARAMETER_SURFACE;
3768         mbenc_curbe->dw29.bti_lcu32_jbq_scratch_surface  =
3769             GEN10_HEVC_MBENC_INTER_LCU32_JOB_QUEUE_SCRATCH_SURFACE;
3770         mbenc_curbe->dw30.bti_lcu32_cusplit_data_surface     =
3771             GEN10_HEVC_MBENC_INTER_LCU32_CU_SPLIT_DATA_SURFACE,
3772             mbenc_curbe->dw31.bti_lcu32_residual_scratch_surface =
3773                 GEN10_HEVC_MBENC_INTER_LCU32_RESIDUAL_DATA_SCRATCH_SURFACE,
3774                 mbenc_curbe->dw32.bti_lcu32_debug_surface =
3775                     GEN10_HEVC_MBENC_INTER_LCU32_DEBUG_SURFACE;
3776     }
3777
3778     i965_gpe_context_unmap_curbe(gpe_context);
3779 }
3780
3781 static void
3782 gen10_hevc_enc_mbenc_intra_surfaces(VADriverContextP ctx,
3783                                     struct encode_state *encode_state,
3784                                     struct intel_encoder_context *encoder_context,
3785                                     struct i965_gpe_context *gpe_context)
3786 {
3787     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
3788     struct gen10_hevc_enc_state *hevc_state;
3789     struct object_surface *obj_surface;
3790     struct object_surface *vme_surface;
3791     struct gen10_hevc_surface_priv *surface_priv;
3792     int input_bti, i;
3793
3794     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
3795
3796     obj_surface = encode_state->reconstructed_object;
3797
3798     surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
3799
3800     if (hevc_state->is_10bit)
3801         vme_surface = surface_priv->converted_surface;
3802     else
3803         vme_surface = encode_state->input_yuv_object;
3804
3805     input_bti = GEN10_HEVC_MBENC_INTRA_VME_PRED_CURR_PIC_IDX0;
3806     i965_add_adv_gpe_surface(ctx, gpe_context,
3807                              vme_surface,
3808                              input_bti);
3809     input_bti++;
3810
3811     for (i = 0; i < 8; i++) {
3812         i965_add_adv_gpe_surface(ctx, gpe_context,
3813                                  vme_surface,
3814                                  input_bti);
3815         input_bti++;
3816     }
3817
3818     input_bti = GEN10_HEVC_MBENC_INTRA_CURR_Y;
3819
3820     i965_add_2d_gpe_surface(ctx,
3821                             gpe_context,
3822                             vme_surface,
3823                             0,
3824                             1,
3825                             I965_SURFACEFORMAT_R8_UNORM,
3826                             input_bti);
3827     i965_add_2d_gpe_surface(ctx,
3828                             gpe_context,
3829                             vme_surface,
3830                             1,
3831                             1,
3832                             I965_SURFACEFORMAT_R16_UINT,
3833                             input_bti + 1);
3834
3835
3836     input_bti = GEN10_HEVC_MBENC_INTRA_INTERMEDIATE_CU_RECORD;
3837     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3838                                    &vme_context->res_temp_curecord_lcu32_surface,
3839                                    1, I965_SURFACEFORMAT_R8_UNORM,
3840                                    input_bti);
3841
3842     i965_add_buffer_gpe_surface(ctx, gpe_context,
3843                                 &vme_context->res_mb_code_surface,
3844                                 0,
3845                                 BYTES2UINT32(hevc_state->cu_records_offset),
3846                                 0,
3847                                 input_bti + 1);
3848
3849
3850     i965_add_buffer_gpe_surface(ctx, gpe_context,
3851                                 &vme_context->res_mb_code_surface,
3852                                 0,
3853                                 BYTES2UINT32(vme_context->res_mb_code_surface.size - hevc_state->cu_records_offset),
3854                                 hevc_state->cu_records_offset,
3855                                 input_bti + 2);
3856
3857     input_bti = GEN10_HEVC_MBENC_INTRA_SCRATCH_SURFACE;
3858     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3859                                    &vme_context->res_scratch_surface,
3860                                    1, I965_SURFACEFORMAT_R8_UNORM,
3861                                    input_bti);
3862
3863     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3864                                    &vme_context->res_16x16_qp_data_surface,
3865                                    1, I965_SURFACEFORMAT_R8_UNORM,
3866                                    input_bti + 1);
3867
3868     i965_add_buffer_gpe_surface(ctx, gpe_context,
3869                                 &vme_context->res_enc_const_table_intra,
3870                                 0,
3871                                 BYTES2UINT32(vme_context->res_enc_const_table_intra.size),
3872                                 0,
3873                                 input_bti + 2);
3874
3875     i965_add_buffer_gpe_surface(ctx, gpe_context,
3876                                 &vme_context->res_lculevel_input_data_buffer,
3877                                 0,
3878                                 BYTES2UINT32(vme_context->res_lculevel_input_data_buffer.size),
3879                                 0,
3880                                 input_bti + 3);
3881
3882     i965_add_buffer_gpe_surface(ctx, gpe_context,
3883                                 &vme_context->res_concurrent_tg_data,
3884                                 0,
3885                                 BYTES2UINT32(vme_context->res_concurrent_tg_data.size),
3886                                 0,
3887                                 input_bti + 4);
3888
3889     i965_add_buffer_gpe_surface(ctx, gpe_context,
3890                                 &vme_context->res_brc_input_enc_kernel_buffer,
3891                                 0,
3892                                 BYTES2UINT32(vme_context->res_brc_input_enc_kernel_buffer.size),
3893                                 0,
3894                                 input_bti + 5);
3895
3896     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3897                                    &vme_context->res_cu_split_surface,
3898                                    1, I965_SURFACEFORMAT_R8_UNORM,
3899                                    input_bti + 6);
3900
3901     i965_add_buffer_gpe_surface(ctx, gpe_context,
3902                                 &vme_context->res_kernel_trace_data,
3903                                 0,
3904                                 BYTES2UINT32(vme_context->res_kernel_trace_data.size),
3905                                 0,
3906                                 input_bti + 7);
3907 }
3908
3909 static void
3910 gen10_hevc_enc_mbenc_inter_lcu32_surfaces(VADriverContextP ctx,
3911                                           struct encode_state *encode_state,
3912                                           struct intel_encoder_context *encoder_context,
3913                                           struct i965_gpe_context *gpe_context)
3914 {
3915     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
3916     struct gen10_hevc_enc_state *hevc_state;
3917     struct gen10_hevc_enc_frame_info *frame_info;
3918     struct gen10_hevc_enc_common_res *common_res;
3919     VAEncSliceParameterBufferHEVC *slice_param;
3920     VAEncPictureParameterBufferHEVC *pic_param;
3921     struct object_surface *obj_surface, *vme_surface;
3922     struct gen10_hevc_surface_priv *surface_priv;
3923     struct object_surface *l0_surface = NULL, *l1_surface = NULL, *tmp_surface;
3924     int input_bti, i;
3925
3926     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
3927     frame_info = &vme_context->frame_info;
3928     common_res = &vme_context->common_res;
3929
3930     pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
3931     slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
3932
3933     obj_surface = encode_state->reconstructed_object;
3934
3935     surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
3936
3937     if (hevc_state->is_10bit)
3938         vme_surface = surface_priv->converted_surface;
3939     else
3940         vme_surface = encode_state->input_yuv_object;
3941
3942     input_bti = GEN10_HEVC_MBENC_INTER_LCU32_CURR_Y;
3943     i965_add_2d_gpe_surface(ctx,
3944                             gpe_context,
3945                             vme_surface,
3946                             0,
3947                             1,
3948                             I965_SURFACEFORMAT_R8_UNORM,
3949                             input_bti);
3950     i965_add_2d_gpe_surface(ctx,
3951                             gpe_context,
3952                             vme_surface,
3953                             1,
3954                             1,
3955                             I965_SURFACEFORMAT_R16_UINT,
3956                             input_bti + 1);
3957
3958     input_bti = GEN10_HEVC_MBENC_INTER_LCU32_ENC_CU_RECORD;
3959     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3960                                    &vme_context->res_temp_curecord_lcu32_surface,
3961                                    1, I965_SURFACEFORMAT_R8_UNORM,
3962                                    input_bti);
3963
3964     input_bti = GEN10_HEVC_MBENC_INTER_LCU32_PAK_OBJ0;
3965     i965_add_buffer_gpe_surface(ctx, gpe_context,
3966                                 &vme_context->res_mb_code_surface,
3967                                 0,
3968                                 BYTES2UINT32(hevc_state->cu_records_offset),
3969                                 0,
3970                                 input_bti);
3971     i965_add_buffer_gpe_surface(ctx, gpe_context,
3972                                 &vme_context->res_mb_code_surface,
3973                                 0,
3974                                 BYTES2UINT32(vme_context->res_mb_code_surface.size -
3975                                              hevc_state->cu_records_offset),
3976                                 hevc_state->cu_records_offset,
3977                                 input_bti + 1);
3978
3979     input_bti = GEN10_HEVC_MBENC_INTER_LCU32_VME_PRED_CURR_PIC_IDX0;
3980
3981     i965_add_adv_gpe_surface(ctx, gpe_context,
3982                              vme_surface,
3983                              input_bti);
3984
3985     if (frame_info->mapped_ref_idx_list0[0] >= 0)
3986         l0_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[0]].obj_surface;
3987     else
3988         l0_surface = NULL;
3989
3990     if (!l0_surface || !l0_surface->private_data)
3991         l0_surface = vme_surface;
3992     else {
3993         surface_priv = (struct gen10_hevc_surface_priv *)(l0_surface->private_data);
3994         if (hevc_state->is_10bit)
3995             l0_surface = surface_priv->converted_surface;
3996     }
3997
3998     if (slice_param->slice_type == HEVC_SLICE_B) {
3999         if (frame_info->mapped_ref_idx_list1[0] > 0)
4000             l1_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[0]].obj_surface;
4001         else
4002             l1_surface = NULL;
4003
4004         if (!l1_surface || !l1_surface->private_data)
4005             l1_surface = l0_surface;
4006         else {
4007             surface_priv = (struct gen10_hevc_surface_priv *)(l1_surface->private_data);
4008             if (hevc_state->is_10bit)
4009                 l1_surface = surface_priv->converted_surface;
4010         }
4011     }
4012
4013     input_bti = GEN10_HEVC_MBENC_INTER_LCU32_VME_PRED_FWD_PIC_IDX0;
4014     for (i = 0; i < 4; i++) {
4015         if (frame_info->mapped_ref_idx_list0[i] >= 0)
4016             tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[i]].obj_surface;
4017         else
4018             tmp_surface = NULL;
4019
4020         if (tmp_surface && tmp_surface->private_data) {
4021             surface_priv = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
4022             if (hevc_state->is_10bit)
4023                 tmp_surface = surface_priv->converted_surface;
4024
4025             i965_add_adv_gpe_surface(ctx, gpe_context,
4026                                      tmp_surface,
4027                                      input_bti + 2 * i);
4028         } else
4029             i965_add_adv_gpe_surface(ctx, gpe_context,
4030                                      l0_surface,
4031                                      input_bti + 2 * i);
4032
4033         if (slice_param->slice_type == HEVC_SLICE_B) {
4034             if (frame_info->mapped_ref_idx_list1[i] >= 0)
4035                 tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[0]].obj_surface;
4036             else
4037                 tmp_surface = NULL;
4038
4039             if (tmp_surface && tmp_surface->private_data) {
4040                 surface_priv = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
4041                 if (hevc_state->is_10bit)
4042                     tmp_surface = surface_priv->converted_surface;
4043
4044                 i965_add_adv_gpe_surface(ctx, gpe_context,
4045                                          tmp_surface,
4046                                          input_bti + 2 * i + 1);
4047             } else
4048                 i965_add_adv_gpe_surface(ctx, gpe_context,
4049                                          l1_surface,
4050                                          input_bti + 2 * i + 1);
4051         }
4052     }
4053
4054     input_bti = GEN10_HEVC_MBENC_INTER_LCU32_CU16x16_QP_DATA;
4055     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4056                                    &vme_context->res_16x16_qp_data_surface,
4057                                    1, I965_SURFACEFORMAT_R8_UNORM,
4058                                    input_bti);
4059
4060     i965_add_buffer_gpe_surface(ctx, gpe_context,
4061                                 &vme_context->res_enc_const_table_inter,
4062                                 0,
4063                                 BYTES2UINT32(vme_context->res_enc_const_table_inter.size),
4064                                 0,
4065                                 input_bti + 1);
4066
4067     if (slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag &&
4068         (pic_param->collocated_ref_pic_index != 0xFF)) {
4069         obj_surface = common_res->reference_pics[pic_param->collocated_ref_pic_index].obj_surface;
4070         if (obj_surface && obj_surface->private_data) {
4071             surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
4072
4073             i965_add_buffer_gpe_surface(ctx, gpe_context,
4074                                         &surface_priv->motion_vector_temporal,
4075                                         0,
4076                                         BYTES2UINT32(surface_priv->motion_vector_temporal.size),
4077                                         0,
4078                                         input_bti + 2);
4079         }
4080     }
4081
4082     input_bti = GEN10_HEVC_MBENC_INTER_LCU32_HME_MOTION_PREDICTOR_DATA;
4083     if (hevc_state->hme_enabled) {
4084         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4085                                        &vme_context->res_s4x_memv_data_surface,
4086                                        1, I965_SURFACEFORMAT_R8_UNORM,
4087                                        input_bti);
4088     }
4089
4090     input_bti = GEN10_HEVC_MBENC_INTER_LCU32_LCU_LEVEL_DATA_INPUT;
4091     i965_add_buffer_gpe_surface(ctx, gpe_context,
4092                                 &vme_context->res_lculevel_input_data_buffer,
4093                                 0,
4094                                 BYTES2UINT32(vme_context->res_lculevel_input_data_buffer.size),
4095                                 0,
4096                                 input_bti);
4097
4098     i965_add_buffer_gpe_surface(ctx, gpe_context,
4099                                 &vme_context->res_enc_scratch_buffer,
4100                                 0,
4101                                 BYTES2UINT32(vme_context->res_enc_scratch_buffer.size),
4102                                 0,
4103                                 input_bti + 1);
4104
4105
4106     input_bti = GEN10_HEVC_MBENC_INTER_LCU32_CONCURRENT_TG_DATA;
4107     i965_add_buffer_gpe_surface(ctx, gpe_context,
4108                                 &vme_context->res_concurrent_tg_data,
4109                                 0,
4110                                 BYTES2UINT32(vme_context->res_concurrent_tg_data.size),
4111                                 0,
4112                                 input_bti);
4113
4114     i965_add_buffer_gpe_surface(ctx, gpe_context,
4115                                 &vme_context->res_brc_input_enc_kernel_buffer,
4116                                 0,
4117                                 BYTES2UINT32(vme_context->res_brc_input_enc_kernel_buffer.size),
4118                                 0,
4119                                 input_bti + 1);
4120
4121     i965_add_buffer_gpe_surface(ctx, gpe_context,
4122                                 &vme_context->res_jbq_header_buffer,
4123                                 1,
4124                                 vme_context->res_jbq_header_buffer.size,
4125                                 0,
4126                                 input_bti + 2);
4127
4128     input_bti = GEN10_HEVC_MBENC_INTER_LCU32_CU_SPLIT_DATA_SURFACE;
4129     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4130                                    &vme_context->res_cu_split_surface,
4131                                    1, I965_SURFACEFORMAT_R8_UNORM,
4132                                    input_bti);
4133
4134     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4135                                    &vme_context->res_residual_scratch_lcu32_surface,
4136                                    1, I965_SURFACEFORMAT_R8_UNORM,
4137                                    input_bti + 1);
4138
4139     i965_add_buffer_gpe_surface(ctx, gpe_context,
4140                                 &vme_context->res_kernel_trace_data,
4141                                 0,
4142                                 BYTES2UINT32(vme_context->res_kernel_trace_data.size),
4143                                 0,
4144                                 input_bti + 2);
4145 }
4146
4147 static void
4148 gen10_hevc_enc_mbenc_inter_lcu64_surfaces(VADriverContextP ctx,
4149                                           struct encode_state *encode_state,
4150                                           struct intel_encoder_context *encoder_context,
4151                                           struct i965_gpe_context *gpe_context)
4152 {
4153     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
4154     struct gen10_hevc_enc_state *hevc_state;
4155     struct gen10_hevc_enc_common_res *common_res;
4156     struct gen10_hevc_enc_frame_info *frame_info;
4157     struct object_surface *obj_surface, *vme_surface;
4158     struct gen10_hevc_surface_priv *surface_priv;
4159     struct object_surface *l0_surface, *l1_surface, *tmp_surface;
4160     VAEncSliceParameterBufferHEVC *slice_param;
4161     VAEncPictureParameterBufferHEVC *pic_param;
4162     int input_bti, i;
4163
4164     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
4165     frame_info = &vme_context->frame_info;
4166     common_res = &vme_context->common_res;
4167
4168     pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
4169     slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
4170
4171     obj_surface = encode_state->reconstructed_object;
4172
4173     surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
4174
4175     if (hevc_state->is_10bit)
4176         vme_surface = surface_priv->converted_surface;
4177     else
4178         vme_surface = encode_state->input_yuv_object;
4179
4180     input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CURR_Y;
4181     i965_add_2d_gpe_surface(ctx,
4182                             gpe_context,
4183                             vme_surface,
4184                             0,
4185                             1,
4186                             I965_SURFACEFORMAT_R8_UNORM,
4187                             input_bti);
4188     i965_add_2d_gpe_surface(ctx,
4189                             gpe_context,
4190                             vme_surface,
4191                             1,
4192                             1,
4193                             I965_SURFACEFORMAT_R16_UINT,
4194                             input_bti + 1);
4195
4196     input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CU32_ENC_CU_RECORD;
4197     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4198                                    &vme_context->res_temp_curecord_lcu32_surface,
4199                                    1, I965_SURFACEFORMAT_R8_UNORM,
4200                                    input_bti);
4201
4202     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4203                                    &vme_context->res_temp2_curecord_lcu32_surface,
4204                                    1, I965_SURFACEFORMAT_R8_UNORM,
4205                                    input_bti + 1);
4206
4207     input_bti = GEN10_HEVC_MBENC_INTER_LCU64_PAK_OBJ0;
4208     i965_add_buffer_gpe_surface(ctx, gpe_context,
4209                                 &vme_context->res_mb_code_surface,
4210                                 1,
4211                                 hevc_state->cu_records_offset,
4212                                 0,
4213                                 input_bti);
4214     i965_add_buffer_gpe_surface(ctx, gpe_context,
4215                                 &vme_context->res_mb_code_surface,
4216                                 0,
4217                                 vme_context->res_mb_code_surface.size,
4218                                 hevc_state->cu_records_offset,
4219                                 input_bti + 1);
4220
4221     input_bti = GEN10_HEVC_MBENC_INTER_LCU64_VME_PRED_CURR_PIC_IDX0;
4222
4223     i965_add_adv_gpe_surface(ctx, gpe_context,
4224                              vme_surface,
4225                              input_bti);
4226
4227     if (frame_info->mapped_ref_idx_list0[0] >= 0)
4228         l0_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[0]].obj_surface;
4229     else
4230         l0_surface = NULL;
4231
4232     if (!l0_surface || !l0_surface->private_data)
4233         l0_surface = vme_surface;
4234     else {
4235         surface_priv = (struct gen10_hevc_surface_priv *)(l0_surface->private_data);
4236         if (hevc_state->is_10bit)
4237             l0_surface = surface_priv->converted_surface;
4238     }
4239
4240     l1_surface = l0_surface;
4241     if (slice_param->slice_type == HEVC_SLICE_B) {
4242         if (frame_info->mapped_ref_idx_list1[0] > 0)
4243             l1_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[0]].obj_surface;
4244         else
4245             l1_surface = NULL;
4246
4247         if (!l1_surface || !l1_surface->private_data)
4248             l1_surface = l0_surface;
4249         else {
4250             surface_priv = (struct gen10_hevc_surface_priv *)(l1_surface->private_data);
4251             if (hevc_state->is_10bit)
4252                 l1_surface = surface_priv->converted_surface;
4253         }
4254     }
4255
4256     input_bti = GEN10_HEVC_MBENC_INTER_LCU64_VME_PRED_FWD_PIC_IDX0;
4257     for (i = 0; i < 4; i++) {
4258         if (frame_info->mapped_ref_idx_list0[i] >= 0)
4259             tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[i]].obj_surface;
4260         else
4261             tmp_surface = NULL;
4262
4263         if (tmp_surface && tmp_surface->private_data) {
4264             surface_priv = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
4265             if (hevc_state->is_10bit)
4266                 tmp_surface = surface_priv->converted_surface;
4267
4268             i965_add_adv_gpe_surface(ctx, gpe_context,
4269                                      tmp_surface,
4270                                      input_bti + 2 * i);
4271         } else
4272             i965_add_adv_gpe_surface(ctx, gpe_context,
4273                                      l0_surface,
4274                                      input_bti + 2 * i);
4275
4276         if (slice_param->slice_type == HEVC_SLICE_B) {
4277             if (frame_info->mapped_ref_idx_list1[i] >= 0)
4278                 tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[i]].obj_surface;
4279             else
4280                 tmp_surface = NULL;
4281
4282             if (tmp_surface && tmp_surface->private_data) {
4283                 surface_priv = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
4284                 if (hevc_state->is_10bit)
4285                     tmp_surface = surface_priv->converted_surface;
4286
4287                 i965_add_adv_gpe_surface(ctx, gpe_context,
4288                                          tmp_surface,
4289                                          input_bti + 2 * i + 1);
4290             } else
4291                 i965_add_adv_gpe_surface(ctx, gpe_context,
4292                                          l1_surface,
4293                                          input_bti + 2 * i + 1);
4294         }
4295     }
4296
4297     input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CU16x16_QP_DATA;
4298     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4299                                    &vme_context->res_16x16_qp_data_surface,
4300                                    1, I965_SURFACEFORMAT_R8_UNORM,
4301                                    input_bti);
4302
4303     i965_add_buffer_gpe_surface(ctx, gpe_context,
4304                                 &vme_context->res_enc_const_table_inter,
4305                                 0,
4306                                 vme_context->res_enc_const_table_inter.size,
4307                                 0,
4308                                 input_bti + 1);
4309
4310     if (slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag &&
4311         (pic_param->collocated_ref_pic_index != 0xFF)) {
4312         obj_surface = common_res->reference_pics[pic_param->collocated_ref_pic_index].obj_surface;
4313         if (obj_surface && obj_surface->private_data) {
4314             surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
4315
4316             i965_add_buffer_gpe_surface(ctx, gpe_context,
4317                                         &surface_priv->motion_vector_temporal,
4318                                         0,
4319                                         surface_priv->motion_vector_temporal.size,
4320                                         0,
4321                                         input_bti + 2);
4322         }
4323     }
4324
4325     input_bti = GEN10_HEVC_MBENC_INTER_LCU64_HME_MOTION_PREDICTOR_DATA;
4326     if (hevc_state->hme_enabled) {
4327         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4328                                        &vme_context->res_s4x_memv_data_surface,
4329                                        1, I965_SURFACEFORMAT_R8_UNORM,
4330                                        input_bti);
4331     }
4332
4333     input_bti = GEN10_HEVC_MBENC_INTER_LCU64_LCU_LEVEL_DATA_INPUT;
4334     i965_add_buffer_gpe_surface(ctx, gpe_context,
4335                                 &vme_context->res_lculevel_input_data_buffer,
4336                                 0,
4337                                 vme_context->res_lculevel_input_data_buffer.size,
4338                                 0,
4339                                 input_bti);
4340
4341     i965_add_buffer_gpe_surface(ctx, gpe_context,
4342                                 &vme_context->res_enc_scratch_buffer,
4343                                 0,
4344                                 vme_context->res_enc_scratch_buffer.size,
4345                                 0,
4346                                 input_bti + 1);
4347
4348     i965_add_buffer_gpe_surface(ctx, gpe_context,
4349                                 &vme_context->res_64x64_dist_buffer,
4350                                 1,
4351                                 vme_context->res_64x64_dist_buffer.size,
4352                                 0,
4353                                 input_bti + 2);
4354
4355
4356
4357     input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CONCURRENT_TG_DATA;
4358     i965_add_buffer_gpe_surface(ctx, gpe_context,
4359                                 &vme_context->res_concurrent_tg_data,
4360                                 0,
4361                                 vme_context->res_concurrent_tg_data.size,
4362                                 0,
4363                                 input_bti);
4364
4365     i965_add_buffer_gpe_surface(ctx, gpe_context,
4366                                 &vme_context->res_brc_input_enc_kernel_buffer,
4367                                 0,
4368                                 vme_context->res_brc_input_enc_kernel_buffer.size,
4369                                 0,
4370                                 input_bti + 1);
4371
4372
4373     input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CU32_JOB_QUEUE_1D_SURFACE;
4374     i965_add_buffer_gpe_surface(ctx, gpe_context,
4375                                 &vme_context->res_jbq_header_buffer,
4376                                 1,
4377                                 vme_context->res_jbq_header_buffer.size,
4378                                 0,
4379                                 input_bti);
4380
4381     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4382                                    &vme_context->res_jbq_data_lcu32_surface,
4383                                    1, I965_SURFACEFORMAT_R8_UNORM,
4384                                    input_bti + 1);
4385
4386     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4387                                    &vme_context->res_residual_scratch_lcu32_surface,
4388                                    1, I965_SURFACEFORMAT_R8_UNORM,
4389                                    input_bti + 2);
4390
4391
4392     input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CU_SPLIT_DATA_SURFACE;
4393     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4394                                    &vme_context->res_cu_split_surface,
4395                                    1, I965_SURFACEFORMAT_R8_UNORM,
4396                                    input_bti);
4397
4398     input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CURR_Y_2xDS;
4399     obj_surface = encode_state->reconstructed_object;
4400     surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
4401     vme_surface = surface_priv->scaled_2x_surface;
4402
4403     i965_add_2d_gpe_surface(ctx,
4404                             gpe_context,
4405                             vme_surface,
4406                             0,
4407                             1,
4408                             I965_SURFACEFORMAT_R8_UNORM,
4409                             input_bti);
4410
4411     input_bti = GEN10_HEVC_MBENC_INTER_LCU64_INTERMEDIATE_CU_RECORD;
4412     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4413                                    &vme_context->res_temp_curecord_surface_lcu64,
4414                                    1, I965_SURFACEFORMAT_R8_UNORM,
4415                                    input_bti);
4416
4417     i965_add_buffer_gpe_surface(ctx, gpe_context,
4418                                 &vme_context->res_enc_const_table_inter_lcu64,
4419                                 1,
4420                                 vme_context->res_enc_const_table_inter_lcu64.size,
4421                                 0,
4422                                 input_bti + 1);
4423
4424     i965_add_buffer_gpe_surface(ctx, gpe_context,
4425                                 &vme_context->res_enc_scratch_lcu64_buffer,
4426                                 1,
4427                                 vme_context->res_enc_scratch_lcu64_buffer.size,
4428                                 0,
4429                                 input_bti + 2);
4430
4431     if (frame_info->mapped_ref_idx_list0[0] >= 0)
4432         l0_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[0]].obj_surface;
4433     else
4434         l0_surface = NULL;
4435
4436     if (!l0_surface || !l0_surface->private_data) {
4437         l0_surface = vme_surface;
4438     } else {
4439         surface_priv = (struct gen10_hevc_surface_priv *)(l0_surface->private_data);
4440         l0_surface = surface_priv->scaled_2x_surface;
4441     }
4442
4443     l1_surface = l0_surface;
4444     if (slice_param->slice_type == HEVC_SLICE_B) {
4445         if (frame_info->mapped_ref_idx_list1[0] > 0)
4446             l1_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[0]].obj_surface;
4447         else
4448             l1_surface = NULL;
4449
4450         if (!l1_surface || !l1_surface->private_data)
4451             l1_surface = l0_surface;
4452         else {
4453             surface_priv = (struct gen10_hevc_surface_priv *)(l1_surface->private_data);
4454             l1_surface = surface_priv->scaled_2x_surface;
4455         }
4456     }
4457
4458     input_bti = GEN10_HEVC_MBENC_INTER_LCU64_VME_PRED_CURR_PIC_2xDS_IDX0;
4459     i965_add_adv_gpe_surface(ctx, gpe_context,
4460                              vme_surface,
4461                              input_bti);
4462
4463     input_bti = GEN10_HEVC_MBENC_INTER_LCU64_VME_PRED_CURR_PIC_2xDS_IDX0;
4464     for (i = 0; i < 4; i++) {
4465         if (frame_info->mapped_ref_idx_list0[i] >= 0)
4466             tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[i]].obj_surface;
4467         else
4468             tmp_surface = NULL;
4469
4470         if (tmp_surface && tmp_surface->private_data) {
4471             surface_priv = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
4472             tmp_surface = surface_priv->scaled_2x_surface;
4473
4474             i965_add_adv_gpe_surface(ctx, gpe_context,
4475                                      tmp_surface,
4476                                      input_bti + 2 * i);
4477         } else {
4478             i965_add_adv_gpe_surface(ctx, gpe_context,
4479                                      l0_surface,
4480                                      input_bti + 2 * i);
4481
4482         }
4483
4484         if (slice_param->slice_type == HEVC_SLICE_B) {
4485             if (frame_info->mapped_ref_idx_list1[i] >= 0)
4486                 tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[i]].obj_surface;
4487             else
4488                 tmp_surface = NULL;
4489
4490             if (tmp_surface && tmp_surface->private_data) {
4491                 surface_priv = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
4492                 tmp_surface = surface_priv->scaled_2x_surface;
4493
4494                 i965_add_adv_gpe_surface(ctx, gpe_context,
4495                                          tmp_surface,
4496                                          input_bti + 2 * i + 1);
4497             } else
4498                 i965_add_adv_gpe_surface(ctx, gpe_context,
4499                                          l1_surface,
4500                                          input_bti + 2 * i + 1);
4501         }
4502     }
4503
4504     input_bti = GEN10_HEVC_MBENC_INTER_LCU64_JOB_QUEUE_1D_SURFACE;
4505
4506     i965_add_buffer_gpe_surface(ctx, gpe_context,
4507                                 &vme_context->res_jbq_header_lcu64_buffer,
4508                                 1,
4509                                 vme_context->res_jbq_header_lcu64_buffer.size,
4510                                 0,
4511                                 input_bti);
4512
4513     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4514                                    &vme_context->res_jbq_data_lcu64_surface,
4515                                    1, I965_SURFACEFORMAT_R8_UNORM,
4516                                    input_bti + 1);
4517
4518     input_bti = GEN10_HEVC_MBENC_INTER_LCU64_RESIDUAL_DATA_SCRATCH_SURFACE;
4519     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4520                                    &vme_context->res_residual_scratch_lcu64_surface,
4521                                    1, I965_SURFACEFORMAT_R8_UNORM,
4522                                    input_bti);
4523
4524
4525     input_bti = GEN10_HEVC_MBENC_INTER_LCU64_DEBUG_SURFACE;
4526     i965_add_buffer_gpe_surface(ctx, gpe_context,
4527                                 &vme_context->res_kernel_trace_data,
4528                                 0,
4529                                 vme_context->res_kernel_trace_data.size,
4530                                 0,
4531                                 input_bti);
4532 }
4533
4534 static void
4535 gen10_hevc_mbenc_init_walker_param(struct gen10_hevc_enc_state *hevc_state,
4536                                    struct gen10_hevc_enc_kernel_walker_parameter *kernel_walker_param,
4537                                    struct gpe_media_object_walker_parameter *media_object_walker_param,
4538                                    struct gen10_hevc_gpe_scoreboard *hw_scoreboard)
4539 {
4540     int mw_26zx_h_factor;
4541
4542     if (kernel_walker_param->use_custom_walker == 0) {
4543         hw_scoreboard->scoreboard0.mask = 0x7F;
4544         hw_scoreboard->scoreboard0.enable = hevc_state->use_hw_scoreboard;
4545         hw_scoreboard->scoreboard0.type = hevc_state->use_hw_non_stalling_scoreboard;
4546
4547         hw_scoreboard->dw1.scoreboard1.delta_x0 = 0xF;
4548         hw_scoreboard->dw1.scoreboard1.delta_y0 = 0x0;
4549
4550         hw_scoreboard->dw1.scoreboard1.delta_x1 = 0x0;
4551         hw_scoreboard->dw1.scoreboard1.delta_y1 = 0xF;
4552
4553         hw_scoreboard->dw1.scoreboard1.delta_x2 = 1;
4554         hw_scoreboard->dw1.scoreboard1.delta_y2 = 0xF;
4555
4556         hw_scoreboard->dw1.scoreboard1.delta_x3 = 0xF;
4557         hw_scoreboard->dw1.scoreboard1.delta_y3 = 0xF;
4558
4559         hw_scoreboard->dw2.scoreboard2.delta_x4 = 0;
4560         hw_scoreboard->dw2.scoreboard2.delta_y4 = 0;
4561         hw_scoreboard->dw2.scoreboard2.delta_x5 = 0;
4562         hw_scoreboard->dw2.scoreboard2.delta_y5 = 0;
4563         hw_scoreboard->dw2.scoreboard2.delta_x6 = 0;
4564         hw_scoreboard->dw2.scoreboard2.delta_y6 = 0;
4565         hw_scoreboard->dw2.scoreboard2.delta_x7 = 0;
4566         hw_scoreboard->dw2.scoreboard2.delta_y7 = 0;
4567
4568         gen10_init_media_object_walker_parameter(kernel_walker_param, media_object_walker_param);
4569         return;
4570     }
4571
4572     media_object_walker_param->color_count_minus1 = hevc_state->hevc_wf_param.num_regions - 1;
4573
4574     media_object_walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
4575
4576     media_object_walker_param->local_loop_exec_count = 0xFFF;
4577     media_object_walker_param->global_loop_exec_count = 0xFFF;
4578
4579     switch (kernel_walker_param->walker_degree) {
4580     case GEN10_WALKER_26_DEGREE:
4581         if (hevc_state->num_regions_in_slice > 1) {
4582             int thread_space_width  = kernel_walker_param->resolution_x;
4583             int thread_space_height = hevc_state->hevc_wf_param.max_height_in_region;
4584
4585             int ts_width  = thread_space_width;
4586             int ts_height = thread_space_height;
4587             int tmp_height = (ts_height + 1) & 0xfffe;
4588             ts_height = tmp_height;
4589             tmp_height     = ((ts_width + 1) >> 1) + ((ts_width + ((tmp_height - 1) << 1)) + (2 * hevc_state->num_regions_in_slice - 1)) / (2 * hevc_state->num_regions_in_slice);
4590
4591             media_object_walker_param->block_resolution.x           = ts_width;
4592             media_object_walker_param->block_resolution.y           = tmp_height;
4593
4594             media_object_walker_param->global_start.x               = 0;
4595             media_object_walker_param->global_start.y               = 0;
4596
4597             media_object_walker_param->global_resolution.x          = ts_width;
4598             media_object_walker_param->global_resolution.y          = tmp_height;
4599
4600             media_object_walker_param->local_start.x                = (ts_width + 1) & 0xfffe;;
4601             media_object_walker_param->local_start.y                = 0;
4602
4603             media_object_walker_param->local_end.x                  = 0;
4604             media_object_walker_param->local_end.y                  = 0;
4605
4606             media_object_walker_param->global_outer_loop_stride.x    = ts_width;
4607             media_object_walker_param->global_outer_loop_stride.y    = 0;
4608
4609             media_object_walker_param->global_inner_loop_unit.x       = 0;
4610             media_object_walker_param->global_inner_loop_unit.y       = tmp_height;
4611
4612             media_object_walker_param->scoreboard_mask              = 0x7F;
4613             media_object_walker_param->local_outer_loop_stride.x        = 1;
4614             media_object_walker_param->local_outer_loop_stride.y        = 0;
4615             media_object_walker_param->local_inner_loop_unit.x        = -2;
4616             media_object_walker_param->local_inner_loop_unit.y        = 1;
4617
4618             media_object_walker_param->global_loop_exec_count       = 0;
4619             media_object_walker_param->local_loop_exec_count        = (thread_space_width + (ts_height - 1) * 2 + hevc_state->num_regions_in_slice - 1) / hevc_state->num_regions_in_slice;
4620         } else {
4621             media_object_walker_param->block_resolution.x        = kernel_walker_param->resolution_x;
4622             media_object_walker_param->block_resolution.y        = kernel_walker_param->resolution_y;
4623
4624             media_object_walker_param->global_resolution.x       = media_object_walker_param->block_resolution.x;
4625             media_object_walker_param->global_resolution.y       = media_object_walker_param->block_resolution.y;
4626
4627             media_object_walker_param->global_outer_loop_stride.x = media_object_walker_param->block_resolution.x;
4628             media_object_walker_param->global_outer_loop_stride.y = 0;
4629
4630             media_object_walker_param->global_inner_loop_unit.x    = 0;
4631             media_object_walker_param->global_inner_loop_unit.y    = media_object_walker_param->block_resolution.y;
4632
4633             media_object_walker_param->scoreboard_mask         = 0x7F;
4634             media_object_walker_param->local_outer_loop_stride.x   = 1;
4635             media_object_walker_param->local_outer_loop_stride.y   = 0;
4636             media_object_walker_param->local_inner_loop_unit.x   = -2;
4637             media_object_walker_param->local_inner_loop_unit.y   = 1;
4638         }
4639
4640         {
4641             hw_scoreboard->scoreboard0.mask       = 0x7F;
4642             hw_scoreboard->scoreboard0.enable     = hevc_state->use_hw_scoreboard;
4643
4644             hw_scoreboard->dw1.scoreboard1.delta_x0 = -1;
4645             hw_scoreboard->dw1.scoreboard1.delta_y0 = 0;
4646
4647             hw_scoreboard->dw1.scoreboard1.delta_x1 = -1;
4648             hw_scoreboard->dw1.scoreboard1.delta_y1 = -1;
4649
4650             hw_scoreboard->dw1.scoreboard1.delta_x2 = 0;
4651             hw_scoreboard->dw1.scoreboard1.delta_y2 = -1;
4652
4653             hw_scoreboard->dw1.scoreboard1.delta_x3 = 1;
4654             hw_scoreboard->dw1.scoreboard1.delta_y3 = -1;
4655
4656             hw_scoreboard->dw2.scoreboard2.delta_x4 = 0;
4657             hw_scoreboard->dw2.scoreboard2.delta_y4 = 0;
4658
4659             hw_scoreboard->dw2.scoreboard2.delta_x5 = 0;
4660             hw_scoreboard->dw2.scoreboard2.delta_y5 = 0;
4661
4662             hw_scoreboard->dw2.scoreboard2.delta_x6 = 0;
4663             hw_scoreboard->dw2.scoreboard2.delta_y6 = 0;
4664
4665             hw_scoreboard->dw2.scoreboard2.delta_x7 = 0;
4666             hw_scoreboard->dw2.scoreboard2.delta_y7 = 0;
4667         }
4668         break;
4669     case GEN10_WALKER_26Z_DEGREE: {
4670         media_object_walker_param->scoreboard_mask           = 0x7f;
4671
4672         media_object_walker_param->global_resolution.x       = kernel_walker_param->resolution_x;
4673         media_object_walker_param->global_resolution.y       = kernel_walker_param->resolution_y;
4674
4675         media_object_walker_param->global_outer_loop_stride.x = 2;
4676         media_object_walker_param->global_outer_loop_stride.y = 0;
4677
4678         media_object_walker_param->global_inner_loop_unit.x    = 0xFFF - 4 + 1;
4679         media_object_walker_param->global_inner_loop_unit.y    = 2;
4680
4681         media_object_walker_param->local_outer_loop_stride.x     = 0;
4682         media_object_walker_param->local_outer_loop_stride.y     = 1;
4683         media_object_walker_param->local_inner_loop_unit.x     = 1;
4684         media_object_walker_param->local_inner_loop_unit.y     = 0;
4685
4686         media_object_walker_param->block_resolution.x        = 2;
4687         media_object_walker_param->block_resolution.y        = 2;
4688     }
4689
4690     {
4691         hw_scoreboard->scoreboard0.type           = hevc_state->use_hw_non_stalling_scoreboard;
4692         hw_scoreboard->scoreboard0.mask           = 0x7F;
4693         hw_scoreboard->scoreboard0.enable         = hevc_state->use_hw_scoreboard;
4694
4695         hw_scoreboard->dw1.scoreboard1.delta_x0 = -1;
4696         hw_scoreboard->dw1.scoreboard1.delta_y0 = 1;
4697
4698         hw_scoreboard->dw1.scoreboard1.delta_x1 = -1;
4699         hw_scoreboard->dw1.scoreboard1.delta_y1 = 0;
4700
4701         hw_scoreboard->dw1.scoreboard1.delta_x2 = -1;
4702         hw_scoreboard->dw1.scoreboard1.delta_y2 = -1;
4703
4704         hw_scoreboard->dw1.scoreboard1.delta_x3 = 0;
4705         hw_scoreboard->dw1.scoreboard1.delta_y3 = -1;
4706
4707         hw_scoreboard->dw2.scoreboard2.delta_x4 = 1;
4708         hw_scoreboard->dw2.scoreboard2.delta_y4 = -1;
4709     }
4710     break;
4711     case GEN10_WALKER_26X_DEGREE:
4712         if (hevc_state->num_regions_in_slice > 1) {
4713             int thread_space_width  = ALIGN(hevc_state->frame_width, 32) >> 5;
4714             int ts_width            = thread_space_width;
4715             int ts_height           = hevc_state->hevc_wf_param.max_height_in_region;
4716             int tmp_height          = (ts_height + 1) & 0xfffe;
4717             ts_height               =  tmp_height;
4718             tmp_height              = ((ts_width + 1) >> 1) + ((ts_width + ((tmp_height - 1) << 1)) + (2 * hevc_state->num_regions_in_slice - 1)) / (2 * hevc_state->num_regions_in_slice);
4719             tmp_height             *= (hevc_state->thread_num_per_ctb);
4720
4721             media_object_walker_param->scoreboard_mask                   = 0xff;
4722
4723             media_object_walker_param->global_resolution.x               = ts_width;
4724             media_object_walker_param->global_resolution.y               = tmp_height;
4725
4726             media_object_walker_param->global_start.x                    = 0;
4727             media_object_walker_param->global_start.y                    = 0;
4728
4729             media_object_walker_param->local_start.x                     = (ts_width + 1) & 0xfffe;
4730             media_object_walker_param->local_start.y                     = 0;
4731
4732             media_object_walker_param->local_end.x                       = 0;
4733             media_object_walker_param->local_end.y                       = 0;
4734
4735             media_object_walker_param->global_outer_loop_stride.x         = ts_width;
4736             media_object_walker_param->global_outer_loop_stride.y         = 0;
4737
4738             media_object_walker_param->global_inner_loop_unit.x            = 0;
4739             media_object_walker_param->global_inner_loop_unit.y            = tmp_height;
4740
4741             media_object_walker_param->local_outer_loop_stride.x             = 1;
4742             media_object_walker_param->local_outer_loop_stride.y             = 0;
4743             media_object_walker_param->local_inner_loop_unit.x             = -2;
4744             media_object_walker_param->local_inner_loop_unit.y             = hevc_state->thread_num_per_ctb;
4745             media_object_walker_param->middle_loop_extra_steps             = hevc_state->thread_num_per_ctb - 1;
4746             media_object_walker_param->mid_loop_unit_x                     = 0;
4747             media_object_walker_param->mid_loop_unit_y                     = 1;
4748
4749             media_object_walker_param->block_resolution.x                = media_object_walker_param->global_resolution.x;
4750             media_object_walker_param->block_resolution.y                = media_object_walker_param->global_resolution.y;
4751
4752             media_object_walker_param->global_loop_exec_count            = 0;
4753             media_object_walker_param->local_loop_exec_count             = (thread_space_width + (ts_height - 1) * 2 + hevc_state->num_regions_in_slice - 1) / hevc_state->num_regions_in_slice;
4754         } else {
4755             media_object_walker_param->scoreboard_mask           = 0xff;
4756
4757             media_object_walker_param->global_resolution.x       = kernel_walker_param->resolution_x;
4758             media_object_walker_param->global_resolution.y       = kernel_walker_param->resolution_y * hevc_state->thread_num_per_ctb;
4759
4760             media_object_walker_param->global_outer_loop_stride.x = media_object_walker_param->global_resolution.x;
4761             media_object_walker_param->global_outer_loop_stride.y = 0;
4762
4763             media_object_walker_param->global_inner_loop_unit.x    = 0;
4764             media_object_walker_param->global_inner_loop_unit.y    = media_object_walker_param->global_resolution.y;
4765
4766             media_object_walker_param->local_outer_loop_stride.x     = 1;
4767             media_object_walker_param->local_outer_loop_stride.y     = 0;
4768             media_object_walker_param->local_inner_loop_unit.x     = 0xFFF - 2 + 1; // -2 in 2's compliment format;
4769             media_object_walker_param->local_inner_loop_unit.y     = hevc_state->thread_num_per_ctb;
4770             media_object_walker_param->middle_loop_extra_steps     = hevc_state->thread_num_per_ctb - 1;
4771             media_object_walker_param->mid_loop_unit_x             = 0;
4772             media_object_walker_param->mid_loop_unit_y             = 1;
4773
4774             media_object_walker_param->block_resolution.x        = media_object_walker_param->global_resolution.x;
4775             media_object_walker_param->block_resolution.y        = media_object_walker_param->global_resolution.y;
4776         }
4777
4778         {
4779             hw_scoreboard->scoreboard0.type           = hevc_state->use_hw_non_stalling_scoreboard;
4780             hw_scoreboard->scoreboard0.mask           = 0xff;
4781             hw_scoreboard->scoreboard0.enable         = hevc_state->use_hw_scoreboard;
4782
4783             hw_scoreboard->dw1.scoreboard1.delta_x0 = -1;
4784             hw_scoreboard->dw1.scoreboard1.delta_y0 = hevc_state->thread_num_per_ctb - 1;
4785
4786             hw_scoreboard->dw1.scoreboard1.delta_x1 = -1;
4787             hw_scoreboard->dw1.scoreboard1.delta_y1 = -1;
4788
4789             hw_scoreboard->dw1.scoreboard1.delta_x2 = 0;
4790             hw_scoreboard->dw1.scoreboard1.delta_y2 = -1;
4791
4792             hw_scoreboard->dw1.scoreboard1.delta_x3 = 1;
4793             hw_scoreboard->dw1.scoreboard1.delta_y3 = -1;
4794
4795             hw_scoreboard->dw2.scoreboard2.delta_x4 = 0;
4796             hw_scoreboard->dw2.scoreboard2.delta_y4 = -hevc_state->thread_num_per_ctb;
4797
4798             hw_scoreboard->dw2.scoreboard2.delta_x5 = 0;
4799             hw_scoreboard->dw2.scoreboard2.delta_y5 = -2;
4800
4801             hw_scoreboard->dw2.scoreboard2.delta_x6 = 0;
4802             hw_scoreboard->dw2.scoreboard2.delta_y6 = -3;
4803
4804             hw_scoreboard->dw2.scoreboard2.delta_x7 = 0;
4805             hw_scoreboard->dw2.scoreboard2.delta_y7 = -4;
4806         }
4807
4808         break;
4809     case GEN10_WALKER_26ZX_DEGREE:
4810         mw_26zx_h_factor                            = 5;
4811
4812         if (hevc_state->num_regions_in_slice > 1) {
4813             int thread_space_width  = ALIGN(hevc_state->frame_width, 64) >> 6;
4814             int thread_space_height = hevc_state->hevc_wf_param.max_height_in_region;
4815             int sp_width  = (thread_space_width + 1) & 0xfffe;
4816             int sp_height = (thread_space_height + 1) & 0xfffe;
4817             int wf_num = (sp_width + (sp_height - 1) * 2 + hevc_state->num_regions_in_slice - 1) / hevc_state->num_regions_in_slice;
4818             sp_height     = ((sp_width + 1) >> 1) + ((sp_width + ((sp_height - 1) << 1)) + (2 * hevc_state->num_regions_in_slice - 1)) / (2 * hevc_state->num_regions_in_slice);
4819             int ts_width  = sp_width * mw_26zx_h_factor;
4820             int ts_height = sp_height * (hevc_state->thread_num_per_ctb);
4821
4822             media_object_walker_param->scoreboard_mask          = 0xff;
4823
4824             media_object_walker_param->global_resolution.x      = ts_width;
4825             media_object_walker_param->global_resolution.y      = ts_height;
4826
4827             media_object_walker_param->global_start.x           = 0;
4828             media_object_walker_param->global_start.y           = 0;
4829
4830             media_object_walker_param->local_start.x            = media_object_walker_param->global_resolution.x;
4831             media_object_walker_param->local_start.y            = 0;
4832
4833             media_object_walker_param->local_end.x              = 0;
4834             media_object_walker_param->local_end.y              = 0;
4835
4836             media_object_walker_param->global_outer_loop_stride.x = media_object_walker_param->global_resolution.x;
4837             media_object_walker_param->global_outer_loop_stride.y = 0;
4838
4839             media_object_walker_param->global_inner_loop_unit.x    = 0;
4840             media_object_walker_param->global_inner_loop_unit.y    = media_object_walker_param->global_resolution.y;
4841
4842             media_object_walker_param->local_outer_loop_stride.x = 1;
4843             media_object_walker_param->local_outer_loop_stride.y = 0;
4844             media_object_walker_param->local_inner_loop_unit.x = -mw_26zx_h_factor * 2;
4845             media_object_walker_param->local_inner_loop_unit.y = hevc_state->thread_num_per_ctb;
4846             media_object_walker_param->middle_loop_extra_steps = hevc_state->thread_num_per_ctb - 1;
4847             media_object_walker_param->mid_loop_unit_x         = 0;
4848             media_object_walker_param->mid_loop_unit_y         = 1;
4849
4850             media_object_walker_param->block_resolution.x = media_object_walker_param->global_resolution.x;
4851             media_object_walker_param->block_resolution.y = media_object_walker_param->global_resolution.y;
4852
4853             media_object_walker_param->global_loop_exec_count = 0;
4854             media_object_walker_param->local_loop_exec_count  = (wf_num + 1) * mw_26zx_h_factor;
4855         } else {
4856             media_object_walker_param->scoreboard_mask = 0xff;
4857
4858             media_object_walker_param->global_resolution.x = kernel_walker_param->resolution_x * mw_26zx_h_factor;
4859             media_object_walker_param->global_resolution.y = kernel_walker_param->resolution_y * hevc_state->thread_num_per_ctb;
4860
4861             media_object_walker_param->global_outer_loop_stride.x = media_object_walker_param->global_resolution.x;
4862             media_object_walker_param->global_outer_loop_stride.y = 0;
4863
4864             media_object_walker_param->global_inner_loop_unit.x = 0;
4865             media_object_walker_param->global_inner_loop_unit.y = media_object_walker_param->global_resolution.y;
4866
4867             media_object_walker_param->local_outer_loop_stride.x = 1;
4868             media_object_walker_param->local_outer_loop_stride.y = 0;
4869             media_object_walker_param->local_inner_loop_unit.x = 0xFFF - 10 + 1; // -10 in 2's compliment format;
4870             media_object_walker_param->local_inner_loop_unit.y = hevc_state->thread_num_per_ctb;
4871             media_object_walker_param->middle_loop_extra_steps = hevc_state->thread_num_per_ctb - 1;
4872             media_object_walker_param->mid_loop_unit_x = 0;
4873             media_object_walker_param->mid_loop_unit_y = 1;
4874
4875             media_object_walker_param->block_resolution.x = media_object_walker_param->global_resolution.x;
4876             media_object_walker_param->block_resolution.y = media_object_walker_param->global_resolution.y;
4877         }
4878
4879         {
4880             hw_scoreboard->scoreboard0.mask = 0xff;
4881             hw_scoreboard->scoreboard0.type = hevc_state->use_hw_non_stalling_scoreboard;
4882             hw_scoreboard->scoreboard0.enable = hevc_state->use_hw_scoreboard;
4883
4884             hw_scoreboard->dw1.scoreboard1.delta_x0 = -5;
4885             hw_scoreboard->dw1.scoreboard1.delta_y0 = -1;
4886
4887             hw_scoreboard->dw1.scoreboard1.delta_x1 = -2;
4888             hw_scoreboard->dw1.scoreboard1.delta_y1 = -1;
4889
4890             hw_scoreboard->dw1.scoreboard1.delta_x2 = 3;
4891             hw_scoreboard->dw1.scoreboard1.delta_y2 = -1;
4892
4893             hw_scoreboard->dw1.scoreboard1.delta_x3 = -1;
4894             hw_scoreboard->dw1.scoreboard1.delta_y3 = 0;
4895
4896             hw_scoreboard->dw2.scoreboard2.delta_x4 = -2;
4897             hw_scoreboard->dw2.scoreboard2.delta_y4 = 0;
4898
4899             hw_scoreboard->dw2.scoreboard2.delta_x5 = -5;
4900             hw_scoreboard->dw2.scoreboard2.delta_y5 = hevc_state->thread_num_per_ctb - 1;
4901
4902             hw_scoreboard->dw2.scoreboard2.delta_x6 = 0;
4903             hw_scoreboard->dw2.scoreboard2.delta_y6 = -1;
4904
4905             hw_scoreboard->dw2.scoreboard2.delta_x7 = 5;
4906             hw_scoreboard->dw2.scoreboard2.delta_y7 = -1;
4907         }
4908         break;
4909     default:
4910         break;
4911     }
4912
4913     return;
4914 }
4915
4916 static void
4917 gen10_hevc_update_scoreboard(struct i965_gpe_context *gpe_context,
4918                              struct gen10_hevc_gpe_scoreboard *scoreboard)
4919 {
4920     if (!gpe_context || !scoreboard)
4921         return;
4922
4923     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard->scoreboard0.mask;
4924     gpe_context->vfe_desc5.scoreboard0.type = scoreboard->scoreboard0.type;
4925     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard->scoreboard0.enable;
4926
4927     gpe_context->vfe_desc6.dword = scoreboard->dw1.value;
4928     gpe_context->vfe_desc7.dword = scoreboard->dw2.value;
4929     return;
4930 }
4931
4932 static void
4933 gen10_hevc_enc_mbenc_kernel(VADriverContextP ctx,
4934                             struct encode_state *encode_state,
4935                             struct intel_encoder_context *encoder_context,
4936                             int mbenc_type)
4937 {
4938     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
4939     struct gen10_hevc_enc_state *hevc_state;
4940     struct i965_gpe_context *gpe_context;
4941     int media_function;
4942     struct gpe_media_object_walker_parameter media_object_walker_param;
4943     struct gen10_hevc_enc_kernel_walker_parameter kernel_walker_param;
4944     struct gen10_hevc_gpe_scoreboard hw_scoreboard;
4945     int mbenc_idx;
4946
4947     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
4948
4949     if (mbenc_type != GEN10_HEVC_MBENC_INTRA)
4950         gen10_hevc_enc_generate_regions_in_slice_control(ctx, encode_state, encoder_context);
4951
4952     switch (mbenc_type) {
4953     case GEN10_HEVC_MBENC_INTER_LCU32:
4954         mbenc_idx = GEN10_HEVC_MBENC_INTER_LCU32_KRNIDX_G10;
4955         media_function = GEN10_HEVC_MEDIA_STATE_MBENC_LCU32;
4956         break;
4957     case GEN10_HEVC_MBENC_INTER_LCU64:
4958         mbenc_idx = GEN10_HEVC_MBENC_INTER_LCU64_KRNIDX_G10;
4959         media_function = GEN10_HEVC_MEDIA_STATE_MBENC_LCU64;
4960         break;
4961     case GEN10_HEVC_MBENC_INTRA:
4962     default:
4963         mbenc_idx = GEN10_HEVC_MBENC_I_KRNIDX_G10;
4964         media_function = GEN10_HEVC_MEDIA_STATE_MBENC_INTRA;
4965         break;
4966     }
4967
4968     gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_idx]);
4969
4970     memset(&hw_scoreboard, 0, sizeof(hw_scoreboard));
4971     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4972     gen8_gpe_context_init(ctx, gpe_context);
4973     gen9_gpe_reset_binding_table(ctx, gpe_context);
4974
4975     kernel_walker_param.use_scoreboard = hevc_state->use_hw_scoreboard;
4976     kernel_walker_param.use_custom_walker = 0;
4977     if (mbenc_type == GEN10_HEVC_MBENC_INTRA)
4978         gen10_hevc_enc_mbenc_intra_curbe(ctx, encode_state, encoder_context, gpe_context);
4979     else
4980         gen10_hevc_enc_mbenc_inter_curbe(ctx, encode_state, encoder_context, gpe_context);
4981
4982     if (mbenc_type == GEN10_HEVC_MBENC_INTRA) {
4983         gen10_hevc_enc_mbenc_intra_surfaces(ctx, encode_state, encoder_context, gpe_context);
4984         kernel_walker_param.resolution_x = ALIGN(hevc_state->frame_width, 32) >> 5;
4985         kernel_walker_param.resolution_y = ALIGN(hevc_state->frame_height, 32) >> 5;
4986         if (hevc_state->is_64lcu) {
4987             kernel_walker_param.walker_degree = GEN10_WALKER_26_DEGREE;// 26_DEGREE
4988             kernel_walker_param.use_custom_walker = 1;
4989         } else {
4990             kernel_walker_param.use_vertical_scan = 1;
4991         }
4992     } else if (mbenc_type == GEN10_HEVC_MBENC_INTER_LCU32) {
4993         gen10_hevc_enc_mbenc_inter_lcu32_surfaces(ctx, encode_state, encoder_context, gpe_context);
4994         kernel_walker_param.resolution_x = ALIGN(hevc_state->frame_width, 32) >> 5;
4995         kernel_walker_param.resolution_y = ALIGN(hevc_state->frame_height, 32) >> 5;
4996         kernel_walker_param.use_custom_walker = 1;
4997         if (hevc_state->brc.target_usage == 7)
4998             kernel_walker_param.walker_degree = GEN10_WALKER_26_DEGREE;
4999         else
5000             kernel_walker_param.walker_degree = GEN10_WALKER_26X_DEGREE;
5001     } else {
5002         gen10_hevc_enc_mbenc_inter_lcu64_surfaces(ctx, encode_state, encoder_context, gpe_context);
5003         kernel_walker_param.resolution_x = vme_context->frame_info.width_in_lcu;
5004         kernel_walker_param.resolution_y = vme_context->frame_info.height_in_lcu;
5005         kernel_walker_param.use_custom_walker = 1;
5006         kernel_walker_param.walker_degree = GEN10_WALKER_26ZX_DEGREE;
5007     }
5008
5009     gen10_hevc_enc_generate_lculevel_data(ctx, encode_state, encoder_context);
5010
5011     memset(&hw_scoreboard, 0, sizeof(hw_scoreboard));
5012     memset(&media_object_walker_param, 0, sizeof(media_object_walker_param));
5013
5014     gen10_hevc_mbenc_init_walker_param(hevc_state, &kernel_walker_param,
5015                                        &media_object_walker_param,
5016                                        &hw_scoreboard);
5017
5018     gen10_hevc_update_scoreboard(gpe_context, &hw_scoreboard);
5019
5020     gen8_gpe_setup_interface_data(ctx, gpe_context);
5021
5022     gen10_run_kernel_media_object_walker(ctx, encoder_context,
5023                                          gpe_context,
5024                                          media_function,
5025                                          &media_object_walker_param);
5026 }
5027
5028 static VAStatus
5029 gen10_hevc_vme_pipeline_prepare(VADriverContextP ctx,
5030                                 struct encode_state *encode_state,
5031                                 struct intel_encoder_context *encoder_context)
5032 {
5033     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
5034     struct gen10_hevc_enc_state *hevc_state;
5035     struct gen10_hevc_enc_frame_info *frame_info;
5036     struct gen10_hevc_enc_common_res *common_res;
5037     int i;
5038
5039     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
5040     frame_info = &vme_context->frame_info;
5041     common_res = &vme_context->common_res;
5042
5043     if (hevc_state->is_64lcu || hevc_state->is_10bit) {
5044         if (frame_info->picture_coding_type != HEVC_SLICE_I) {
5045             for (i = 0; i < 16; i++) {
5046                 if (common_res->reference_pics[i].obj_surface == NULL)
5047                     continue;
5048
5049                 gen10_hevc_enc_conv_scaling_surface(ctx, encode_state,
5050                                                     encoder_context,
5051                                                     NULL,
5052                                                     common_res->reference_pics[i].obj_surface,
5053                                                     1);
5054             }
5055         }
5056     }
5057
5058     gen10_hevc_enc_conv_scaling_surface(ctx, encode_state, encoder_context,
5059                                         common_res->uncompressed_pic.obj_surface,
5060                                         common_res->reconstructed_pic.obj_surface,
5061                                         0);
5062
5063     return VA_STATUS_SUCCESS;
5064 }
5065
5066 static VAStatus
5067 gen10_hevc_vme_pipeline(VADriverContextP ctx,
5068                         VAProfile profile,
5069                         struct encode_state *encode_state,
5070                         struct intel_encoder_context *encoder_context)
5071 {
5072     struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
5073     struct gen10_hevc_enc_state *hevc_state;
5074     struct gen10_hevc_enc_frame_info *frame_info;
5075     VAStatus va_status = VA_STATUS_SUCCESS;
5076
5077     if (!vme_context || !vme_context->enc_priv_state)
5078         return VA_STATUS_ERROR_INVALID_CONTEXT;
5079
5080     hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
5081     frame_info = &vme_context->frame_info;
5082
5083     va_status = gen10_hevc_enc_init_parameters(ctx, encode_state, encoder_context);
5084     if (va_status != VA_STATUS_SUCCESS)
5085         return va_status;
5086
5087     va_status = gen10_hevc_vme_pipeline_prepare(ctx, encode_state, encoder_context);
5088     if (va_status != VA_STATUS_SUCCESS)
5089         return va_status;
5090
5091     if (hevc_state->brc.brc_reset || !hevc_state->brc.brc_inited) {
5092         gen10_hevc_enc_brc_init_reset(ctx, encode_state, encoder_context);
5093
5094         hevc_state->brc.brc_inited = 1;
5095         hevc_state->brc.brc_reset = 0;
5096     }
5097
5098     if (frame_info->picture_coding_type == HEVC_SLICE_I) {
5099         gen10_hevc_enc_me_kernel(ctx, encode_state, encoder_context,
5100                                  GEN10_HEVC_HME_LEVEL_4X,
5101                                  GEN10_HEVC_ME_DIST_TYPE_INTRA_BRC);
5102     } else {
5103         if (hevc_state->hme_enabled) {
5104             if (hevc_state->b16xme_enabled)
5105                 gen10_hevc_enc_me_kernel(ctx, encode_state, encoder_context,
5106                                          GEN10_HEVC_HME_LEVEL_16X,
5107                                          GEN10_HEVC_ME_DIST_TYPE_INTER_BRC);
5108
5109
5110
5111             gen10_hevc_enc_me_kernel(ctx, encode_state, encoder_context,
5112                                      GEN10_HEVC_HME_LEVEL_4X,
5113                                      GEN10_HEVC_ME_DIST_TYPE_INTER_BRC);
5114         }
5115     }
5116
5117     gen10_hevc_enc_me_kernel(ctx, encode_state, encoder_context,
5118                              GEN10_HEVC_HME_LEVEL_4X,
5119                              GEN10_HEVC_ME_DIST_TYPE_INTRA);
5120
5121     gen10_hevc_enc_brc_frame_update_kernel(ctx, encode_state,
5122                                            encoder_context);
5123
5124     gen10_hevc_enc_brc_lcu_update_kernel(ctx, encode_state,
5125                                          encoder_context);
5126
5127     if (frame_info->picture_coding_type == HEVC_SLICE_I)
5128         gen10_hevc_enc_mbenc_kernel(ctx, encode_state, encoder_context,
5129                                     GEN10_HEVC_MBENC_INTRA);
5130     else
5131         gen10_hevc_enc_mbenc_kernel(ctx, encode_state, encoder_context,
5132                                     (hevc_state->is_64lcu ?
5133                                      GEN10_HEVC_MBENC_INTER_LCU64 :
5134                                      GEN10_HEVC_MBENC_INTER_LCU32));
5135
5136
5137 #if 0
5138     if (hevc_state->frame_number == 0) {
5139         struct gen10_hevc_surface_priv *surface_priv = NULL;
5140
5141         surface_priv = (struct gen10_hevc_surface_priv *)encode_state->reconstructed_object->private_data;
5142         //print_out_obj_surface(ctx, surface_priv->scaled_4x_surface_id, 1);
5143
5144         //print_out_gpe_resource(&vme_context->res_mb_code_surface, 0,
5145         //                       hevc_state->cu_records_offset, 1, 0, 0, 64);
5146         //print_out_gpe_resource(&vme_context->res_mb_code_surface, 0,
5147         //                       0, 1, 0, 0, 64);
5148         //print_out_gpe_resource(&vme_context->res_s4x_me_dist_surface, 0,
5149         //                       0, 1, 0, 0, 64);
5150
5151         //return VA_STATUS_ERROR_INVALID_PARAMETER;
5152     }
5153 #endif
5154     return VA_STATUS_SUCCESS;
5155 }
5156
5157 static void
5158 gen10_hevc_hcp_pipe_mode_select(VADriverContextP ctx,
5159                                 struct encode_state *encode_state,
5160                                 struct intel_encoder_context *encoder_context,
5161                                 struct intel_batchbuffer *batch)
5162 {
5163     struct gen10_hevc_enc_context *pak_context;
5164     struct gen10_hevc_enc_state *hevc_state;
5165     gen10_hcp_pipe_mode_select_param param;
5166
5167     pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5168     hevc_state = (struct gen10_hevc_enc_state *)pak_context->enc_priv_state;
5169
5170     memset(&param, 0, sizeof(param));
5171
5172     param.dw1.codec_select = GEN10_HCP_ENCODE;
5173     param.dw1.codec_standard_select = GEN10_HCP_HEVC_CODEC;
5174     param.dw1.sao_first_pass = hevc_state->sao_first_pass_flag;
5175     param.dw1.rdoq_enabled = hevc_state->rdoq_enabled;
5176     param.dw1.pak_frame_level_streamout_enabled = 1;
5177
5178     if (hevc_state->brc.brc_enabled &&
5179         hevc_state->curr_pak_idx != (hevc_state->num_sao_passes - 1))
5180         param.dw1.pak_streamout_enabled = 1;
5181
5182     gen10_hcp_pipe_mode_select(ctx, batch, &param);
5183 }
5184
5185 static void
5186 gen10_hevc_hcp_multi_surfaces(VADriverContextP ctx,
5187                               struct encode_state *encode_state,
5188                               struct intel_encoder_context *encoder_context,
5189                               struct intel_batchbuffer *batch)
5190 {
5191     struct gen10_hevc_enc_context *pak_context;
5192     gen10_hcp_surface_state_param param;
5193     struct object_surface *obj_surface;
5194     int i = 0;
5195
5196     pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5197
5198     for (i = 0; i < 2; i++) {
5199         if (i == 0)
5200             obj_surface = pak_context->common_res.reconstructed_pic.obj_surface;
5201         else
5202             obj_surface = pak_context->common_res.uncompressed_pic.obj_surface;
5203
5204         memset(&param, 0, sizeof(param));
5205
5206         param.dw1.surface_pitch = obj_surface->width - 1;
5207         param.dw1.surface_id = (i == 0 ? GEN10_HCP_DECODE_SURFACE_ID :
5208                                 GEN10_HCP_INPUT_SURFACE_ID);
5209         param.dw2.y_cb_offset = obj_surface->y_cb_offset;
5210
5211         if (obj_surface->fourcc == VA_FOURCC_P010)
5212             param.dw2.surface_format = SURFACE_FORMAT_P010;
5213         else if (obj_surface->fourcc == VA_FOURCC_NV12)
5214             param.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5215         else
5216             assert(0);
5217
5218         gen10_hcp_surface_state(ctx, batch, &param);
5219     }
5220 }
5221
5222 static void
5223 gen10_hevc_hcp_pipe_buf_state(VADriverContextP ctx,
5224                               struct encode_state *encode_state,
5225                               struct intel_encoder_context *encoder_context,
5226                               struct intel_batchbuffer *batch)
5227 {
5228     struct gen10_hevc_enc_context *pak_context;
5229     struct gen10_hevc_surface_priv *surface_priv;
5230     gen10_hcp_pipe_buf_addr_state_param param;
5231     struct gen10_hevc_enc_common_res *common_res;
5232     int i;
5233
5234     pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5235
5236     common_res = &pak_context->common_res;
5237     surface_priv = (struct gen10_hevc_surface_priv *)common_res->
5238                    reconstructed_pic.obj_surface->private_data;
5239
5240     memset(&param, 0, sizeof(param));
5241
5242     param.reconstructed = &common_res->reconstructed_pic.gpe_res;
5243     param.deblocking_filter_line = &common_res->deblocking_filter_line_buffer;
5244     param.deblocking_filter_tile_line = &common_res->deblocking_filter_tile_line_buffer;
5245     param.deblocking_filter_tile_column = &common_res->deblocking_filter_tile_column_buffer;
5246     param.metadata_line = &common_res->metadata_line_buffer;
5247     param.metadata_tile_line = &common_res->metadata_tile_line_buffer;
5248     param.metadata_tile_column = &common_res->metadata_tile_column_buffer;
5249     param.sao_line = &common_res->sao_line_buffer;
5250     param.sao_tile_line = &common_res->sao_tile_line_buffer;
5251     param.sao_tile_column = &common_res->sao_tile_column_buffer;
5252
5253     if (surface_priv)
5254         param.current_motion_vector_temporal = &surface_priv->motion_vector_temporal;
5255
5256     for (i = 0; i < 8; i++) {
5257         if (common_res->reference_pics[i].obj_surface)
5258             param.reference_picture[i] = &common_res->reference_pics[i].gpe_res;
5259     }
5260
5261     param.uncompressed_picture = &common_res->uncompressed_pic.gpe_res;
5262     param.streamout_data_destination = &common_res->streamout_data_destination_buffer;
5263     param.picture_status = &common_res->picture_status_buffer;
5264     param.ildb_streamout = &common_res->ildb_streamout_buffer;
5265
5266     for (i = 0; i < 8; i++) {
5267         if (common_res->reference_pics[i].obj_surface) {
5268             surface_priv = (struct gen10_hevc_surface_priv *)common_res->
5269                            reference_pics[i].obj_surface->private_data;
5270             if (surface_priv)
5271                 param.collocated_motion_vector_temporal[i] =
5272                     &surface_priv->motion_vector_temporal;
5273         }
5274     }
5275
5276     param.sao_streamout_data_destination = &common_res->sao_streamout_data_destination_buffer;
5277     param.frame_statics_streamout_data_destination =
5278         &common_res->frame_statics_streamout_data_destination_buffer;
5279     param.sse_source_pixel_rowstore = &common_res->sse_source_pixel_rowstore_buffer;
5280
5281     gen10_hcp_pipe_buf_addr_state(ctx, batch, &param);
5282 }
5283
5284 static void
5285 gen10_hevc_hcp_ind_obj_base_addr_state(VADriverContextP ctx,
5286                                        struct encode_state *encode_state,
5287                                        struct intel_encoder_context *encoder_context,
5288                                        struct intel_batchbuffer *batch)
5289 {
5290     struct gen10_hevc_enc_context *pak_context;
5291     struct gen10_hevc_enc_state *hevc_state;
5292     gen10_hcp_ind_obj_base_addr_state_param param;
5293
5294     pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5295     hevc_state = (struct gen10_hevc_enc_state *)pak_context->enc_priv_state;
5296
5297     memset(&param, 0, sizeof(param));
5298
5299     param.ind_cu_obj_bse = &pak_context->res_mb_code_surface;
5300     param.ind_cu_obj_bse_offset = hevc_state->cu_records_offset;
5301
5302     param.ind_pak_bse = &pak_context->common_res.compressed_bitstream.gpe_res;
5303     param.ind_pak_bse_offset = pak_context->common_res.compressed_bitstream.offset;
5304     param.ind_pak_bse_upper = pak_context->common_res.compressed_bitstream.end_offset;
5305
5306     gen10_hcp_ind_obj_base_addr_state(ctx, batch, &param);
5307 }
5308
5309 static void
5310 gen10_hevc_hcp_qm_fqm_state(VADriverContextP ctx,
5311                             struct encode_state *encode_state,
5312                             struct intel_encoder_context *encoder_context,
5313                             struct intel_batchbuffer *batch)
5314 {
5315     struct gen10_hevc_enc_context *pak_context;
5316
5317     pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5318
5319     gen10_hevc_enc_hcp_set_qm_fqm_states(ctx, batch, &pak_context->frame_info);
5320 }
5321
5322 static void
5323 gen10_hevc_hcp_pic_state(VADriverContextP ctx,
5324                          struct encode_state *encode_state,
5325                          struct intel_encoder_context *encoder_context,
5326                          struct intel_batchbuffer *batch)
5327 {
5328     struct gen10_hevc_enc_context *pak_context;
5329     struct gen10_hevc_enc_state *hevc_state;
5330     VAEncSequenceParameterBufferHEVC *seq_param;
5331     VAEncPictureParameterBufferHEVC *pic_param;
5332     VAEncSliceParameterBufferHEVC *slice_param;
5333     struct gen10_hevc_enc_frame_info *frame_info;
5334     gen10_hcp_pic_state_param param;
5335
5336     pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5337     hevc_state = (struct gen10_hevc_enc_state *)pak_context->enc_priv_state;
5338
5339     frame_info = &pak_context->frame_info;
5340     seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
5341     pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
5342     slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
5343
5344     memset(&param, 0, sizeof(param));
5345     param.dw1.frame_width_in_cu_minus1 = frame_info->width_in_cu - 1;
5346     param.dw1.frame_height_in_cu_minus1 = frame_info->height_in_cu - 1;
5347     param.dw1.pak_transform_skip = pic_param->pic_fields.bits.transform_skip_enabled_flag;
5348
5349     param.dw2.min_cu_size = seq_param->log2_min_luma_coding_block_size_minus3;
5350     param.dw2.lcu_size = seq_param->log2_min_luma_coding_block_size_minus3 +
5351                          seq_param->log2_diff_max_min_luma_coding_block_size;
5352     param.dw2.min_tu_size = seq_param->log2_min_transform_block_size_minus2;
5353     param.dw2.max_tu_size = seq_param->log2_min_transform_block_size_minus2 +
5354                             seq_param->log2_diff_max_min_transform_block_size;
5355     param.dw2.min_pcm_size = 0;
5356     param.dw2.max_pcm_size = 0;
5357
5358     if ((slice_param->slice_fields.bits.slice_sao_luma_flag ||
5359          slice_param->slice_fields.bits.slice_sao_chroma_flag) &&
5360         !frame_info->bit_depth_luma_minus8)
5361         param.dw4.sao_enabled_flag = 1;
5362
5363     if (pic_param->pic_fields.bits.cu_qp_delta_enabled_flag) {
5364         param.dw4.cu_qp_delta_enabled_flag = 1;
5365         param.dw4.diff_cu_qp_delta_depth = pic_param->diff_cu_qp_delta_depth;
5366     }
5367
5368     param.dw4.pcm_loop_filter_disable_flag = seq_param->seq_fields.bits.pcm_loop_filter_disabled_flag;
5369     param.dw4.weighted_bipred_flag = pic_param->pic_fields.bits.weighted_bipred_flag;
5370     param.dw4.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
5371     param.dw4.transform_skip_enabled_flag = pic_param->pic_fields.bits.transform_skip_enabled_flag;
5372     param.dw4.amp_enabled_flag = seq_param->seq_fields.bits.amp_enabled_flag;
5373     param.dw4.transquant_bypass_enabled_flag = pic_param->pic_fields.bits.transquant_bypass_enabled_flag;
5374     param.dw4.strong_intra_smoothing_enabled_flag = seq_param->seq_fields.bits.strong_intra_smoothing_enabled_flag;
5375
5376     param.dw5.pic_cb_qp_offset = pic_param->pps_cr_qp_offset & 0x1f;
5377     param.dw5.pic_cr_qp_offset = pic_param->pps_cb_qp_offset & 0x1f;
5378     param.dw5.max_transform_hierarchy_depth_intra = seq_param->max_transform_hierarchy_depth_intra;
5379     param.dw5.max_transform_hierarchy_depth_inter = seq_param->max_transform_hierarchy_depth_inter;
5380     param.dw5.pcm_sample_bit_depth_chroma_minus1 = seq_param->pcm_sample_bit_depth_chroma_minus1;
5381     param.dw5.pcm_sample_bit_depth_luma_minus1 = seq_param->pcm_sample_bit_depth_luma_minus1;
5382     param.dw5.bit_depth_chroma_minus8 = seq_param->seq_fields.bits.bit_depth_chroma_minus8;
5383     param.dw5.bit_depth_luma_minus8 = seq_param->seq_fields.bits.bit_depth_luma_minus8;
5384
5385     param.dw6.lcu_max_bits_allowed = frame_info->ctu_max_bitsize_allowed;
5386
5387     param.dw19.rho_domain_rc_enabled = 0;
5388     param.dw19.rho_domain_frame_qp = 0;
5389     param.dw19.fraction_qp_adj_enabled = 0;
5390     param.dw19.first_slice_segment_in_pic_flag = 1;
5391     param.dw19.nal_unit_type_flag = 1;
5392     param.dw19.sse_enabled = 1;
5393     param.dw19.rhoq_enabled = hevc_state->rdoq_enabled;
5394
5395     gen10_hcp_pic_state(ctx, batch, &param);
5396 }
5397
5398 static void
5399 gen10_hevc_hcp_rdoq_state(VADriverContextP ctx,
5400                           struct encode_state *encode_state,
5401                           struct intel_encoder_context *encoder_context,
5402                           struct intel_batchbuffer *batch)
5403 {
5404     struct gen10_hevc_enc_context *pak_context;
5405     gen10_hcp_rdoq_state_param param;
5406
5407     pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5408
5409     memset(&param, 0, sizeof(param));
5410
5411     memcpy(param.lambda_intra_luma, pak_context->lambda_param.lambda_intra[0],
5412            sizeof(param.lambda_intra_luma));
5413     memcpy(param.lambda_intra_chroma, pak_context->lambda_param.lambda_intra[1],
5414            sizeof(param.lambda_intra_chroma));
5415     memcpy(param.lambda_inter_luma, pak_context->lambda_param.lambda_inter[0],
5416            sizeof(param.lambda_inter_luma));
5417     memcpy(param.lambda_inter_chroma, pak_context->lambda_param.lambda_inter[1],
5418            sizeof(param.lambda_inter_chroma));
5419
5420     gen10_hcp_rdoq_state(ctx, batch, &param);
5421 }
5422
5423 static void
5424 gen10_hevc_pak_picture_level(VADriverContextP ctx,
5425                              struct encode_state *encode_state,
5426                              struct intel_encoder_context *encoder_context)
5427 {
5428     struct intel_batchbuffer *batch = encoder_context->base.batch;
5429     struct gen10_hevc_enc_context *pak_context;
5430     struct gen10_hevc_enc_state *hevc_state;
5431
5432     pak_context = (struct gen10_hevc_enc_context *)encoder_context->mfc_context;
5433     hevc_state = (struct gen10_hevc_enc_state *) pak_context->enc_priv_state;
5434
5435     gen10_hevc_hcp_pipe_mode_select(ctx, encode_state, encoder_context, batch);
5436     gen10_hevc_hcp_multi_surfaces(ctx, encode_state, encoder_context, batch);
5437     gen10_hevc_hcp_pipe_buf_state(ctx, encode_state, encoder_context, batch);
5438     gen10_hevc_hcp_ind_obj_base_addr_state(ctx, encode_state, encoder_context, batch);
5439     gen10_hevc_hcp_qm_fqm_state(ctx, encode_state, encoder_context, batch);
5440
5441     if (hevc_state->brc.brc_enabled) {
5442         struct gpe_mi_batch_buffer_start_parameter second_level_batch;
5443
5444         memset(&second_level_batch, 0, sizeof(second_level_batch));
5445         second_level_batch.offset = GEN10_HEVC_BRC_IMG_STATE_SIZE_PER_PASS *
5446                                     hevc_state->curr_pak_idx;
5447         second_level_batch.is_second_level = 1;
5448         second_level_batch.bo = pak_context->res_brc_pic_image_state_write_buffer.bo;
5449
5450         gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5451     } else
5452         gen10_hevc_hcp_pic_state(ctx, encode_state, encoder_context, batch);
5453
5454     if (hevc_state->rdoq_enabled)
5455         gen10_hevc_hcp_rdoq_state(ctx, encode_state, encoder_context, batch);
5456 }
5457
5458 static void
5459 gen10_hevc_hcp_weightoffset(VADriverContextP ctx,
5460                             struct encode_state *encode_state,
5461                             struct intel_encoder_context *encoder_context,
5462                             struct intel_batchbuffer *batch,
5463                             int slice_index)
5464 {
5465     VAEncPictureParameterBufferHEVC *pic_param;
5466     VAEncSliceParameterBufferHEVC *slice_param;
5467
5468     pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
5469     slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
5470
5471     gen10_hevc_enc_hcp_set_weight_offsets(ctx, batch, pic_param, slice_param);
5472 }
5473
5474 static void
5475 gen10_hevc_ref_idx_lists(VADriverContextP ctx,
5476                          struct encode_state *encode_state,
5477                          struct intel_encoder_context *encoder_context,
5478                          struct intel_batchbuffer *batch,
5479                          int slice_index)
5480 {
5481     VAEncPictureParameterBufferHEVC *pic_param;
5482     VAEncSliceParameterBufferHEVC *slice_param;
5483
5484     pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
5485     slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
5486
5487     if (slice_param->slice_type != HEVC_SLICE_I)
5488         gen10_hevc_enc_hcp_set_ref_idx_lists(ctx, batch, pic_param, slice_param);
5489 }
5490
5491 static void
5492 gen10_hevc_hcp_slice_state(VADriverContextP ctx,
5493                            struct encode_state *encode_state,
5494                            struct intel_encoder_context *encoder_context,
5495                            struct intel_batchbuffer *batch,
5496                            int slice_index)
5497 {
5498     struct gen10_hevc_enc_context *pak_context;
5499     struct gen10_hevc_enc_state *hevc_state;
5500     VAEncPictureParameterBufferHEVC *pic_param;
5501     VAEncSliceParameterBufferHEVC *slice_param;
5502     gen10_hcp_slice_state_param param;
5503     int last_slice, slice_qp, qp_idx;
5504
5505     pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5506     hevc_state = (struct gen10_hevc_enc_state *)pak_context->enc_priv_state;
5507
5508     pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
5509     slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
5510
5511     memset(&param, 0, sizeof(param));
5512
5513     param.dw1.slice_start_ctu_x = slice_param->slice_segment_address %
5514                                   pak_context->frame_info.width_in_lcu;
5515     param.dw1.slice_start_ctu_y = slice_param->slice_segment_address /
5516                                   pak_context->frame_info.width_in_lcu;
5517
5518     if (slice_index == encode_state->num_slice_params_ext - 1) {
5519         param.dw2.next_slice_start_ctu_x = 0;
5520         param.dw2.next_slice_start_ctu_y = 0;
5521
5522         last_slice = 1;
5523     } else {
5524         last_slice = slice_param->slice_segment_address + slice_param->num_ctu_in_slice;
5525
5526         param.dw2.next_slice_start_ctu_x = last_slice %
5527                                            pak_context->frame_info.width_in_lcu;
5528         param.dw2.next_slice_start_ctu_y = last_slice /
5529                                            pak_context->frame_info.width_in_lcu;
5530
5531         last_slice = 0;
5532     }
5533
5534     param.dw3.slice_type = slice_param->slice_type;
5535     param.dw3.last_slice_flag = last_slice;
5536     param.dw3.slice_temporal_mvp_enabled = slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag;
5537     param.dw3.slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
5538     param.dw3.slice_cb_qp_offset = slice_param->slice_cb_qp_offset;
5539     param.dw3.slice_cr_qp_offset = slice_param->slice_cr_qp_offset;
5540
5541     param.dw4.deblocking_filter_disable = slice_param->slice_fields.bits.slice_deblocking_filter_disabled_flag;
5542     param.dw4.tc_offset_div2 = slice_param->slice_tc_offset_div2 & 0xf;
5543     param.dw4.beta_offset_div2 = slice_param->slice_beta_offset_div2 & 0xf;
5544     param.dw4.sao_chroma_flag = slice_param->slice_fields.bits.slice_sao_chroma_flag;
5545     param.dw4.sao_luma_flag = slice_param->slice_fields.bits.slice_sao_luma_flag;
5546     param.dw4.mvd_l1_zero_flag = slice_param->slice_fields.bits.mvd_l1_zero_flag;
5547     param.dw4.is_low_delay = slice_param->slice_type != HEVC_SLICE_B ? 1 : hevc_state->low_delay;
5548     param.dw4.collocated_from_l0_flag = slice_param->slice_fields.bits.collocated_from_l0_flag;
5549     param.dw4.chroma_log2_weight_denom = slice_param->luma_log2_weight_denom + slice_param->delta_chroma_log2_weight_denom;
5550     param.dw4.luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
5551     param.dw4.cabac_init_flag = slice_param->slice_fields.bits.cabac_init_flag;
5552     param.dw4.max_merge_idx = slice_param->max_num_merge_cand - 1;
5553
5554     if (pic_param->collocated_ref_pic_index != 0xFF)
5555         param.dw4.collocated_ref_idx = pic_param->collocated_ref_pic_index;
5556
5557     param.dw6.round_intra = 10;
5558     param.dw6.round_inter = 4;
5559
5560     param.dw7.cabac_zero_word_insertion_enabled = 1;
5561     param.dw7.emulation_byte_insert_enabled = 1;
5562     param.dw7.slice_data_enabled = 1;
5563     param.dw7.header_insertion_enabled = 1;
5564
5565     if (pic_param->pic_fields.bits.transform_skip_enabled_flag) {
5566         slice_qp = pak_context->frame_info.slice_qp;
5567
5568         if (slice_qp <= 22)
5569             qp_idx = 0;
5570         else if (slice_qp <= 27)
5571             qp_idx = 1;
5572         else if (slice_qp <= 32)
5573             qp_idx = 2;
5574         else
5575             qp_idx = 3;
5576
5577         param.dw9.transform_skip_lambda = gen10_hevc_tr_lambda_coeffs[slice_qp];
5578
5579         if (slice_param->slice_type == HEVC_SLICE_I) {
5580             param.dw10.transform_skip_zero_factor0 = gen10_hevc_tr_skip_coeffs[qp_idx][0][0][0][0];
5581             param.dw10.transform_skip_nonezero_factor0 = gen10_hevc_tr_skip_coeffs[qp_idx][0][0][1][0];
5582             param.dw10.transform_skip_zero_factor1 = gen10_hevc_tr_skip_coeffs[qp_idx][0][0][0][1] + 32;
5583             param.dw10.transform_skip_nonezero_factor1 = gen10_hevc_tr_skip_coeffs[qp_idx][0][0][1][1] + 32;
5584         } else {
5585             param.dw10.transform_skip_zero_factor0 = gen10_hevc_tr_skip_coeffs[qp_idx][1][0][0][0];
5586             param.dw10.transform_skip_nonezero_factor0 = gen10_hevc_tr_skip_coeffs[qp_idx][1][0][1][0];
5587             param.dw10.transform_skip_zero_factor1 = gen10_hevc_tr_skip_coeffs[qp_idx][1][0][0][1] + 32;
5588             param.dw10.transform_skip_nonezero_factor1 = gen10_hevc_tr_skip_coeffs[qp_idx][1][0][1][1] + 32;
5589         }
5590     }
5591
5592     gen10_hcp_slice_state(ctx, batch, &param);
5593 }
5594
5595 static void
5596 gen10_hevc_pak_slice_level(VADriverContextP ctx,
5597                            struct encode_state *encode_state,
5598                            struct intel_encoder_context *encoder_context)
5599 {
5600     struct intel_batchbuffer *batch = encoder_context->base.batch;
5601     struct gen10_hevc_enc_context *pak_context = encoder_context->mfc_context;
5602     struct gpe_mi_batch_buffer_start_parameter second_level_batch;
5603     VAEncSliceParameterBufferHEVC *slice_param;
5604     int slice_index;
5605     int i, j;
5606
5607     slice_index = 0;
5608     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
5609         for (j = 0; j < encode_state->slice_params_ext[i]->num_elements; j++) {
5610             slice_param = (VAEncSliceParameterBufferHEVC *)(encode_state->slice_params_ext[slice_index]->buffer);
5611
5612             gen10_hevc_ref_idx_lists(ctx, encode_state, encoder_context, batch, slice_index);
5613
5614             gen10_hevc_hcp_weightoffset(ctx, encode_state, encoder_context,
5615                                         batch, slice_index);
5616
5617             gen10_hevc_hcp_slice_state(ctx, encode_state, encoder_context,
5618                                        batch, slice_index);
5619
5620             if (slice_index == 0)
5621                 gen10_hevc_enc_insert_packed_header(ctx, encode_state, encoder_context,
5622                                                     batch);
5623
5624             gen10_hevc_enc_insert_slice_header(ctx, encode_state, encoder_context,
5625                                                batch, slice_index);
5626
5627
5628             memset(&second_level_batch, 0, sizeof(second_level_batch));
5629             second_level_batch.offset = 32 * slice_param->slice_segment_address;
5630             second_level_batch.is_second_level = 1;
5631             second_level_batch.bo = pak_context->res_mb_code_surface.bo;
5632
5633             gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5634
5635             slice_index++;
5636         }
5637     }
5638 }
5639
5640 static void
5641 gen10_hevc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5642 {
5643     struct intel_batchbuffer *batch = encoder_context->base.batch;
5644     struct gen10_hevc_enc_context *pak_context = encoder_context->mfc_context;
5645     struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
5646     struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
5647     struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
5648     struct gen10_hevc_enc_status_buffer *status_buffer;
5649     struct gen10_hevc_enc_state *hevc_state;
5650     int write_pak_idx;
5651
5652     hevc_state = (struct gen10_hevc_enc_state *) pak_context->enc_priv_state;
5653     status_buffer = &pak_context->status_buffer;
5654
5655     memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
5656     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5657
5658     memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
5659     mi_store_reg_mem_param.bo = status_buffer->gpe_res.bo;
5660     mi_store_reg_mem_param.offset = status_buffer->status_bytes_per_frame_offset;
5661     mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_bytes_per_frame_offset;
5662     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5663
5664     mi_store_reg_mem_param.bo = status_buffer->gpe_res.bo;
5665     mi_store_reg_mem_param.offset = status_buffer->status_image_mask_offset;
5666     mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_image_mask_offset;
5667     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5668
5669     mi_store_reg_mem_param.bo = status_buffer->gpe_res.bo;
5670     mi_store_reg_mem_param.offset = status_buffer->status_image_ctrl_offset;
5671     mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_image_ctrl_offset;
5672     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5673
5674     mi_store_reg_mem_param.bo = status_buffer->gpe_res.bo;
5675     mi_store_reg_mem_param.offset = status_buffer->status_qp_status_offset;
5676     mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_qp_status_offset;
5677     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5678
5679     mi_store_reg_mem_param.bo = status_buffer->gpe_res.bo;
5680     mi_store_reg_mem_param.offset = status_buffer->status_bs_se_bitcount_offset;
5681     mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_bs_se_bitcount_offset;
5682     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5683
5684     write_pak_idx = hevc_state->curr_pak_stat_index;
5685     mi_store_reg_mem_param.bo = pak_context->res_brc_pak_statistics_buffer[write_pak_idx].bo;
5686     mi_store_reg_mem_param.offset = offsetof(gen10_hevc_pak_stats_info, hcp_bs_frame);
5687     mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_bytes_per_frame_offset;
5688     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5689
5690     mi_store_reg_mem_param.bo = pak_context->res_brc_pak_statistics_buffer[write_pak_idx].bo;
5691     mi_store_reg_mem_param.offset = offsetof(gen10_hevc_pak_stats_info, hcp_bs_frame_noheader);
5692     mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_bs_frame_no_header_offset;
5693     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5694
5695     mi_store_reg_mem_param.bo = pak_context->res_brc_pak_statistics_buffer[write_pak_idx].bo;
5696     mi_store_reg_mem_param.offset = offsetof(gen10_hevc_pak_stats_info, hcp_image_status_control);
5697     mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_image_ctrl_offset;
5698     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5699
5700     memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
5701     mi_store_data_imm_param.bo = pak_context->res_brc_pak_statistics_buffer[write_pak_idx].bo;
5702     mi_store_data_imm_param.offset = offsetof(gen10_hevc_pak_stats_info, hcp_image_status_ctl_last_pass);
5703     mi_store_data_imm_param.dw0 = hevc_state->curr_pak_idx;
5704     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);
5705
5706     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5707 }
5708
/*
 * Intentionally empty: this function performs no work and ignores both
 * arguments.
 */
static void
gen10_hevc_pak_brc_prepare(struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
{
    /* Nothing to do. */
}
5715
/*
 * Intentionally empty: this function releases nothing and ignores its
 * argument.
 */
static void
gen10_hevc_pak_context_destroy(void *context)
{
    /* Nothing to do. */
}
5721
5722 static VAStatus
5723 gen10_hevc_pak_pipeline(VADriverContextP ctx,
5724                         VAProfile profile,
5725                         struct encode_state *encode_state,
5726                         struct intel_encoder_context *encoder_context)
5727 {
5728     struct i965_driver_data *i965 = i965_driver_data(ctx);
5729     struct intel_batchbuffer *batch = encoder_context->base.batch;
5730     struct gen10_hevc_enc_context *pak_context = encoder_context->mfc_context;
5731     struct gen10_hevc_enc_status_buffer *status_buffer;
5732     struct gen10_hevc_enc_state *hevc_state;
5733     struct gpe_mi_conditional_batch_buffer_end_parameter mi_cond_end;
5734     struct gpe_mi_load_register_mem_parameter mi_load_reg_mem;
5735     struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
5736     int i;
5737
5738     if (!pak_context || !pak_context->enc_priv_state)
5739         return VA_STATUS_ERROR_INVALID_CONTEXT;
5740
5741     hevc_state = (struct gen10_hevc_enc_state *) pak_context->enc_priv_state;
5742     status_buffer = &pak_context->status_buffer;
5743
5744     if (i965->intel.has_bsd2)
5745         intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
5746     else
5747         intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
5748
5749     intel_batchbuffer_emit_mi_flush(batch);
5750
5751     for (hevc_state->curr_pak_idx = 0;
5752          hevc_state->curr_pak_idx < hevc_state->num_pak_passes;
5753          hevc_state->curr_pak_idx++) {
5754         if (hevc_state->curr_pak_idx == 0) {
5755             memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
5756             mi_load_reg_imm.mmio_offset = status_buffer->mmio_image_ctrl_offset;
5757             mi_load_reg_imm.data = 0;
5758             gen8_gpe_mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
5759         } else if (hevc_state->brc.brc_enabled) {
5760             memset(&mi_cond_end, 0, sizeof(mi_cond_end));
5761             mi_cond_end.offset = status_buffer->status_image_mask_offset;
5762             mi_cond_end.bo = status_buffer->gpe_res.bo;
5763             mi_cond_end.compare_data = 0;
5764             gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch,
5765                                                      &mi_cond_end);
5766
5767             memset(&mi_load_reg_mem, 0, sizeof(mi_load_reg_mem));
5768             mi_load_reg_mem.mmio_offset = status_buffer->mmio_image_ctrl_offset;
5769             mi_load_reg_mem.bo = status_buffer->gpe_res.bo;
5770             mi_load_reg_mem.offset = status_buffer->status_image_ctrl_offset;
5771             gen8_gpe_mi_load_register_mem(ctx, batch, &mi_load_reg_mem);
5772         }
5773
5774         gen10_hevc_pak_picture_level(ctx, encode_state, encoder_context);
5775         gen10_hevc_pak_slice_level(ctx, encode_state, encoder_context);
5776         gen10_hevc_read_mfc_status(ctx, encoder_context);
5777     }
5778
5779     intel_batchbuffer_end_atomic(batch);
5780     intel_batchbuffer_flush(batch);
5781
5782     if (hevc_state->sao_2nd_needed) {
5783         if (i965->intel.has_bsd2)
5784             intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
5785         else
5786             intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
5787
5788         intel_batchbuffer_emit_mi_flush(batch);
5789
5790         BEGIN_BCS_BATCH(batch, 64);
5791         for (i = 0; i < 64; i++)
5792             OUT_BCS_BATCH(batch, MI_NOOP);
5793
5794         ADVANCE_BCS_BATCH(batch);
5795         gen10_hevc_pak_picture_level(ctx, encode_state, encoder_context);
5796         gen10_hevc_pak_slice_level(ctx, encode_state, encoder_context);
5797         gen10_hevc_read_mfc_status(ctx, encoder_context);
5798         intel_batchbuffer_end_atomic(batch);
5799         intel_batchbuffer_flush(batch);
5800     }
5801
5802     hevc_state->curr_pak_stat_index ^= 1;
5803
5804     hevc_state->frame_number++;
5805
5806     return VA_STATUS_SUCCESS;
5807 }
5808
5809 static void
5810 gen10_hevc_vme_context_destroy(void *context)
5811 {
5812     struct gen10_hevc_enc_context *vme_context = context;
5813     int i;
5814
5815     if (!vme_context)
5816         return;
5817
5818     gen10_hevc_free_enc_resources(context);
5819
5820     gen10_hevc_enc_free_common_resource(&vme_context->common_res);
5821
5822     gen8_gpe_context_destroy(&vme_context->scaling_context.gpe_context);
5823
5824     gen8_gpe_context_destroy(&vme_context->me_context.gpe_context);
5825
5826     for (i = 0; i < GEN10_HEVC_BRC_NUM; i++)
5827         gen8_gpe_context_destroy(&vme_context->brc_context.gpe_contexts[i]);
5828
5829     for (i = 0; i < GEN10_HEVC_MBENC_NUM; i++)
5830         gen8_gpe_context_destroy(&vme_context->mbenc_context.gpe_contexts[i]);
5831
5832     if (vme_context->enc_priv_state)
5833         free(vme_context->enc_priv_state);
5834
5835     free(vme_context);
5836 }
5837
5838 Bool
5839 gen10_hevc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5840 {
5841     struct gen10_hevc_enc_context *vme_context = NULL;
5842     struct gen10_hevc_enc_state *hevc_state = NULL;
5843
5844     vme_context = calloc(1, sizeof(struct gen10_hevc_enc_context));
5845     hevc_state = calloc(1, sizeof(struct gen10_hevc_enc_state));
5846
5847     if (!vme_context || !hevc_state) {
5848         if (vme_context)
5849             free(vme_context);
5850
5851         if (hevc_state)
5852             free(hevc_state);
5853
5854         return false;
5855     }
5856
5857     vme_context->enc_priv_state = hevc_state;
5858
5859     gen10_hevc_vme_init_kernels_context(ctx, encoder_context, vme_context);
5860
5861     hevc_state->use_hw_scoreboard = 1;
5862     hevc_state->use_hw_non_stalling_scoreboard = 0;
5863     hevc_state->num_regions_in_slice = 1;
5864     hevc_state->rdoq_enabled = 1;
5865
5866     encoder_context->vme_context = vme_context;
5867     encoder_context->vme_pipeline = gen10_hevc_vme_pipeline;
5868     encoder_context->vme_context_destroy = gen10_hevc_vme_context_destroy;
5869
5870     return true;
5871 }
5872
5873 static VAStatus
5874 gen10_hevc_get_coded_status(VADriverContextP ctx,
5875                             struct intel_encoder_context *encoder_context,
5876                             struct i965_coded_buffer_segment *coded_buf_seg)
5877 {
5878     struct gen10_hevc_enc_status *enc_status;
5879
5880     if (!encoder_context || !coded_buf_seg)
5881         return VA_STATUS_ERROR_INVALID_BUFFER;
5882
5883     enc_status = (struct gen10_hevc_enc_status *)coded_buf_seg->codec_private_data;
5884     coded_buf_seg->base.size = enc_status->bytes_per_frame;
5885
5886     return VA_STATUS_SUCCESS;
5887 }
5888
5889 Bool
5890 gen10_hevc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5891 {
5892     struct gen10_hevc_enc_context *pak_context = encoder_context->vme_context;
5893
5894     if (!pak_context)
5895         return false;
5896
5897     encoder_context->mfc_context = pak_context;
5898     encoder_context->mfc_context_destroy = gen10_hevc_pak_context_destroy;
5899     encoder_context->mfc_pipeline = gen10_hevc_pak_pipeline;
5900     encoder_context->mfc_brc_prepare = gen10_hevc_pak_brc_prepare;
5901     encoder_context->get_status = gen10_hevc_get_coded_status;
5902
5903     return true;
5904 }