OSDN Git Service

jpeg/dec: gen8+ set correct fourcc for monochrome decode
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_mfc_hevc.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Qu Pengfei <Pengfei.Qu@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41 #include "gen9_mfc.h"
42 #include "gen6_vme.h"
43 #include "intel_media.h"
44
/* HRD (hypothetical reference decoder) buffer status codes returned by the
 * bit-rate controller after checking a frame against the buffer model. */
typedef enum _gen6_brc_status {
    BRC_NO_HRD_VIOLATION = 0,
    BRC_UNDERFLOW = 1,
    BRC_OVERFLOW = 2,
    BRC_UNDERFLOW_WITH_MAX_QP = 3,  /* still underflowing even at the maximum QP */
    BRC_OVERFLOW_WITH_MIN_QP = 4,   /* still overflowing even at the minimum QP */
} gen6_brc_status;
52
/* BRC define */
/* Clamp x into [min, max].  Statement macro: assigns back into x and may
 * evaluate its arguments more than once — pass side-effect-free expressions. */
#define BRC_CLIP(x, min, max)                                   \
    {                                                           \
        x = ((x > (max)) ? (max) : ((x < (min)) ? (min) : x));  \
    }

/* QP offsets applied between slice types by the bit-rate controller. */
#define BRC_P_B_QP_DIFF 4
#define BRC_I_P_QP_DIFF 2
#define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF)

#define BRC_PWEIGHT 0.6  /* weight if P slice with comparison to I slice */
#define BRC_BWEIGHT 0.25 /* weight if B slice with comparison to I slice */

#define BRC_QP_MAX_CHANGE 5 /* maximum qp modification */
#define BRC_CY 0.1 /* weight for the BRC correction term (original comment truncated — verify intent) */
#define BRC_CX_UNDERFLOW 5.
#define BRC_CX_OVERFLOW -4.

/* pi / 2 */
#define BRC_PI_0_5 1.5707963267948966192313216916398
73 /* intel buffer write */
/* (Re)allocate a GEM buffer object for an encoder-owned buffer.
 *
 * Drops any reference previously held by gen_buffer->bo, then allocates a
 * new buffer of @size bytes (4 KiB aligned) labeled @string.  Relies on a
 * variable `i965` (struct i965_driver_data *) being in scope at the
 * expansion site.
 *
 * Fix: removed the stray semicolon after `while (0)` — with it, the macro
 * expanded to two statements, which breaks un-braced if/else usage
 * (`if (c) ALLOC_ENCODER_BUFFER(...); else ...` would not compile).
 */
#define ALLOC_ENCODER_BUFFER(gen_buffer, string, size) do {     \
        dri_bo_unreference(gen_buffer->bo);                     \
        gen_buffer->bo = dri_bo_alloc(i965->intel.bufmgr,       \
                                      string,                   \
                                      size,                     \
                                      0x1000);                  \
        assert(gen_buffer->bo);                                 \
    } while (0)
82
83
/* Emit one buffer-address entry into the BCS batch.
 *
 * buf_bo    - buffer object, or NULL to emit a zero (unused) address
 * is_target - nonzero if the GPU writes this buffer (sets the write domain
 *             on the relocation)
 * ma        - nonzero to append the extra "memory address attributes" dword
 *             used by 3-dword address entries
 *
 * Emits 2 dwords (address lo/hi) plus 1 attribute dword when ma != 0.
 */
#define OUT_BUFFER_X(buf_bo, is_target, ma)  do {                         \
        if (buf_bo) {                                                   \
            OUT_BCS_RELOC(batch,                                        \
                          buf_bo,                                       \
                          I915_GEM_DOMAIN_INSTRUCTION,                       \
                          is_target ? I915_GEM_DOMAIN_INSTRUCTION : 0,       \
                          0);                                           \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
        OUT_BCS_BATCH(batch, 0);                                        \
        if (ma)                                                         \
            OUT_BCS_BATCH(batch, 0);                                    \
    } while (0)

/* MA = with memory-address-attributes dword; NMA = without. */
#define OUT_BUFFER_MA_TARGET(buf_bo)       OUT_BUFFER_X(buf_bo, 1, 1)
#define OUT_BUFFER_MA_REFERENCE(buf_bo)    OUT_BUFFER_X(buf_bo, 0, 1)
#define OUT_BUFFER_NMA_TARGET(buf_bo)      OUT_BUFFER_X(buf_bo, 1, 0)
#define OUT_BUFFER_NMA_REFERENCE(buf_bo)   OUT_BUFFER_X(buf_bo, 0, 0)
103
104
105 #define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
106 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
107 #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
108
109 #define HCP_SOFTWARE_SKYLAKE    1
110
111 #define NUM_HCPE_KERNEL 2
112
113 #define     INTER_MODE_MASK     0x03
114 #define     INTER_8X8       0x03
115 #define     INTER_16X8      0x01
116 #define     INTER_8X16      0x02
117 #define     SUBMB_SHAPE_MASK    0x00FF00
118
119 #define     INTER_MV8       (4 << 20)
120 #define     INTER_MV32      (6 << 20)
121
122
123 /* HEVC */
124
125 /* utils */
126 static void
127 hevc_gen_default_iq_matrix_encoder(VAQMatrixBufferHEVC *iq_matrix)
128 {
129     /* Flat_4x4_16 */
130     memset(&iq_matrix->scaling_lists_4x4, 16, sizeof(iq_matrix->scaling_lists_4x4));
131
132     /* Flat_8x8_16 */
133     memset(&iq_matrix->scaling_lists_8x8, 16, sizeof(iq_matrix->scaling_lists_8x8));
134
135     /* Flat_16x16_16 */
136     memset(&iq_matrix->scaling_lists_16x16, 16, sizeof(iq_matrix->scaling_lists_16x16));
137
138     /* Flat_32x32_16 */
139     memset(&iq_matrix->scaling_lists_32x32, 16, sizeof(iq_matrix->scaling_lists_32x32));
140
141     /* Flat_16x16_dc_16 */
142     memset(&iq_matrix->scaling_list_dc_16x16, 16, sizeof(iq_matrix->scaling_list_dc_16x16));
143
144     /* Flat_32x32_dc_16 */
145     memset(&iq_matrix->scaling_list_dc_32x32, 16, sizeof(iq_matrix->scaling_list_dc_32x32));
146 }
147
148 /* HEVC picture and slice state related */
149
/* Emit HCP_PIPE_MODE_SELECT, putting the HCP pipe into HEVC encode mode.
 *
 * KBL takes a 6-dword command (two extra reserved dwords); SKL and other
 * gen9 parts take 4 dwords.  Only HEVC is supported here (asserted).
 */
static void
gen9_hcpe_pipe_mode_select(VADriverContextP ctx,
                           int standard_select,
                           struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    assert(standard_select == HCP_CODEC_HEVC);

    if(IS_KBL(i965->intel.device_info))
    {
        BEGIN_BCS_BATCH(batch, 6);

        OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
    }
    else
    {
        BEGIN_BCS_BATCH(batch, 4);

        OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (4 - 2));
    }

    /* DW 1: codec standard + encode direction; status/error reporting off. */
    OUT_BCS_BATCH(batch,
                  (standard_select << 5) |
                  (0 << 3) | /* disable Pic Status / Error Report */
                  HCP_CODEC_SELECT_ENCODE);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* KBL-only trailing reserved dwords. */
    if(IS_KBL(i965->intel.device_info))
    {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
188
/* Emit two HCP_SURFACE_STATE commands describing the reconstructed picture:
 * one for surface id 1 and one for surface id 0, both with the same pitch
 * and chroma (Cb) plane offset.  Chooses P010 when the sequence uses a bit
 * depth above 8 (the reconstructed surface must then be P010), otherwise
 * planar 4:2:0 8-bit (NV12 layout).
 */
static void
gen9_hcpe_surface_state(VADriverContextP ctx, struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct object_surface *obj_surface = encode_state->reconstructed_object;
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    unsigned int surface_format = SURFACE_FORMAT_PLANAR_420_8;

    /* to do */
    unsigned int y_cb_offset;   /* start row of the chroma plane, in lines */

    assert(obj_surface);

    if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
        || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
    {
        assert(obj_surface->fourcc == VA_FOURCC_P010);
        surface_format = SURFACE_FORMAT_P010;
    }

    y_cb_offset = obj_surface->y_cb_offset;

    /* Surface id 1. */
    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
    OUT_BCS_BATCH(batch,
                  (1 << 28) |                   /* surface id */
                  (mfc_context->surface_state.w_pitch - 1));    /* pitch - 1 */
    OUT_BCS_BATCH(batch,
                  surface_format << 28 |
                  y_cb_offset);
    ADVANCE_BCS_BATCH(batch);

    /* Surface id 0, identical geometry. */
    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 28) |                   /* surface id */
                  (mfc_context->surface_state.w_pitch - 1));    /* pitch - 1 */
    OUT_BCS_BATCH(batch,
                  surface_format << 28 |
                  y_cb_offset);
    ADVANCE_BCS_BATCH(batch);
}
233
/* Emit HCP_PIPE_BUF_ADDR_STATE: programs every buffer address the HCP pipe
 * needs for one frame — reconstructed target, deblocking/metadata/SAO
 * row-store buffers, the per-surface motion-vector temporal buffer, up to
 * 8 reference pictures, the uncompressed source, and the collocated MV
 * buffers.  The dword layout is fixed by hardware; the DW comments below
 * track the position of each entry.  KBL uses a 104-dword form (9 extra
 * trailing dwords), other gen9 parts 95 dwords.
 */
static void
gen9_hcpe_pipe_buf_addr_state(VADriverContextP ctx, struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    GenHevcSurface *hcpe_hevc_surface;
    dri_bo *bo;
    unsigned int i;

    if(IS_KBL(i965->intel.device_info))
    {
        BEGIN_BCS_BATCH(batch, 104);

        OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (104 - 2));
    }
    else
    {
        BEGIN_BCS_BATCH(batch, 95);

        OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (95 - 2));
    }

    obj_surface = encode_state->reconstructed_object;
    assert(obj_surface && obj_surface->bo);
    hcpe_hevc_surface = obj_surface->private_data;
    assert(hcpe_hevc_surface && hcpe_hevc_surface->motion_vector_temporal_bo);

    OUT_BUFFER_MA_TARGET(obj_surface->bo); /* DW 1..3 */
    OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_line_buffer.bo);/* DW 4..6 */
    OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_tile_line_buffer.bo); /* DW 7..9 */
    OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_tile_column_buffer.bo); /* DW 10..12 */
    OUT_BUFFER_MA_TARGET(mfc_context->metadata_line_buffer.bo);         /* DW 13..15 */
    OUT_BUFFER_MA_TARGET(mfc_context->metadata_tile_line_buffer.bo);    /* DW 16..18 */
    OUT_BUFFER_MA_TARGET(mfc_context->metadata_tile_column_buffer.bo);  /* DW 19..21 */
    OUT_BUFFER_MA_TARGET(mfc_context->sao_line_buffer.bo);              /* DW 22..24 */
    OUT_BUFFER_MA_TARGET(mfc_context->sao_tile_line_buffer.bo);         /* DW 25..27 */
    OUT_BUFFER_MA_TARGET(mfc_context->sao_tile_column_buffer.bo);       /* DW 28..30 */
    OUT_BUFFER_MA_TARGET(hcpe_hevc_surface->motion_vector_temporal_bo); /* DW 31..33 */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 34..36, reserved */

    /* here only max 8 reference allowed */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        bo = mfc_context->reference_surfaces[i].bo;

        if (bo) {
            OUT_BUFFER_NMA_REFERENCE(bo);
        } else
            OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    OUT_BCS_BATCH(batch, 0);    /* DW 53, memory address attributes */

    OUT_BUFFER_MA_TARGET(mfc_context->uncompressed_picture_source.bo); /* DW 54..56, uncompressed picture source */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 57..59, ignore  */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 60..62, ignore  */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 63..65, ignore  */

    /* Collocated MV buffers share one trailing attributes dword (DW 82). */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->current_collocated_mv_temporal_buffer) - 1; i++) {
        bo = mfc_context->current_collocated_mv_temporal_buffer[i].bo;

        if (bo) {
            OUT_BUFFER_NMA_REFERENCE(bo);
        } else
            OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    OUT_BCS_BATCH(batch, 0);    /* DW 82, memory address attributes */

    OUT_BUFFER_MA_TARGET(NULL);    /* DW 83..85, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 86..88, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 89..91, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 92..94, ignore for HEVC */

    /* KBL-only trailing reserved dwords (DW 95..103). */
    if(IS_KBL(i965->intel.device_info))
    {
        for(i = 0;i < 9;i++)
            OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
316
/* Emit HCP_IND_OBJ_BASE_ADDR_STATE: points the HCP at the indirect CU
 * object stream (PAK input) and the PAK-BSE output buffer.  The BSE entry
 * is programmed twice: once with the start offset (DW 9..11) and once with
 * the end offset as its upper bound (DW 12..13).
 */
static void
gen9_hcpe_ind_obj_base_addr_state(VADriverContextP ctx,
                                  struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;

    /* to do */
    BEGIN_BCS_BATCH(batch, 14);

    OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (14 - 2));
    OUT_BUFFER_MA_REFERENCE(NULL);                 /* DW 1..3 igonre for encoder*/
    OUT_BUFFER_NMA_REFERENCE(NULL);                /* DW 4..5, Upper Bound */
    OUT_BUFFER_MA_TARGET(mfc_context->hcp_indirect_cu_object.bo);                 /* DW 6..8, CU */
    /* DW 9..11, PAK-BSE */
    OUT_BCS_RELOC(batch,
                  mfc_context->hcp_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  mfc_context->hcp_indirect_pak_bse_object.offset);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* DW 12..13, PAK-BSE upper bound (end offset in the same bo). */
    OUT_BCS_RELOC(batch,
                  mfc_context->hcp_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  mfc_context->hcp_indirect_pak_bse_object.end_offset);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
346
347 static void
348 gen9_hcpe_fqm_state(VADriverContextP ctx,
349                     int size_id,
350                     int color_component,
351                     int pred_type,
352                     int dc,
353                     unsigned int *fqm,
354                     int fqm_length,
355                     struct intel_encoder_context *encoder_context)
356 {
357     struct intel_batchbuffer *batch = encoder_context->base.batch;
358     unsigned int fqm_buffer[32];
359
360     assert(fqm_length <= 32);
361     assert(sizeof(*fqm) == 4);
362     memset(fqm_buffer, 0, sizeof(fqm_buffer));
363     memcpy(fqm_buffer, fqm, fqm_length * 4);
364
365     BEGIN_BCS_BATCH(batch, 34);
366
367     OUT_BCS_BATCH(batch, HCP_FQM_STATE | (34 - 2));
368     OUT_BCS_BATCH(batch,
369                   dc << 16 |
370                   color_component << 3 |
371                   size_id << 1 |
372                   pred_type);
373     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
374
375     ADVANCE_BCS_BATCH(batch);
376 }
377
378
379 static void
380 gen9_hcpe_hevc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
381 {
382     unsigned int qm[32] = {
383         0x10001000, 0x10001000, 0x10001000, 0x10001000,
384         0x10001000, 0x10001000, 0x10001000, 0x10001000,
385         0x10001000, 0x10001000, 0x10001000, 0x10001000,
386         0x10001000, 0x10001000, 0x10001000, 0x10001000,
387         0x10001000, 0x10001000, 0x10001000, 0x10001000,
388         0x10001000, 0x10001000, 0x10001000, 0x10001000,
389         0x10001000, 0x10001000, 0x10001000, 0x10001000,
390         0x10001000, 0x10001000, 0x10001000, 0x10001000
391     };
392
393     gen9_hcpe_fqm_state(ctx,
394                         0, 0, 0, 0,
395                         qm, 8,
396                         encoder_context);
397     gen9_hcpe_fqm_state(ctx,
398                         0, 0, 1, 0,
399                         qm, 8,
400                         encoder_context);
401     gen9_hcpe_fqm_state(ctx,
402                         1, 0, 0, 0,
403                         qm, 32,
404                         encoder_context);
405     gen9_hcpe_fqm_state(ctx,
406                         1, 0, 1, 0,
407                         qm, 32,
408                         encoder_context);
409     gen9_hcpe_fqm_state(ctx,
410                         2, 0, 0, 0x1000,
411                         qm, 0,
412                         encoder_context);
413     gen9_hcpe_fqm_state(ctx,
414                         2, 0, 1, 0x1000,
415                         qm, 0,
416                         encoder_context);
417     gen9_hcpe_fqm_state(ctx,
418                         3, 0, 0, 0x1000,
419                         qm, 0,
420                         encoder_context);
421     gen9_hcpe_fqm_state(ctx,
422                         3, 0, 1, 0x1000,
423                         qm, 0,
424                         encoder_context);
425 }
426
427 static void
428 gen9_hcpe_qm_state(VADriverContextP ctx,
429                    int size_id,
430                    int color_component,
431                    int pred_type,
432                    int dc,
433                    unsigned int *qm,
434                    int qm_length,
435                    struct intel_encoder_context *encoder_context)
436 {
437     struct intel_batchbuffer *batch = encoder_context->base.batch;
438     unsigned int qm_buffer[16];
439
440     assert(qm_length <= 16);
441     assert(sizeof(*qm) == 4);
442     memset(qm_buffer, 0, sizeof(qm_buffer));
443     memcpy(qm_buffer, qm, qm_length * 4);
444
445     BEGIN_BCS_BATCH(batch, 18);
446
447     OUT_BCS_BATCH(batch, HCP_QM_STATE | (18 - 2));
448     OUT_BCS_BATCH(batch,
449                   dc << 5 |
450                   color_component << 3 |
451                   size_id << 1 |
452                   pred_type);
453     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
454
455     ADVANCE_BCS_BATCH(batch);
456 }
457
458 static void
459 gen9_hcpe_hevc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
460 {
461
462     int i;
463
464     unsigned int qm[16] = {
465         0x10101010, 0x10101010, 0x10101010, 0x10101010,
466         0x10101010, 0x10101010, 0x10101010, 0x10101010,
467         0x10101010, 0x10101010, 0x10101010, 0x10101010,
468         0x10101010, 0x10101010, 0x10101010, 0x10101010
469     };
470
471     for (i = 0; i < 6; i++) {
472         gen9_hcpe_qm_state(ctx,
473                            0, i % 3, i / 3, 0,
474                            qm, 4,
475                            encoder_context);
476     }
477
478     for (i = 0; i < 6; i++) {
479         gen9_hcpe_qm_state(ctx,
480                            1, i % 3, i / 3, 0,
481                            qm, 16,
482                            encoder_context);
483     }
484
485     for (i = 0; i < 6; i++) {
486         gen9_hcpe_qm_state(ctx,
487                            2, i % 3, i / 3, 16,
488                            qm, 16,
489                            encoder_context);
490     }
491
492     for (i = 0; i < 2; i++) {
493         gen9_hcpe_qm_state(ctx,
494                            3, 0, i % 2, 16,
495                            qm, 16,
496                            encoder_context);
497     }
498 }
499
/* Emit HCP_PIC_STATE for HEVC encode.
 *
 * Packs the sequence/picture parameter fields (coding block and transform
 * size ranges, PCM settings, tool-enable flags, bit depths, QP offsets)
 * plus the frame-level bit-budget limits used by the PAK hardware for
 * rate-control conformance.  KBL uses a 31-dword form (12 extra trailing
 * dwords and the CU packet structure bit), other gen9 parts 19 dwords.
 */
static void
gen9_hcpe_hevc_pic_state(VADriverContextP ctx, struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferHEVC *pic_param ;
    VAEncSequenceParameterBufferHEVC *seq_param ;

    int max_pcm_size_minus3 = 0, min_pcm_size_minus3 = 0;
    int pcm_sample_bit_depth_luma_minus1 = 7, pcm_sample_bit_depth_chroma_minus1 = 7;
    /*
     * 7.4.3.1
     *
     * When not present, the value of loop_filter_across_tiles_enabled_flag
     * is inferred to be equal to 1.
     *
     * NOTE(review): despite the inference rule quoted above, this is
     * initialized to 0 and forced to 0 for the encoder below — confirm
     * this is intentional for the encode path.
     */
    int loop_filter_across_tiles_enabled_flag = 0;
    pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
    seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;

    /* CTB geometry and per-CTB / per-frame bit budgets for PAK. */
    int log2_cu_size = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size =  seq_param->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
    int ctb_size = 1 << log2_ctb_size;
    double rawctubits = 8 * 3 * ctb_size * ctb_size / 2.0;   /* raw 4:2:0 8-bit CTB bits */
    int maxctubits = (int)(5 * rawctubits / 3) ;
    double bitrate = seq_param->bits_per_second * 1.0;
    double framebitrate = bitrate / 32 / 8; //32 byte unit
    int minframebitrate = 0;//(int) (framebitrate * 3 / 10);
    int maxframebitrate = (int)(framebitrate * 10 / 10);
    int maxdeltaframebitrate = 0x1c5c; //(int) (framebitrate * 1/ 10);
    int mindeltaframebitrate = 0; //(int) (framebitrate * 1/ 10);
    int minframesize = 0;//(int)(rawframebits * 1/50);

    if (seq_param->seq_fields.bits.pcm_enabled_flag) {
        max_pcm_size_minus3 = seq_param->log2_max_pcm_luma_coding_block_size_minus3;
        min_pcm_size_minus3 = seq_param->log2_min_pcm_luma_coding_block_size_minus3;
        pcm_sample_bit_depth_luma_minus1 = (seq_param->pcm_sample_bit_depth_luma_minus1 & 0x0f);
        pcm_sample_bit_depth_chroma_minus1 = (seq_param->pcm_sample_bit_depth_chroma_minus1 & 0x0f);
    } else {
        /* PCM disabled: still program a sane max size derived from the CTB range. */
        max_pcm_size_minus3 = MIN(seq_param->log2_min_luma_coding_block_size_minus3 + seq_param->log2_diff_max_min_luma_coding_block_size, 2);
    }

    if (pic_param->pic_fields.bits.tiles_enabled_flag)
        loop_filter_across_tiles_enabled_flag = pic_param->pic_fields.bits.loop_filter_across_tiles_enabled_flag;

    /* set zero for encoder */
    loop_filter_across_tiles_enabled_flag = 0;

    if(IS_KBL(i965->intel.device_info))
    {
        BEGIN_BCS_BATCH(batch, 31);

        OUT_BCS_BATCH(batch, HCP_PIC_STATE | (31 - 2));
    }
    else
    {
        BEGIN_BCS_BATCH(batch, 19);

        OUT_BCS_BATCH(batch, HCP_PIC_STATE | (19 - 2));
    }

    /* DW 1: frame size in min coding blocks. */
    OUT_BCS_BATCH(batch,
                  mfc_context->pic_size.picture_height_in_min_cb_minus1 << 16 |
                  0 << 14 |
                  mfc_context->pic_size.picture_width_in_min_cb_minus1);
    /* DW 2: PCM, transform and coding block size ranges. */
    OUT_BCS_BATCH(batch,
                  max_pcm_size_minus3 << 10 |
                  min_pcm_size_minus3 << 8 |
                  (seq_param->log2_min_transform_block_size_minus2 +
                   seq_param->log2_diff_max_min_transform_block_size) << 6 |
                  seq_param->log2_min_transform_block_size_minus2 << 4 |
                  (seq_param->log2_min_luma_coding_block_size_minus3 +
                   seq_param->log2_diff_max_min_luma_coding_block_size) << 2 |
                  seq_param->log2_min_luma_coding_block_size_minus3);
    OUT_BCS_BATCH(batch, 0); /* DW 3, ignored */
    /* DW 4: coding-tool enable flags. */
    OUT_BCS_BATCH(batch,
                  (IS_KBL(i965->intel.device_info)? 1 : 0) << 27 | /* CU packet structure is 0 for SKL */
                  seq_param->seq_fields.bits.strong_intra_smoothing_enabled_flag << 26 |
                  pic_param->pic_fields.bits.transquant_bypass_enabled_flag << 25 |
                  seq_param->seq_fields.bits.amp_enabled_flag << 23 |
                  pic_param->pic_fields.bits.transform_skip_enabled_flag << 22 |
                  0 << 21 | /* 0 for encoder !(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_BOTTOM_FIELD)*/
                  0 << 20 |     /* 0 for encoder !!(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_FIELD_PIC)*/
                  pic_param->pic_fields.bits.weighted_pred_flag << 19 |
                  pic_param->pic_fields.bits.weighted_bipred_flag << 18 |
                  pic_param->pic_fields.bits.tiles_enabled_flag << 17 |                 /* 0 for encoder */
                  pic_param->pic_fields.bits.entropy_coding_sync_enabled_flag << 16 |
                  loop_filter_across_tiles_enabled_flag << 15 |
                  pic_param->pic_fields.bits.sign_data_hiding_enabled_flag << 13 |  /* 0 for encoder */
                  pic_param->log2_parallel_merge_level_minus2 << 10 |               /* 0 for encoder */
                  pic_param->pic_fields.bits.constrained_intra_pred_flag << 9 |     /* 0 for encoder */
                  seq_param->seq_fields.bits.pcm_loop_filter_disabled_flag << 8 |
                  (pic_param->diff_cu_qp_delta_depth & 0x03) << 6 |                 /* 0 for encoder */
                  pic_param->pic_fields.bits.cu_qp_delta_enabled_flag << 5 |        /* 0 for encoder */
                  seq_param->seq_fields.bits.pcm_enabled_flag << 4 |
                  seq_param->seq_fields.bits.sample_adaptive_offset_enabled_flag << 3 | /* 0 for encoder */
                  0);
    /* DW 5: bit depths, transform hierarchy depths, chroma QP offsets. */
    OUT_BCS_BATCH(batch,
                  seq_param->seq_fields.bits.bit_depth_luma_minus8 << 27 |                 /* 10 bit for KBL+*/
                  seq_param->seq_fields.bits.bit_depth_chroma_minus8 << 24 |                 /* 10 bit for KBL+ */
                  pcm_sample_bit_depth_luma_minus1 << 20 |
                  pcm_sample_bit_depth_chroma_minus1 << 16 |
                  seq_param->max_transform_hierarchy_depth_inter << 13 |    /*  for encoder */
                  seq_param->max_transform_hierarchy_depth_intra << 10 |    /*  for encoder */
                  (pic_param->pps_cr_qp_offset & 0x1f) << 5 |
                  (pic_param->pps_cb_qp_offset & 0x1f));
    OUT_BCS_BATCH(batch,
                  0 << 29 | /* must be 0 for encoder */
                  maxctubits); /* DW 6, max LCU bit size allowed for encoder  */
    OUT_BCS_BATCH(batch,
                  0 << 31 | /* frame bitrate max unit */
                  maxframebitrate); /* DW 7, frame bitrate max 0:13   */
    OUT_BCS_BATCH(batch,
                  0 << 31 | /* frame bitrate min unit */
                  minframebitrate); /* DW 8, frame bitrate min 0:13   */
    OUT_BCS_BATCH(batch,
                  maxdeltaframebitrate << 16 | /* frame bitrate max delta ,help to select deltaQP of slice*/
                  mindeltaframebitrate); /* DW 9,(0,14) frame bitrate min delta ,help to select deltaQP of slice*/
    OUT_BCS_BATCH(batch, 0x07050402);   /* DW 10, frame delta qp max */
    OUT_BCS_BATCH(batch, 0x0d0b0908);
    OUT_BCS_BATCH(batch, 0);    /* DW 12, frame delta qp min */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0x04030200);   /* DW 14, frame delta qp max range  */
    OUT_BCS_BATCH(batch, 0x100c0806);   /* DW 15 */
    OUT_BCS_BATCH(batch, 0x04030200);   /* DW 16, frame delta qp min range  */
    OUT_BCS_BATCH(batch, 0x100c0806);
    OUT_BCS_BATCH(batch,
                  0 << 30 |
                  minframesize);    /* DW 18, min frame size units */

    /* KBL-only trailing reserved dwords (DW 19..30). */
    if(IS_KBL(i965->intel.device_info))
    {
        int i = 0;

        for(i = 0;i < 12;i++)
            OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
642
643
/* Emit HCP_PAK_INSERT_OBJECT with an inline payload: inserts raw bitstream
 * data (headers, SEI, trailing bits) into the coded output.
 *
 * insert_data           - dwords to insert (note: "lenght_in_dws" is a
 *                         long-standing typo for length_in_dws, kept as-is)
 * data_bits_in_last_dw  - valid bits in the final dword; 0 means all 32
 * skip_emul_byte_count  - leading bytes exempt from emulation prevention
 * is_last_header        - set on the final header before slice data
 * is_end_of_slice       - set when this object terminates the slice
 * emulation_flag        - enable emulation-prevention byte insertion
 * batch                 - batch to emit into; NULL means the default batch
 */
static void
gen9_hcpe_hevc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
                             unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
                             int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
                             struct intel_batchbuffer *batch)
{
    if (batch == NULL)
        batch = encoder_context->base.batch;

    if (data_bits_in_last_dw == 0)
        data_bits_in_last_dw = 32;

    BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);

    OUT_BCS_BATCH(batch, HCP_INSERT_PAK_OBJECT | (lenght_in_dws + 2 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 31) |   /* inline payload */
                  (0 << 16) |   /* always start at offset 0 */
                  (0 << 15) |   /* HeaderLengthExcludeFrmSize */
                  (data_bits_in_last_dw << 8) |
                  (skip_emul_byte_count << 4) |
                  (!!emulation_flag << 3) |
                  ((!!is_last_header) << 2) |
                  ((!!is_end_of_slice) << 1) |
                  (0 << 0));    /* Reserved */
    intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);

    ADVANCE_BCS_BATCH(batch);
}
673 /*
674 // To be do: future
675 static uint8_t
676 intel_get_ref_idx_state_1(VAPictureHEVC *va_pic, unsigned int frame_store_id)
677 {
678     unsigned int is_long_term =
679         !!(va_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE);
680     unsigned int is_top_field =
681         !!!(va_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD);
682     unsigned int is_bottom_field =
683         !!(va_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD);
684
685     return ((is_long_term                         << 6) |
686             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
687             (frame_store_id                       << 1) |
688             ((is_top_field ^ 1) & is_bottom_field));
689 }
690 */
/* Emit HCP_REF_IDX_STATE for one reference picture list.
 *
 * list - 0 for RefPicList0, 1 for RefPicList1.  Looks up the VME's chosen
 * reference object in the DPB (encode_state->reference_objects) to get its
 * frame-store index, then writes one entry per active reference: long-term
 * flag, frame index, and the POC delta to the current picture clamped to
 * [-128, 127].  Unused entries (up to 16) are zero-filled.
 */
static void
gen9_hcpe_ref_idx_state_1(struct intel_batchbuffer *batch,
                          int list,
                          struct intel_encoder_context *encoder_context,
                          struct encode_state *encode_state)
{
    int i;
    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
    uint8_t num_ref_minus1 = (list ? slice_param->num_ref_idx_l1_active_minus1 : slice_param->num_ref_idx_l0_active_minus1);
    VAPictureHEVC *ref_list = (list ? slice_param->ref_pic_list1 : slice_param->ref_pic_list0);
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct object_surface *obj_surface;
    int frame_index;

    /* Low byte is the VME's chosen ref index for this list. */
    int ref_idx_l0 = (vme_context->ref_index_in_mb[list] & 0xff);

    if (ref_idx_l0 > 3) {
        WARN_ONCE("ref_idx_l0 is out of range\n");
        ref_idx_l0 = 0;
    }

    /* Map the used reference object back to its DPB slot. */
    obj_surface = vme_context->used_reference_objects[list];
    frame_index = -1;
    for (i = 0; i < 16; i++) {
        if (obj_surface &&
            obj_surface == encode_state->reference_objects[i]) {
            frame_index = i;
            break;
        }
    }
    if (frame_index == -1) {
        WARN_ONCE("RefPicList 0 or 1 is not found in DPB!\n");
    }

    BEGIN_BCS_BATCH(batch, 18);

    OUT_BCS_BATCH(batch, HCP_REF_IDX_STATE | (18 - 2));
    OUT_BCS_BATCH(batch,
                  num_ref_minus1 << 1 |
                  list);

    for (i = 0; i < 16; i++) {
        if (i < MIN((num_ref_minus1 + 1), 15)) {
            VAPictureHEVC *ref_pic = &ref_list[i];
            VAPictureHEVC *curr_pic = &pic_param->decoded_curr_pic;

            /* NOTE(review): bit 15 is set to 1 yet the comment says the
             * bottom_field_flag is 0 — confirm the intended field meaning. */
            OUT_BCS_BATCH(batch,
                          1 << 15 |         /* bottom_field_flag 0 */
                          0 << 14 |         /* field_pic_flag 0 */
                          !!(ref_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE) << 13 |  /* short term is 1 */
                          0 << 12 | /* disable WP */
                          0 << 11 | /* disable WP */
                          frame_index << 8 |
                          (CLAMP(-128, 127, curr_pic->pic_order_cnt - ref_pic->pic_order_cnt) & 0xff));
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    ADVANCE_BCS_BATCH(batch);
}
753
754 void
755 intel_hcpe_hevc_ref_idx_state(VADriverContextP ctx,
756                               struct encode_state *encode_state,
757                               struct intel_encoder_context *encoder_context
758                              )
759 {
760     struct intel_batchbuffer *batch = encoder_context->base.batch;
761     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
762
763     if (slice_param->slice_type == HEVC_SLICE_I)
764         return;
765
766     gen9_hcpe_ref_idx_state_1(batch, 0, encoder_context, encode_state);
767
768     if (slice_param->slice_type == HEVC_SLICE_P)
769         return;
770
771     gen9_hcpe_ref_idx_state_1(batch, 1, encoder_context, encode_state);
772 }
773
/*
 * Emit the HCP_SLICE_STATE command for one slice.
 *
 * Computes the CTB coordinates of the slice start and of the next
 * slice, flags the last slice of the frame, then programs the QP,
 * deblocking, SAO and insertion-control fields.  The command is
 * 11 dwords on KBL and 9 dwords on other gen9 parts.
 */
static void
gen9_hcpe_hevc_slice_state(VADriverContextP ctx,
                           VAEncPictureParameterBufferHEVC *pic_param,
                           VAEncSliceParameterBufferHEVC *slice_param,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context,
                           struct intel_batchbuffer *batch)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    int slice_type = slice_param->slice_type;

    /* frame geometry in CTB units */
    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
    int ctb_size = 1 << log2_ctb_size;
    int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
    int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
    /* slice ends exactly at the frame's last CTB -> last slice of the frame */
    int last_slice = (((slice_param->slice_segment_address + slice_param->num_ctu_in_slice) == (width_in_ctb * height_in_ctb)) ? 1 : 0);

    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;

    /* CTB coordinates of this slice's first CTB */
    slice_hor_pos = slice_param->slice_segment_address % width_in_ctb;
    slice_ver_pos = slice_param->slice_segment_address / width_in_ctb;

    /* CTB coordinates of the following slice's first CTB */
    next_slice_hor_pos = (slice_param->slice_segment_address + slice_param->num_ctu_in_slice) % width_in_ctb;
    next_slice_ver_pos = (slice_param->slice_segment_address + slice_param->num_ctu_in_slice) / width_in_ctb;

    /* only support multi slice begin from row start address */
    assert((slice_param->slice_segment_address % width_in_ctb) == 0);

    /* For the last slice the "next slice" position is synthesized:
     * single-slice frames report (0, height_in_ctb), multi-slice frames
     * report (0, 0). */
    if (last_slice == 1) {
        if (slice_param->slice_segment_address == 0) {
            next_slice_hor_pos = 0;
            next_slice_ver_pos = height_in_ctb;
        } else {
            next_slice_hor_pos = 0;
            next_slice_ver_pos = 0;
        }
    }

    /* KBL uses an 11-dword HCP_SLICE_STATE; other gen9 use 9 dwords */
    if(IS_KBL(i965->intel.device_info))
    {
        BEGIN_BCS_BATCH(batch, 11);

        OUT_BCS_BATCH(batch, HCP_SLICE_STATE | (11 - 2));
    }
    else
    {
        BEGIN_BCS_BATCH(batch, 9);

        OUT_BCS_BATCH(batch, HCP_SLICE_STATE | (9 - 2));
    }

    /* DW1: current slice start position in CTBs */
    OUT_BCS_BATCH(batch,
                  slice_ver_pos << 16 |
                  slice_hor_pos);
    /* DW2: next slice start position in CTBs */
    OUT_BCS_BATCH(batch,
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);
    /* DW3: QP offsets, slice QP, slice flags and slice type */
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_cr_qp_offset & 0x1f) << 17 |
                  (slice_param->slice_cb_qp_offset & 0x1f) << 12 |
                  (pic_param->pic_init_qp + slice_param->slice_qp_delta) << 6 |
                  slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag << 5 |
                  slice_param->slice_fields.bits.dependent_slice_segment_flag << 4 |
                  last_slice << 2 |
                  slice_type);
    /* DW4: merge/CABAC/weighted-prediction/SAO/deblocking controls */
    OUT_BCS_BATCH(batch,
                  0 << 26 |
                  (slice_param->max_num_merge_cand - 1)  << 23 |
                  slice_param->slice_fields.bits.cabac_init_flag << 22 |
                  slice_param->luma_log2_weight_denom << 19 |
                  (slice_param->luma_log2_weight_denom + slice_param->delta_chroma_log2_weight_denom) << 16 |
                  slice_param->slice_fields.bits.collocated_from_l0_flag << 15 |
                  (slice_type != HEVC_SLICE_B) << 14 |
                  slice_param->slice_fields.bits.mvd_l1_zero_flag << 13 |
                  slice_param->slice_fields.bits.slice_sao_luma_flag << 12 |
                  slice_param->slice_fields.bits.slice_sao_chroma_flag << 11 |
                  slice_param->slice_fields.bits.slice_loop_filter_across_slices_enabled_flag << 10 |
                  (slice_param->slice_beta_offset_div2 & 0xf) << 5 |
                  (slice_param->slice_tc_offset_div2 & 0xf) << 1 |
                  slice_param->slice_fields.bits.slice_deblocking_filter_disabled_flag);
    OUT_BCS_BATCH(batch, 0); /* DW 5 ,ignore for encoder.*/
    /* DW6: rounding controls for intra/inter */
    OUT_BCS_BATCH(batch,
                  4 << 26 |
                  4 << 20 |
                  0);
    /* DW7: PAK insertion / bitstream packing controls */
    OUT_BCS_BATCH(batch,
                  1 << 10 |  /* header insertion enable */
                  1 << 9  |  /* slice data enable */
                  1 << 8  |  /* tail insertion enable, must at end of frame, not slice */
                  1 << 2  |  /* RBSP or EBSP, EmulationByteSliceInsertEnable */
                  1 << 1  |  /* cabacZeroWordInsertionEnable */
                  0);        /* Ignored for decoding */
    OUT_BCS_BATCH(batch, 0); /* PAK-BSE data start offset */

    /* DW9-DW10: KBL-only trailing dwords */
    if(IS_KBL(i965->intel.device_info))
    {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
878
/* HEVC pipe line related */
/*
 * Per-picture HCP command programming for the HEVC encode pipeline:
 * pipe mode select, surface state, buffer addresses, indirect object
 * base, quantizer matrices, picture state and the reference index
 * lists, in that sequence.
 */
static void gen9_hcpe_hevc_pipeline_picture_programing(VADriverContextP ctx,
        struct encode_state *encode_state,
        struct intel_encoder_context *encoder_context)
{
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;

    mfc_context->pipe_mode_select(ctx, HCP_CODEC_HEVC, encoder_context);
    mfc_context->set_surface_state(ctx, encode_state, encoder_context);
    gen9_hcpe_pipe_buf_addr_state(ctx, encode_state, encoder_context);
    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);

    /* quantizer matrices must be loaded before the picture state */
    mfc_context->qm_state(ctx, encoder_context);
    mfc_context->fqm_state(ctx, encoder_context);
    mfc_context->pic_state(ctx, encode_state, encoder_context);
    intel_hcpe_hevc_ref_idx_state(ctx, encode_state, encoder_context);
}
896
897 static void gen9_hcpe_init(VADriverContextP ctx,
898                            struct encode_state *encode_state,
899                            struct intel_encoder_context *encoder_context)
900 {
901     /* to do */
902     struct i965_driver_data *i965 = i965_driver_data(ctx);
903     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
904     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
905     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
906     dri_bo *bo;
907     int i, size = 0;
908     int slice_batchbuffer_size;
909     int slice_type = slice_param->slice_type;
910     int is_inter = (slice_type != HEVC_SLICE_I);
911
912     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
913     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
914     int ctb_size = 1 << log2_ctb_size;
915     int cu_size  = 1 << log2_cu_size;
916
917     int width_in_ctb  = ALIGN(pSequenceParameter->pic_width_in_luma_samples , ctb_size) / ctb_size;
918     int height_in_ctb = ALIGN(pSequenceParameter->pic_height_in_luma_samples, ctb_size) / ctb_size;
919     int width_in_cu  = ALIGN(pSequenceParameter->pic_width_in_luma_samples , cu_size) / cu_size;
920     int height_in_cu = ALIGN(pSequenceParameter->pic_height_in_luma_samples, cu_size) / cu_size;
921     int width_in_mb  = ALIGN(pSequenceParameter->pic_width_in_luma_samples , 16) / 16;
922     int height_in_mb = ALIGN(pSequenceParameter->pic_height_in_luma_samples, 16) / 16;
923
924     int num_cu_record = 64;
925     int size_shift = 3;
926
927     if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
928         || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
929         size_shift = 2;
930
931     if (log2_ctb_size == 5) num_cu_record = 16;
932     else if (log2_ctb_size == 4) num_cu_record = 4;
933     else if (log2_ctb_size == 6) num_cu_record = 64;
934
935     /* frame size in samples, cu,ctu, mb */
936     mfc_context->pic_size.picture_width_in_samples = pSequenceParameter->pic_width_in_luma_samples;
937     mfc_context->pic_size.picture_height_in_samples = pSequenceParameter->pic_height_in_luma_samples;
938     mfc_context->pic_size.ctb_size = ctb_size;
939     mfc_context->pic_size.picture_width_in_ctbs = width_in_ctb;
940     mfc_context->pic_size.picture_height_in_ctbs = height_in_ctb;
941     mfc_context->pic_size.min_cb_size = cu_size;
942     mfc_context->pic_size.picture_width_in_min_cb_minus1 = width_in_cu - 1;
943     mfc_context->pic_size.picture_height_in_min_cb_minus1 = height_in_cu - 1;
944     mfc_context->pic_size.picture_width_in_mbs = width_in_mb;
945     mfc_context->pic_size.picture_height_in_mbs = height_in_mb;
946
947     slice_batchbuffer_size = 64 * width_in_ctb * width_in_ctb + 4096 +
948                              (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
949
950     /*Encode common setup for HCP*/
951     /*deblocking */
952     dri_bo_unreference(mfc_context->deblocking_filter_line_buffer.bo);
953     mfc_context->deblocking_filter_line_buffer.bo = NULL;
954
955     dri_bo_unreference(mfc_context->deblocking_filter_tile_line_buffer.bo);
956     mfc_context->deblocking_filter_tile_line_buffer.bo = NULL;
957
958     dri_bo_unreference(mfc_context->deblocking_filter_tile_column_buffer.bo);
959     mfc_context->deblocking_filter_tile_column_buffer.bo = NULL;
960
961     /* input source */
962     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
963     mfc_context->uncompressed_picture_source.bo = NULL;
964
965     /* metadata */
966     dri_bo_unreference(mfc_context->metadata_line_buffer.bo);
967     mfc_context->metadata_line_buffer.bo = NULL;
968
969     dri_bo_unreference(mfc_context->metadata_tile_line_buffer.bo);
970     mfc_context->metadata_tile_line_buffer.bo = NULL;
971
972     dri_bo_unreference(mfc_context->metadata_tile_column_buffer.bo);
973     mfc_context->metadata_tile_column_buffer.bo = NULL;
974
975     /* sao */
976     dri_bo_unreference(mfc_context->sao_line_buffer.bo);
977     mfc_context->sao_line_buffer.bo = NULL;
978
979     dri_bo_unreference(mfc_context->sao_tile_line_buffer.bo);
980     mfc_context->sao_tile_line_buffer.bo = NULL;
981
982     dri_bo_unreference(mfc_context->sao_tile_column_buffer.bo);
983     mfc_context->sao_tile_column_buffer.bo = NULL;
984
985     /* mv temporal buffer */
986     for (i = 0; i < NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS; i++) {
987         if (mfc_context->current_collocated_mv_temporal_buffer[i].bo != NULL)
988             dri_bo_unreference(mfc_context->current_collocated_mv_temporal_buffer[i].bo);
989         mfc_context->current_collocated_mv_temporal_buffer[i].bo = NULL;
990     }
991
992     /* reference */
993     for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
994         if (mfc_context->reference_surfaces[i].bo != NULL)
995             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
996         mfc_context->reference_surfaces[i].bo = NULL;
997     }
998
999     /* indirect data CU recording */
1000     dri_bo_unreference(mfc_context->hcp_indirect_cu_object.bo);
1001     mfc_context->hcp_indirect_cu_object.bo = NULL;
1002
1003     dri_bo_unreference(mfc_context->hcp_indirect_pak_bse_object.bo);
1004     mfc_context->hcp_indirect_pak_bse_object.bo = NULL;
1005
1006     /* Current internal buffer for HCP */
1007
1008     size = ALIGN(pSequenceParameter->pic_width_in_luma_samples, 32) >> size_shift;
1009     size <<= 6;
1010     ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_line_buffer), "line buffer", size);
1011     ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_tile_line_buffer), "tile line buffer", size);
1012
1013     size = ALIGN(pSequenceParameter->pic_height_in_luma_samples + 6 * width_in_ctb, 32) >> size_shift;
1014     size <<= 6;
1015     ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_tile_column_buffer), "tile column buffer", size);
1016
1017     if (is_inter) {
1018         size = (((pSequenceParameter->pic_width_in_luma_samples + 15) >> 4) * 188 + 9 * width_in_ctb + 1023) >> 9;
1019         size <<= 6;
1020         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_line_buffer), "metadata line buffer", size);
1021
1022         size = (((pSequenceParameter->pic_width_in_luma_samples + 15) >> 4) * 172 + 9 * width_in_ctb + 1023) >> 9;
1023         size <<= 6;
1024         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_line_buffer), "metadata tile line buffer", size);
1025
1026         size = (((pSequenceParameter->pic_height_in_luma_samples + 15) >> 4) * 176 + 89 * width_in_ctb + 1023) >> 9;
1027         size <<= 6;
1028         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_column_buffer), "metadata tile column buffer", size);
1029     } else {
1030         size = (pSequenceParameter->pic_width_in_luma_samples + 8 * width_in_ctb + 1023) >> 9;
1031         size <<= 6;
1032         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_line_buffer), "metadata line buffer", size);
1033
1034         size = (pSequenceParameter->pic_width_in_luma_samples + 16 * width_in_ctb + 1023) >> 9;
1035         size <<= 6;
1036         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_line_buffer), "metadata tile line buffer", size);
1037
1038         size = (pSequenceParameter->pic_height_in_luma_samples + 8 * height_in_ctb + 1023) >> 9;
1039         size <<= 6;
1040         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_column_buffer), "metadata tile column buffer", size);
1041     }
1042
1043     size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 3 * width_in_ctb), 16) >> size_shift;
1044     size <<= 6;
1045     ALLOC_ENCODER_BUFFER((&mfc_context->sao_line_buffer), "sao line buffer", size);
1046
1047     size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 6 * width_in_ctb), 16) >> size_shift;
1048     size <<= 6;
1049     ALLOC_ENCODER_BUFFER((&mfc_context->sao_tile_line_buffer), "sao tile line buffer", size);
1050
1051     size = ALIGN(((pSequenceParameter->pic_height_in_luma_samples >> 1) + 6 * height_in_ctb), 16) >> size_shift;
1052     size <<= 6;
1053     ALLOC_ENCODER_BUFFER((&mfc_context->sao_tile_column_buffer), "sao tile column buffer", size);
1054
1055     /////////////////////
1056     dri_bo_unreference(mfc_context->hcp_indirect_cu_object.bo);
1057     bo = dri_bo_alloc(i965->intel.bufmgr,
1058                       "Indirect data CU Buffer",
1059                       width_in_ctb * height_in_ctb * num_cu_record * 16 * 4,
1060                       0x1000);
1061     assert(bo);
1062     mfc_context->hcp_indirect_cu_object.bo = bo;
1063
1064     /* to do pak bse object buffer */
1065     /* to do current collocated mv temporal buffer */
1066
1067     dri_bo_unreference(mfc_context->hcp_batchbuffer_surface.bo);
1068     mfc_context->hcp_batchbuffer_surface.bo = NULL;
1069
1070     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
1071     mfc_context->aux_batchbuffer_surface.bo = NULL;
1072
1073     if (mfc_context->aux_batchbuffer)
1074         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
1075
1076     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
1077     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
1078     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
1079     mfc_context->aux_batchbuffer_surface.pitch = 16;
1080     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
1081     mfc_context->aux_batchbuffer_surface.size_block = 16;
1082 }
1083
1084 static VAStatus gen9_hcpe_run(VADriverContextP ctx,
1085                               struct encode_state *encode_state,
1086                               struct intel_encoder_context *encoder_context)
1087 {
1088     struct intel_batchbuffer *batch = encoder_context->base.batch;
1089
1090     intel_batchbuffer_flush(batch);     //run the pipeline
1091
1092     return VA_STATUS_SUCCESS;
1093 }
1094
1095
1096 static VAStatus
1097 gen9_hcpe_stop(VADriverContextP ctx,
1098                struct encode_state *encode_state,
1099                struct intel_encoder_context *encoder_context,
1100                int *encoded_bits_size)
1101 {
1102     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
1103     VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1104     VACodedBufferSegment *coded_buffer_segment;
1105
1106     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
1107     assert(vaStatus == VA_STATUS_SUCCESS);
1108     *encoded_bits_size = coded_buffer_segment->size * 8;
1109     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
1110
1111     return VA_STATUS_SUCCESS;
1112 }
1113
1114
/*
 * Locate the Annex-B start code in a packed header and return the
 * number of leading bytes (zero padding + start code + the two-byte
 * HEVC NAL unit header) the hardware should skip before applying
 * emulation-prevention processing.  Returns 0 when no start code is
 * present in the buffer.
 */
int intel_hevc_find_skipemulcnt(unsigned char *buf, int bits_length)
{
    /* to do */
    static const unsigned char short_sc[3] = { 0, 0, 1 };
    static const unsigned char long_sc[4]  = { 0, 0, 0, 1 };
    int pos, start_pos, zero_byte, skip_cnt;
    int nal_unit_type;

#define NAL_UNIT_TYPE_MASK 0x7e
#define HW_MAX_SKIP_LENGTH 15

    int byte_length = ALIGN(bits_length, 32) >> 3;

    /* scan for either the 3-byte (000001) or 4-byte (00000001) prefix */
    start_pos = -1;
    for (pos = 0; pos < byte_length - 4; pos++) {
        if (memcmp(buf + pos, short_sc, sizeof(short_sc)) == 0 ||
            memcmp(buf + pos, long_sc, sizeof(long_sc)) == 0) {
            start_pos = pos;
            break;
        }
    }

    if (start_pos < 0) {
        /* warning message is complained. But anyway it will be inserted. */
        WARN_ONCE("Invalid packed header data. "
                  "Can't find the 000001 start_prefix code\n");
        return 0;
    }

    /* a 4-byte start code carries one extra zero byte */
    zero_byte = (memcmp(buf + start_pos, short_sc, sizeof(short_sc)) != 0) ? 1 : 0;

    /* leading zeros + the start code itself */
    skip_cnt = start_pos + zero_byte + 3;

    /* the unit header byte is accounted */
    nal_unit_type = (buf[skip_cnt]) & NAL_UNIT_TYPE_MASK;
    skip_cnt += 2;  /* two bytes length of nal headers in hevc */

    if (nal_unit_type == 14 || nal_unit_type == 20 || nal_unit_type == 21) {
        /* more unit header bytes are accounted for MVC/SVC */
        //skip_cnt += 3;
    }
    if (skip_cnt > HW_MAX_SKIP_LENGTH) {
        WARN_ONCE("Too many leading zeros are padded for packed data. "
                  "It is beyond the HW range.!!!\n");
    }
    return skip_cnt;
}
1168
1169 #ifdef HCP_SOFTWARE_SKYLAKE
1170
/*
 * Emit one HCP_PAK_OBJECT command for a single CTB (LCU).
 *
 * lcu_x/lcu_y            - CTB coordinates within the frame
 * isLast_ctb             - non-zero marks the final CTB of the frame
 * cu_count_in_lcu        - number of CUs recorded for this CTB
 * split_coding_unit_flag - CU split bitmap programmed into DW1
 * batch                  - target batch; falls back to the context's
 *                          base batch when NULL
 *
 * Returns the command length in dwords (5 on KBL, 3 otherwise) so the
 * caller can account for batch space.
 */
static int
gen9_hcpe_hevc_pak_object(VADriverContextP ctx, int lcu_x, int lcu_y, int isLast_ctb,
                          struct intel_encoder_context *encoder_context,
                          int cu_count_in_lcu, unsigned int split_coding_unit_flag,
                          struct intel_batchbuffer *batch)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int len_in_dwords = 3;

    /* KBL uses an extended 5-dword PAK object */
    if(IS_KBL(i965->intel.device_info))
        len_in_dwords = 5;

    if (batch == NULL)
        batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    OUT_BCS_BATCH(batch, HCP_PAK_OBJECT | (len_in_dwords - 2));
    OUT_BCS_BATCH(batch,
                  (((isLast_ctb > 0) ? 1 : 0) << 31) |  /* last ctb?*/
                  ((cu_count_in_lcu - 1) << 24) |           /* No motion vector */
                  split_coding_unit_flag);

    OUT_BCS_BATCH(batch, (lcu_y << 16) | lcu_x);        /* LCU  for Y*/

    /* KBL-only trailing dwords */
    if(IS_KBL(i965->intel.device_info))
    {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}
1206
1207 #define     AVC_INTRA_RDO_OFFSET    4
1208 #define     AVC_INTER_RDO_OFFSET    10
1209 #define     AVC_INTER_MSG_OFFSET    8
1210 #define     AVC_INTER_MV_OFFSET     48
1211 #define     AVC_RDO_MASK            0xFFFF
1212
1213 #define     AVC_INTRA_MODE_MASK     0x30
1214 #define     AVC_INTRA_16X16         0x00
1215 #define     AVC_INTRA_8X8           0x01
1216 #define     AVC_INTRA_4X4           0x02
1217
1218 #define     AVC_INTER_MODE_MASK     0x03
1219 #define     AVC_INTER_8X8           0x03
1220 #define     AVC_INTER_8X16          0x02
1221 #define     AVC_INTER_16X8          0x01
1222 #define     AVC_INTER_16X16         0x00
1223 #define     AVC_SUBMB_SHAPE_MASK    0x00FF00
1224
1225 /* VME output message, write back message */
1226 #define     AVC_INTER_SUBMB_PRE_MODE_MASK       0x00ff0000
1227 #define     AVC_SUBMB_SHAPE_MASK    0x00FF00
1228
1229 /* here 1 MB = 1CU = 16x16 */
/*
 * Fill one 16-dword CU record in the indirect CU buffer for an INTRA
 * CU, translated from the AVC-style VME output message.  One CU here
 * corresponds to one 16x16 MB; AVC intra prediction modes are remapped
 * to their closest HEVC angular/DC/planar equivalents via the static
 * lookup tables below.
 *
 * qp            - CU QP written into the record
 * msg           - VME output message for this MB
 * cu_index      - CU slot within the CTB's record area
 * index         - 8x8 sub-block index used to select mode bits in msg[1]
 */
static void
gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int qp, unsigned int *msg,
                                      int ctb_x, int ctb_y,
                                      int mb_x, int mb_y,
                                      int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type,int cu_index,int index)
{
    /* here cu == mb, so we use mb address as the cu address */
    /* to fill the indirect cu by the vme out */
    /* AVC -> HEVC intra prediction mode remap tables */
    static int intra_mode_8x8_avc2hevc[9] = {26, 10, 1, 34, 18, 24, 13, 28, 8};
    static int intra_mode_16x16_avc2hevc[4] = {26, 10, 1, 34};
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    unsigned char * cu_record_ptr = NULL;
    unsigned int * cu_msg = NULL;
    /* byte offset of this CU's 64-byte record inside the indirect buffer */
    int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
    int mb_address_in_ctb = 0;
    int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
    int zero = 0;
    int is_inter = 0;
    int intraMbMode = 0;
    int cu_part_mode = 0;
    int intraMode[4];
    int inerpred_idc = 0;
    int intra_chroma_mode = 5;
    int cu_size = 1;
    int tu_size = 0x55;
    int tu_count = 4;
    int chroma_mode_remap[4]={5,4,3,2};

    /* intra CUs carry 0xff (no inter prediction) in interpred_idc */
    if (!is_inter) inerpred_idc = 0xff;

    intraMbMode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4;

    intra_chroma_mode = (msg[3] & 0x3);
    intra_chroma_mode =  chroma_mode_remap[intra_chroma_mode];
    if (intraMbMode == AVC_INTRA_16X16) {
        cu_part_mode = 0; //2Nx2N
        cu_size = 1;
        tu_size = 0x55;
        tu_count = 4;
        /* NOTE(review): intra_mode_16x16_avc2hevc has 4 entries but the
         * index is masked with 0xf — confirm VME only reports modes 0-3
         * here, otherwise this reads out of bounds. */
        intraMode[0] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
        intraMode[1] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
        intraMode[2] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
        intraMode[3] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
    } else if (intraMbMode == AVC_INTRA_8X8) {
        cu_part_mode = 0; //2Nx2N
        cu_size = 0;
        tu_size = 0;
        tu_count = 4;
        /* NOTE(review): intra_mode_8x8_avc2hevc has 9 entries, index is
         * masked with 0xf — confirm the VME mode range is 0-8. */
        intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
        intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
        intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
        intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];

    } else { // for 4x4 to use 8x8 replace
        cu_part_mode = 3; //NxN
        cu_size = 0;
        tu_size = 0;
        tu_count = 4;
        /* NOTE(review): for index >= 2, (index << 4) reaches 32+, and
         * shifting the 32-bit msg[1] by >= 32 is undefined behavior —
         * verify the caller's index range, or whether msg[2] should be
         * consulted for the upper 4x4 modes. */
        intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 0) & 0xf];
        intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 4) & 0xf];
        intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 8) & 0xf];
        intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 12) & 0xf];

    }

    cu_record_ptr = (unsigned char *)mfc_context->hcp_indirect_cu_object.bo->virtual;
    /* get the mb info from the vme out */
    cu_msg = (unsigned int *)(cu_record_ptr + cu_address);

    /* DW0: prediction mode, QP, chroma mode, partitioning, CU size */
    cu_msg[0] = (inerpred_idc << 24 |   /* interpred_idc[3:0][1:0] */
                 zero << 23 |   /* reserved */
                 qp << 16 | /* CU_qp */
                 zero << 11 |   /* reserved */
                 intra_chroma_mode << 8 |   /* intra_chroma_mode */
                 zero << 7 |    /* IPCM_enable , reserved for SKL*/
                 cu_part_mode << 4 |    /* cu_part_mode */
                 zero << 3 |    /* cu_transquant_bypass_flag */
                 is_inter << 2 |    /* cu_pred_mode: 1 = inter, 0 = intra */
                 cu_size          /* cu_size */
                );
    /* DW1: four luma intra modes, one per 8-bit lane */
    cu_msg[1] = (zero << 30 |   /* reserved  */
                 intraMode[3] << 24 |   /* intra_mode */
                 zero << 22 |   /* reserved  */
                 intraMode[2] << 16 |   /* intra_mode */
                 zero << 14 |   /* reserved  */
                 intraMode[1] << 8 |    /* intra_mode */
                 zero << 6 |    /* reserved  */
                 intraMode[0]           /* intra_mode */
                );
    /* l0: 4 MV (x,y); l1; 4 MV (x,y) — all zero for intra CUs */
    cu_msg[2] = (zero << 16 |   /* mvx_l0[1]  */
                 zero           /* mvx_l0[0] */
                );
    cu_msg[3] = (zero << 16 |   /* mvx_l0[3]  */
                 zero           /* mvx_l0[2] */
                );
    cu_msg[4] = (zero << 16 |   /* mvy_l0[1]  */
                 zero           /* mvy_l0[0] */
                );
    cu_msg[5] = (zero << 16 |   /* mvy_l0[3]  */
                 zero           /* mvy_l0[2] */
                );

    cu_msg[6] = (zero << 16 |   /* mvx_l1[1]  */
                 zero           /* mvx_l1[0] */
                );
    cu_msg[7] = (zero << 16 |   /* mvx_l1[3]  */
                 zero           /* mvx_l1[2] */
                );
    cu_msg[8] = (zero << 16 |   /* mvy_l1[1]  */
                 zero           /* mvy_l1[0] */
                );
    cu_msg[9] = (zero << 16 |   /* mvy_l1[3]  */
                 zero           /* mvy_l1[2] */
                );

    /* DW10: reference indices, 4 bits each — unused for intra */
    cu_msg[10] = (zero << 28 |  /* ref_idx_l1[3]  */
                  zero << 24 |  /* ref_idx_l1[2] */
                  zero << 20 |  /* ref_idx_l1[1]  */
                  zero << 16 |  /* ref_idx_l1[0] */
                  zero << 12 |  /* ref_idx_l0[3]  */
                  zero << 8 |   /* ref_idx_l0[2] */
                  zero << 4 |   /* ref_idx_l0[1]  */
                  zero          /* ref_idx_l0[0] */
                 );

    cu_msg[11] = tu_size; /* tu_size 00000000 00000000 00000000 10101010  or 0x0*/
    /* DW12: TU count and luma transform-skip flags */
    cu_msg[12] = ((tu_count - 1) << 28 | /* tu count - 1 */
                  zero << 16 |  /* reserved  */
                  zero          /* tu_xform_Yskip[15:0] */
                 );
    cu_msg[13] = (zero << 16 |  /* tu_xform_Vskip[15:0]  */
                  zero          /* tu_xform_Uskip[15:0] */
                 );
    cu_msg[14] = zero ;
    cu_msg[15] = zero ;
}
1370
1371 /* here 1 MB = 1CU = 16x16 */
1372 static void
1373 gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP ctx,
1374                                       struct encode_state *encode_state,
1375                                       struct intel_encoder_context *encoder_context,
1376                                       int qp, unsigned int *msg,
1377                                       int ctb_x, int ctb_y,
1378                                       int mb_x, int mb_y,
1379                                       int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type, int cu_index,int index)
1380 {
1381     /* here cu == mb, so we use mb address as the cu address */
1382     /* to fill the indirect cu by the vme out */
1383     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1384     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1385     unsigned char * cu_record_ptr = NULL;
1386     unsigned int * cu_msg = NULL;
1387     int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
1388     int mb_address_in_ctb = 0;
1389     int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
1390     int zero = 0;
1391     int cu_part_mode = 0;
1392     int submb_pre_mode = 0;
1393     int is_inter = 1;
1394     int cu_size = 1;
1395     int tu_size = 0x55;
1396     int tu_count = 4;
1397     int inter_mode = 0;
1398
1399     unsigned int *mv_ptr;
1400     {
1401         inter_mode = (msg[0] & AVC_INTER_MODE_MASK);
1402         submb_pre_mode = (msg[1] & AVC_INTER_SUBMB_PRE_MODE_MASK) >> 16;
1403 #define MSG_MV_OFFSET   4
1404         mv_ptr = msg + MSG_MV_OFFSET;
1405         /* MV of VME output is based on 16 sub-blocks. So it is necessary
1406         * to convert them to be compatible with the format of AVC_PAK
1407         * command.
1408         */
1409         /* 0/2/4/6/8... : l0, 1/3/5/7...: l1 ; now it only support 16x16,16x8,8x16,8x8*/
1410
1411         if (inter_mode == AVC_INTER_16X16) {
1412             mv_ptr[4] = mv_ptr[0];
1413             mv_ptr[5] = mv_ptr[1];
1414             mv_ptr[2] = mv_ptr[0];
1415             mv_ptr[3] = mv_ptr[1];
1416             mv_ptr[6] = mv_ptr[0];
1417             mv_ptr[7] = mv_ptr[1];
1418             cu_part_mode = 0;
1419             cu_size = 1;
1420             tu_size = 0x55;
1421             tu_count = 4;
1422         } else if (inter_mode == AVC_INTER_8X16) {
1423             mv_ptr[4] = mv_ptr[0];
1424             mv_ptr[5] = mv_ptr[1];
1425             mv_ptr[2] = mv_ptr[8];
1426             mv_ptr[3] = mv_ptr[9];
1427             mv_ptr[6] = mv_ptr[8];
1428             mv_ptr[7] = mv_ptr[9];
1429             cu_part_mode = 1;
1430             cu_size = 1;
1431             tu_size = 0x55;
1432             tu_count = 4;
1433         } else if (inter_mode == AVC_INTER_16X8) {
1434             mv_ptr[2] = mv_ptr[0];
1435             mv_ptr[3] = mv_ptr[1];
1436             mv_ptr[4] = mv_ptr[16];
1437             mv_ptr[5] = mv_ptr[17];
1438             mv_ptr[6] = mv_ptr[24];
1439             mv_ptr[7] = mv_ptr[25];
1440             cu_part_mode = 2;
1441             cu_size = 1;
1442             tu_size = 0x55;
1443             tu_count = 4;
1444         }else if(inter_mode == AVC_INTER_8X8) {
1445             mv_ptr[0] = mv_ptr[index * 8 + 0 ];
1446             mv_ptr[1] = mv_ptr[index * 8 + 1 ];
1447             mv_ptr[2] = mv_ptr[index * 8 + 0 ];
1448             mv_ptr[3] = mv_ptr[index * 8 + 1 ];
1449             mv_ptr[4] = mv_ptr[index * 8 + 0 ];
1450             mv_ptr[5] = mv_ptr[index * 8 + 1 ];
1451             mv_ptr[6] = mv_ptr[index * 8 + 0 ];
1452             mv_ptr[7] = mv_ptr[index * 8 + 1 ];
1453             cu_part_mode = 0;
1454             cu_size = 0;
1455             tu_size = 0x0;
1456             tu_count = 4;
1457
1458         }else
1459         {
1460             mv_ptr[4] = mv_ptr[0];
1461             mv_ptr[5] = mv_ptr[1];
1462             mv_ptr[2] = mv_ptr[0];
1463             mv_ptr[3] = mv_ptr[1];
1464             mv_ptr[6] = mv_ptr[0];
1465             mv_ptr[7] = mv_ptr[1];
1466             cu_part_mode = 0;
1467             cu_size = 1;
1468             tu_size = 0x55;
1469             tu_count = 4;
1470
1471         }
1472     }
1473
1474     cu_record_ptr = (unsigned char *)mfc_context->hcp_indirect_cu_object.bo->virtual;
1475     /* get the mb info from the vme out */
1476     cu_msg = (unsigned int *)(cu_record_ptr + cu_address);
1477
1478     cu_msg[0] = (submb_pre_mode << 24 | /* interpred_idc[3:0][1:0] */
1479                  zero << 23 |   /* reserved */
1480                  qp << 16 | /* CU_qp */
1481                  zero << 11 |   /* reserved */
1482                  5 << 8 |   /* intra_chroma_mode */
1483                  zero << 7 |    /* IPCM_enable , reserved for SKL*/
1484                  cu_part_mode << 4 |    /* cu_part_mode */
1485                  zero << 3 |    /* cu_transquant_bypass_flag */
1486                  is_inter << 2 |    /* cu_pred_mode :intra 1,inter 1*/
1487                  cu_size          /* cu_size */
1488                 );
1489     cu_msg[1] = (zero << 30 |   /* reserved  */
1490                  zero << 24 |   /* intra_mode */
1491                  zero << 22 |   /* reserved  */
1492                  zero << 16 |   /* intra_mode */
1493                  zero << 14 |   /* reserved  */
1494                  zero << 8 |    /* intra_mode */
1495                  zero << 6 |    /* reserved  */
1496                  zero           /* intra_mode */
1497                 );
1498     /* l0: 4 MV (x,y); l1; 4 MV (x,y) */
1499     cu_msg[2] = ((mv_ptr[2] & 0xffff) << 16 |   /* mvx_l0[1]  */
1500                  (mv_ptr[0] & 0xffff)           /* mvx_l0[0] */
1501                 );
1502     cu_msg[3] = ((mv_ptr[6] & 0xffff) << 16 |   /* mvx_l0[3]  */
1503                  (mv_ptr[4] & 0xffff)           /* mvx_l0[2] */
1504                 );
1505     cu_msg[4] = ((mv_ptr[2] & 0xffff0000) |         /* mvy_l0[1]  */
1506                  (mv_ptr[0] & 0xffff0000) >> 16     /* mvy_l0[0] */
1507                 );
1508     cu_msg[5] = ((mv_ptr[6] & 0xffff0000) |         /* mvy_l0[3]  */
1509                  (mv_ptr[4] & 0xffff0000) >> 16     /* mvy_l0[2] */
1510                 );
1511
1512     cu_msg[6] = ((mv_ptr[3] & 0xffff) << 16 |   /* mvx_l1[1]  */
1513                  (mv_ptr[1] & 0xffff)           /* mvx_l1[0] */
1514                 );
1515     cu_msg[7] = ((mv_ptr[7] & 0xffff) << 16 |   /* mvx_l1[3]  */
1516                  (mv_ptr[5] & 0xffff)           /* mvx_l1[2] */
1517                 );
1518     cu_msg[8] = ((mv_ptr[3] & 0xffff0000) |         /* mvy_l1[1]  */
1519                  (mv_ptr[1] & 0xffff0000) >> 16     /* mvy_l1[0] */
1520                 );
1521     cu_msg[9] = ((mv_ptr[7] & 0xffff0000) |         /* mvy_l1[3]  */
1522                  (mv_ptr[5] & 0xffff0000) >> 16     /* mvy_l1[2] */
1523                 );
1524
1525     cu_msg[10] = (((vme_context->ref_index_in_mb[1] >> 24) & 0xf) << 28 |   /* ref_idx_l1[3]  */
1526                   ((vme_context->ref_index_in_mb[1] >> 16) & 0xf) << 24 |   /* ref_idx_l1[2] */
1527                   ((vme_context->ref_index_in_mb[1] >> 8) & 0xf) << 20 |    /* ref_idx_l1[1]  */
1528                   ((vme_context->ref_index_in_mb[1] >> 0) & 0xf) << 16 |    /* ref_idx_l1[0] */
1529                   ((vme_context->ref_index_in_mb[0] >> 24) & 0xf) << 12 |   /* ref_idx_l0[3]  */
1530                   ((vme_context->ref_index_in_mb[0] >> 16) & 0xf) << 8  |   /* ref_idx_l0[2] */
1531                   ((vme_context->ref_index_in_mb[0] >> 8) & 0xf) << 4 |     /* ref_idx_l0[1]  */
1532                   ((vme_context->ref_index_in_mb[0] >> 0) & 0xf)            /* ref_idx_l0[0] */
1533                  );
1534
1535     cu_msg[11] = tu_size; /* tu_size 00000000 00000000 00000000 10101010  or 0x0*/
1536     cu_msg[12] = ((tu_count - 1) << 28 | /* tu count - 1 */
1537                   zero << 16 |  /* reserved  */
1538                   zero          /* tu_xform_Yskip[15:0] */
1539                  );
1540     cu_msg[13] = (zero << 16 |  /* tu_xform_Vskip[15:0]  */
1541                   zero          /* tu_xform_Uskip[15:0] */
1542                  );
1543     cu_msg[14] = zero ;
1544     cu_msg[15] = zero ;
1545 }
1546
/* Pre-computed split_coding_unit_flag seeds for one CTB, selected by CTB size.
 * Bit 20 is the top-level split flag and bits [19:16] are the four
 * second-level split flags; the lower nibbles start clear and individual
 * bits are OR-ed in per-MB while walking the CTB (see the
 * `split_coding_unit_flag |= 0x1 << ...` sites in the slice programming loop).
 * NOTE(review): _8_8 setting bit 20 (same as the 64x64 split case) looks
 * suspicious — confirm against the HCP_PAK_OBJECT command documentation. */
#define HEVC_SPLIT_CU_FLAG_64_64 ((0x1<<20)|(0xf<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
#define HEVC_SPLIT_CU_FLAG_32_32 ((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
#define HEVC_SPLIT_CU_FLAG_16_16 ((0x0<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
#define HEVC_SPLIT_CU_FLAG_8_8   ((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1551
1552
1553 void
1554 intel_hevc_slice_insert_packed_data(VADriverContextP ctx,
1555                                     struct encode_state *encode_state,
1556                                     struct intel_encoder_context *encoder_context,
1557                                     int slice_index,
1558                                     struct intel_batchbuffer *slice_batch)
1559 {
1560     int count, i, start_index;
1561     unsigned int length_in_bits;
1562     VAEncPackedHeaderParameterBuffer *param = NULL;
1563     unsigned int *header_data = NULL;
1564     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1565     int slice_header_index;
1566
1567     if (encode_state->slice_header_index[slice_index] == 0)
1568         slice_header_index = -1;
1569     else
1570         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1571
1572     count = encode_state->slice_rawdata_count[slice_index];
1573     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1574
1575     for (i = 0; i < count; i++) {
1576         unsigned int skip_emul_byte_cnt;
1577
1578         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1579
1580         param = (VAEncPackedHeaderParameterBuffer *)
1581                 (encode_state->packed_header_params_ext[start_index + i]->buffer);
1582
1583         /* skip the slice header packed data type as it is lastly inserted */
1584         if (param->type == VAEncPackedHeaderSlice)
1585             continue;
1586
1587         length_in_bits = param->bit_length;
1588
1589         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1590
1591         /* as the slice header is still required, the last header flag is set to
1592          * zero.
1593          */
1594         mfc_context->insert_object(ctx,
1595                                    encoder_context,
1596                                    header_data,
1597                                    ALIGN(length_in_bits, 32) >> 5,
1598                                    length_in_bits & 0x1f,
1599                                    skip_emul_byte_cnt,
1600                                    0,
1601                                    0,
1602                                    !param->has_emulation_bytes,
1603                                    slice_batch);
1604     }
1605
1606     if (slice_header_index == -1) {
1607         unsigned char *slice_header = NULL;
1608         int slice_header_length_in_bits = 0;
1609         VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1610         VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1611         VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
1612
1613         /* For the Normal HEVC */
1614         slice_header_length_in_bits = build_hevc_slice_header(pSequenceParameter,
1615                                       pPicParameter,
1616                                       pSliceParameter,
1617                                       &slice_header,
1618                                       0);
1619         mfc_context->insert_object(ctx, encoder_context,
1620                                    (unsigned int *)slice_header,
1621                                    ALIGN(slice_header_length_in_bits, 32) >> 5,
1622                                    slice_header_length_in_bits & 0x1f,
1623                                    5,  /* first 6 bytes are start code + nal unit type */
1624                                    1, 0, 1, slice_batch);
1625         free(slice_header);
1626     } else {
1627         unsigned int skip_emul_byte_cnt;
1628
1629         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1630
1631         param = (VAEncPackedHeaderParameterBuffer *)
1632                 (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1633         length_in_bits = param->bit_length;
1634
1635         /* as the slice header is the last header data for one slice,
1636          * the last header flag is set to one.
1637          */
1638         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1639
1640         mfc_context->insert_object(ctx,
1641                                    encoder_context,
1642                                    header_data,
1643                                    ALIGN(length_in_bits, 32) >> 5,
1644                                    length_in_bits & 0x1f,
1645                                    skip_emul_byte_cnt,
1646                                    1,
1647                                    0,
1648                                    !param->has_emulation_bytes,
1649                                    slice_batch);
1650     }
1651
1652     return;
1653 }
1654
1655 static void
1656 gen9_hcpe_hevc_pipeline_slice_programing(VADriverContextP ctx,
1657         struct encode_state *encode_state,
1658         struct intel_encoder_context *encoder_context,
1659         int slice_index,
1660         struct intel_batchbuffer *slice_batch)
1661 {
1662     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1663     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1664     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1665     VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1666     VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
1667     int qp_slice = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1668     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1669     //unsigned char *slice_header = NULL;         // for future use
1670     //int slice_header_length_in_bits = 0;
1671     unsigned int tail_data[] = { 0x0, 0x0 };
1672     int slice_type = pSliceParameter->slice_type;
1673
1674     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
1675     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
1676     int ctb_size = 1 << log2_ctb_size;
1677     int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
1678     int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
1679     int last_slice = (pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice) == (width_in_ctb * height_in_ctb);
1680     int ctb_width_in_mb = (ctb_size + 15) / 16;
1681     int i_ctb, ctb_x, ctb_y;
1682     unsigned int split_coding_unit_flag = 0;
1683     int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15) / 16;
1684     int row_pad_flag = (pSequenceParameter->pic_height_in_luma_samples % ctb_size)> 0 ? 1:0;
1685     int col_pad_flag = (pSequenceParameter->pic_width_in_luma_samples % ctb_size)> 0 ? 1:0;
1686
1687     int is_intra = (slice_type == HEVC_SLICE_I);
1688     unsigned int *msg = NULL;
1689     unsigned char *msg_ptr = NULL;
1690     int macroblock_address = 0;
1691     int num_cu_record = 64;
1692     int cu_count = 1;
1693     int tmp_mb_mode = 0;
1694     int mb_x = 0, mb_y = 0;
1695     int mb_addr = 0;
1696     int cu_index = 0;
1697     int inter_rdo, intra_rdo;
1698     int qp;
1699     int drop_cu_row_in_last_mb = 0;
1700     int drop_cu_column_in_last_mb = 0;
1701
1702     if (log2_ctb_size == 5) num_cu_record = 16;
1703     else if (log2_ctb_size == 4) num_cu_record = 4;
1704     else if (log2_ctb_size == 6) num_cu_record = 64;
1705
1706     qp = qp_slice;
1707     if (rate_control_mode == VA_RC_CBR) {
1708         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1709         if(slice_type == HEVC_SLICE_B) {
1710             if(pSequenceParameter->ip_period == 1)
1711             {
1712                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
1713
1714             }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
1715                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
1716             }
1717         }
1718         if (encode_state->slice_header_index[slice_index] == 0) {
1719             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1720         }
1721     }
1722
1723     /* only support for 8-bit pixel bit-depth */
1724     assert(pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 >= 0 && pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 <= 2);
1725     assert(pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 >= 0 && pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 <= 2);
1726     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1727     assert(qp >= 0 && qp < 52);
1728
1729     {
1730         gen9_hcpe_hevc_slice_state(ctx,
1731                                    pPicParameter,
1732                                    pSliceParameter,
1733                                    encode_state, encoder_context,
1734                                    slice_batch);
1735
1736         if (slice_index == 0)
1737             intel_hcpe_hevc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1738
1739         intel_hevc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1740
1741         /*
1742         slice_header_length_in_bits = build_hevc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header, slice_index);
1743         int skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)slice_header, slice_header_length_in_bits);
1744
1745         mfc_context->insert_object(ctx, encoder_context,
1746                                    (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1747                                     skip_emul_byte_cnt,
1748                                     1, 0, 1, slice_batch);
1749         free(slice_header);
1750         */
1751     }
1752
1753
1754
1755     split_coding_unit_flag = (ctb_width_in_mb == 4) ? HEVC_SPLIT_CU_FLAG_64_64 : ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
1756
1757     dri_bo_map(vme_context->vme_output.bo , 1);
1758     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1759     dri_bo_map(mfc_context->hcp_indirect_cu_object.bo , 1);
1760
1761     for (i_ctb = pSliceParameter->slice_segment_address;i_ctb < pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice; i_ctb++) {
1762         int last_ctb = (i_ctb == (pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice - 1));
1763         int ctb_height_in_mb_internal = ctb_width_in_mb;
1764         int ctb_width_in_mb_internal = ctb_width_in_mb;
1765         int max_cu_num_in_mb = 4;
1766
1767         ctb_x = i_ctb % width_in_ctb;
1768         ctb_y = i_ctb / width_in_ctb;
1769
1770         drop_cu_row_in_last_mb = 0;
1771         drop_cu_column_in_last_mb = 0;
1772
1773         if(ctb_y == (height_in_ctb - 1) && row_pad_flag)
1774         {
1775             ctb_height_in_mb_internal = (pSequenceParameter->pic_height_in_luma_samples - (ctb_y * ctb_size) + 15)/16;
1776
1777             if((log2_cu_size == 3) && (pSequenceParameter->pic_height_in_luma_samples % 16))
1778                 drop_cu_row_in_last_mb = (16 - (pSequenceParameter->pic_height_in_luma_samples % 16))>>log2_cu_size;
1779         }
1780
1781         if(ctb_x == (width_in_ctb - 1) && col_pad_flag)
1782         {
1783             ctb_width_in_mb_internal = (pSequenceParameter->pic_width_in_luma_samples - (ctb_x * ctb_size) + 15) / 16;
1784
1785             if((log2_cu_size == 3) && (pSequenceParameter->pic_width_in_luma_samples % 16))
1786                 drop_cu_column_in_last_mb = (16 - (pSequenceParameter->pic_width_in_luma_samples % 16))>>log2_cu_size;
1787         }
1788
1789         mb_x = 0;
1790         mb_y = 0;
1791         macroblock_address = ctb_y * width_in_mbs * ctb_width_in_mb + ctb_x * ctb_width_in_mb;
1792         split_coding_unit_flag = ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
1793         cu_count = 1;
1794         cu_index = 0;
1795         mb_addr = 0;
1796         msg = NULL;
1797         for (mb_y = 0; mb_y < ctb_height_in_mb_internal; mb_y++)
1798         {
1799             mb_addr = macroblock_address + mb_y * width_in_mbs ;
1800             for (mb_x = 0; mb_x < ctb_width_in_mb_internal; mb_x++)
1801             {
1802                 max_cu_num_in_mb = 4;
1803                 if(drop_cu_row_in_last_mb && (mb_y == ctb_height_in_mb_internal - 1))
1804                     max_cu_num_in_mb /= 2;
1805
1806                 if(drop_cu_column_in_last_mb && (mb_x == ctb_width_in_mb_internal - 1))
1807                     max_cu_num_in_mb /= 2;
1808
1809                 /* get the mb info from the vme out */
1810                 msg = (unsigned int *)(msg_ptr + mb_addr * vme_context->vme_output.size_block);
1811
1812                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1813                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1814                 /*fill to indirect cu */
1815                 /*to do */
1816                 if (is_intra || intra_rdo < inter_rdo) {
1817                     /* fill intra cu */
1818                     tmp_mb_mode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4;
1819                     if(max_cu_num_in_mb < 4){
1820                         if(tmp_mb_mode == AVC_INTRA_16X16)
1821                         {
1822                             msg[0] = (msg[0] & !AVC_INTRA_MODE_MASK) | (AVC_INTRA_8X8<<4);
1823                             tmp_mb_mode = AVC_INTRA_8X8;
1824                         }
1825
1826                         gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1827                         if(--max_cu_num_in_mb > 0)
1828                             gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,2);
1829
1830                         if(ctb_width_in_mb == 2)
1831                             split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1832                         else if(ctb_width_in_mb == 1)
1833                             split_coding_unit_flag |= 0x1 << 20;
1834                     }
1835                     else if(tmp_mb_mode == AVC_INTRA_16X16) {
1836                         gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1837                     } else { // for 4x4 to use 8x8 replace
1838                         gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1839                         gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,1);
1840                         gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,2);
1841                         gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,3);
1842                         if(ctb_width_in_mb == 2)
1843                             split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1844                         else if(ctb_width_in_mb == 1)
1845                             split_coding_unit_flag |= 0x1 << 20;
1846                     }
1847                 } else {
1848                     msg += AVC_INTER_MSG_OFFSET;
1849                     /* fill inter cu */
1850                     tmp_mb_mode = msg[0] & AVC_INTER_MODE_MASK;
1851                     if(max_cu_num_in_mb < 4)
1852                     {
1853                         if(tmp_mb_mode != AVC_INTER_8X8)
1854                         {
1855                             msg[0] = (msg[0] & !AVC_INTER_MODE_MASK) | AVC_INTER_8X8;
1856                             tmp_mb_mode = AVC_INTER_8X8;
1857                         }
1858                         gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1859                         if(--max_cu_num_in_mb > 0)
1860                             gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,1);
1861
1862                         if(ctb_width_in_mb == 2)
1863                             split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1864                         else if(ctb_width_in_mb == 1)
1865                             split_coding_unit_flag |= 0x1 << 20;
1866                     }
1867                     else if (tmp_mb_mode == AVC_INTER_8X8){
1868                         gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1869                         gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,1);
1870                         gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,2);
1871                         gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,3);
1872                         if(ctb_width_in_mb == 2)
1873                             split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1874                         else if(ctb_width_in_mb == 1)
1875                             split_coding_unit_flag |= 0x1 << 20;
1876
1877                     }else if(tmp_mb_mode == AVC_INTER_16X16 ||
1878                         tmp_mb_mode == AVC_INTER_8X16 ||
1879                         tmp_mb_mode == AVC_INTER_16X8) {
1880                         gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1881                     }
1882                 }
1883                 mb_addr++;
1884             }
1885         }
1886
1887         cu_count = cu_index;
1888         // PAK object fill accordingly.
1889         gen9_hcpe_hevc_pak_object(ctx, ctb_x, ctb_y, last_ctb, encoder_context, cu_count, split_coding_unit_flag, slice_batch);
1890     }
1891
1892     dri_bo_unmap(mfc_context->hcp_indirect_cu_object.bo);
1893     dri_bo_unmap(vme_context->vme_output.bo);
1894
1895     if (last_slice) {
1896         mfc_context->insert_object(ctx, encoder_context,
1897                                    tail_data, 2, 8,
1898                                    2, 1, 1, 0, slice_batch);
1899     } else {
1900         mfc_context->insert_object(ctx, encoder_context,
1901                                    tail_data, 1, 8,
1902                                    1, 1, 1, 0, slice_batch);
1903     }
1904 }
1905
1906 static dri_bo *
1907 gen9_hcpe_hevc_software_batchbuffer(VADriverContextP ctx,
1908                                     struct encode_state *encode_state,
1909                                     struct intel_encoder_context *encoder_context)
1910 {
1911     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1912     struct intel_batchbuffer *batch;
1913     dri_bo *batch_bo;
1914     int i;
1915
1916     batch = mfc_context->aux_batchbuffer;
1917     batch_bo = batch->buffer;
1918
1919     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1920         gen9_hcpe_hevc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1921     }
1922
1923     intel_batchbuffer_align(batch, 8);
1924
1925     BEGIN_BCS_BATCH(batch, 2);
1926     OUT_BCS_BATCH(batch, 0);
1927     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1928     ADVANCE_BCS_BATCH(batch);
1929
1930     dri_bo_reference(batch_bo);
1931     intel_batchbuffer_free(batch);
1932     mfc_context->aux_batchbuffer = NULL;
1933
1934     return batch_bo;
1935 }
1936
1937 #else
1938
1939 #endif
1940
/*
 * Top-level batch programming for one HEVC encode frame: builds the
 * slice-level commands into a secondary batch buffer (software path on
 * SKL, hardware path otherwise), then emits the picture-level commands
 * and chains to the secondary batch from the BCS ring.
 */
static void
gen9_hcpe_hevc_pipeline_programing(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    dri_bo *slice_batch_bo;

#ifdef HCP_SOFTWARE_SKYLAKE
    slice_batch_bo = gen9_hcpe_hevc_software_batchbuffer(ctx, encode_state, encoder_context);
#else
    slice_batch_bo = gen9_hcpe_hevc_hardware_batchbuffer(ctx, encode_state, encoder_context);
#endif

    // begin programing
    /* parts with two BSD rings are forced onto ring 0 here */
    if (i965->intel.has_bsd2)
        intel_batchbuffer_start_atomic_bcs_override(batch, 0x4000, BSD_RING0);
    else
        intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
    intel_batchbuffer_emit_mi_flush(batch);

    // picture level programing
    gen9_hcpe_hevc_pipeline_picture_programing(ctx, encode_state, encoder_context);

    /* MI_BATCH_BUFFER_START chaining to the slice batch; NOTE(review):
     * bits (1 << 8) | (1 << 0) presumably select second-level batch /
     * PPGTT addressing — confirm against the MI_BATCH_BUFFER_START
     * definition for this gen */
    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_BCS_RELOC(batch,
                  slice_batch_bo,
                  I915_GEM_DOMAIN_COMMAND, 0,
                  0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);

    // end programing
    intel_batchbuffer_end_atomic(batch);

    /* drop the reference taken by the batchbuffer builder */
    dri_bo_unreference(slice_batch_bo);
}
1980
1981 void intel_hcpe_hevc_pipeline_header_programing(VADriverContextP ctx,
1982         struct encode_state *encode_state,
1983         struct intel_encoder_context *encoder_context,
1984         struct intel_batchbuffer *slice_batch)
1985 {
1986     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1987     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_VPS);
1988     unsigned int skip_emul_byte_cnt;
1989
1990     if (encode_state->packed_header_data[idx]) {
1991         VAEncPackedHeaderParameterBuffer *param = NULL;
1992         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1993         unsigned int length_in_bits;
1994
1995         assert(encode_state->packed_header_param[idx]);
1996         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1997         length_in_bits = param->bit_length;
1998
1999         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2000         mfc_context->insert_object(ctx,
2001                                    encoder_context,
2002                                    header_data,
2003                                    ALIGN(length_in_bits, 32) >> 5,
2004                                    length_in_bits & 0x1f,
2005                                    skip_emul_byte_cnt,
2006                                    0,
2007                                    0,
2008                                    !param->has_emulation_bytes,
2009                                    slice_batch);
2010     }
2011
2012     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_VPS) + 1; // index to SPS
2013
2014     if (encode_state->packed_header_data[idx]) {
2015         VAEncPackedHeaderParameterBuffer *param = NULL;
2016         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2017         unsigned int length_in_bits;
2018
2019         assert(encode_state->packed_header_param[idx]);
2020         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2021         length_in_bits = param->bit_length;
2022
2023         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2024         mfc_context->insert_object(ctx,
2025                                    encoder_context,
2026                                    header_data,
2027                                    ALIGN(length_in_bits, 32) >> 5,
2028                                    length_in_bits & 0x1f,
2029                                    skip_emul_byte_cnt,
2030                                    0,
2031                                    0,
2032                                    !param->has_emulation_bytes,
2033                                    slice_batch);
2034     }
2035
2036     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_PPS);
2037
2038     if (encode_state->packed_header_data[idx]) {
2039         VAEncPackedHeaderParameterBuffer *param = NULL;
2040         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2041         unsigned int length_in_bits;
2042
2043         assert(encode_state->packed_header_param[idx]);
2044         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2045         length_in_bits = param->bit_length;
2046
2047         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2048
2049         mfc_context->insert_object(ctx,
2050                                    encoder_context,
2051                                    header_data,
2052                                    ALIGN(length_in_bits, 32) >> 5,
2053                                    length_in_bits & 0x1f,
2054                                    skip_emul_byte_cnt,
2055                                    0,
2056                                    0,
2057                                    !param->has_emulation_bytes,
2058                                    slice_batch);
2059     }
2060
2061     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_SEI);
2062
2063     if (encode_state->packed_header_data[idx]) {
2064         VAEncPackedHeaderParameterBuffer *param = NULL;
2065         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2066         unsigned int length_in_bits;
2067
2068         assert(encode_state->packed_header_param[idx]);
2069         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2070         length_in_bits = param->bit_length;
2071
2072         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2073         mfc_context->insert_object(ctx,
2074                                    encoder_context,
2075                                    header_data,
2076                                    ALIGN(length_in_bits, 32) >> 5,
2077                                    length_in_bits & 0x1f,
2078                                    skip_emul_byte_cnt,
2079                                    0,
2080                                    0,
2081                                    !param->has_emulation_bytes,
2082                                    slice_batch);
2083     }
2084 }
2085
2086 VAStatus intel_hcpe_hevc_prepare(VADriverContextP ctx,
2087                                  struct encode_state *encode_state,
2088                                  struct intel_encoder_context *encoder_context)
2089 {
2090     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2091     struct object_surface *obj_surface;
2092     struct object_buffer *obj_buffer;
2093     GenHevcSurface *hevc_encoder_surface;
2094     dri_bo *bo;
2095     VAStatus vaStatus = VA_STATUS_SUCCESS;
2096     int i;
2097     struct i965_coded_buffer_segment *coded_buffer_segment;
2098
2099     /*Setup all the input&output object*/
2100
2101     /* Setup current frame and current direct mv buffer*/
2102     obj_surface = encode_state->reconstructed_object;
2103
2104     hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2105     assert(hevc_encoder_surface);
2106
2107     if (hevc_encoder_surface) {
2108         hevc_encoder_surface->has_p010_to_nv12_done=0;
2109         hevc_encoder_surface->base.frame_store_id = -1;
2110         mfc_context->current_collocated_mv_temporal_buffer[NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS - 1].bo = hevc_encoder_surface->motion_vector_temporal_bo;
2111         dri_bo_reference(hevc_encoder_surface->motion_vector_temporal_bo);
2112     }
2113
2114     mfc_context->surface_state.width = obj_surface->orig_width;
2115     mfc_context->surface_state.height = obj_surface->orig_height;
2116     mfc_context->surface_state.w_pitch = obj_surface->width;
2117     mfc_context->surface_state.h_pitch = obj_surface->height;
2118
2119     /* Setup reference frames and direct mv buffers*/
2120     for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
2121         obj_surface = encode_state->reference_objects[i];
2122
2123         if (obj_surface && obj_surface->bo) {
2124             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
2125             dri_bo_reference(obj_surface->bo);
2126
2127             /* Check MV temporal buffer */
2128             hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2129             assert(hevc_encoder_surface);
2130
2131             if (hevc_encoder_surface) {
2132                 hevc_encoder_surface->base.frame_store_id = -1;
2133                 /* Setup MV temporal buffer */
2134                 mfc_context->current_collocated_mv_temporal_buffer[i].bo = hevc_encoder_surface->motion_vector_temporal_bo;
2135                 dri_bo_reference(hevc_encoder_surface->motion_vector_temporal_bo);
2136             }
2137         } else {
2138             break;
2139         }
2140     }
2141
2142
2143     mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
2144     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2145
2146     obj_buffer = encode_state->coded_buf_object;
2147     bo = obj_buffer->buffer_store->bo;
2148     mfc_context->hcp_indirect_pak_bse_object.bo = bo;
2149     mfc_context->hcp_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2150     mfc_context->hcp_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2151     dri_bo_reference(mfc_context->hcp_indirect_pak_bse_object.bo);
2152
2153     dri_bo_map(bo, 1);
2154     coded_buffer_segment = (struct i965_coded_buffer_segment *)(bo->virtual);
2155     coded_buffer_segment->mapped = 0;
2156     coded_buffer_segment->codec = encoder_context->codec;
2157     dri_bo_unmap(bo);
2158
2159     return vaStatus;
2160 }
2161
2162 /* HEVC BRC related */
2163
2164 static void
2165 intel_hcpe_bit_rate_control_context_init(struct encode_state *encode_state,
2166         struct gen9_hcpe_context *mfc_context)
2167 {
2168     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2169     int ctb_size = 16;
2170     int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
2171     int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
2172
2173     float fps =  pSequenceParameter->vui_time_scale / pSequenceParameter->vui_num_units_in_tick ;
2174     double bitrate = pSequenceParameter->bits_per_second * 1.0;
2175     int inter_mb_size = bitrate * 1.0 / (fps + 4.0) / width_in_mbs / height_in_mbs;
2176     int intra_mb_size = inter_mb_size * 5.0;
2177     int i;
2178
2179     mfc_context->bit_rate_control_context[HEVC_SLICE_I].target_mb_size = intra_mb_size;
2180     mfc_context->bit_rate_control_context[HEVC_SLICE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
2181     mfc_context->bit_rate_control_context[HEVC_SLICE_P].target_mb_size = inter_mb_size;
2182     mfc_context->bit_rate_control_context[HEVC_SLICE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
2183     mfc_context->bit_rate_control_context[HEVC_SLICE_B].target_mb_size = inter_mb_size;
2184     mfc_context->bit_rate_control_context[HEVC_SLICE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
2185
2186     for (i = 0 ; i < 3; i++) {
2187         mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
2188         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
2189         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
2190         mfc_context->bit_rate_control_context[i].GrowInit = 6;
2191         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
2192         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
2193         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
2194
2195         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
2196         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
2197         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
2198         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
2199         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
2200         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
2201     }
2202
2203     mfc_context->bit_rate_control_context[HEVC_SLICE_I].TargetSizeInWord = (intra_mb_size + 16) / 16;
2204     mfc_context->bit_rate_control_context[HEVC_SLICE_P].TargetSizeInWord = (inter_mb_size + 16) / 16;
2205     mfc_context->bit_rate_control_context[HEVC_SLICE_B].TargetSizeInWord = (inter_mb_size + 16) / 16;
2206
2207     mfc_context->bit_rate_control_context[HEVC_SLICE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_I].TargetSizeInWord * 1.5;
2208     mfc_context->bit_rate_control_context[HEVC_SLICE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_P].TargetSizeInWord * 1.5;
2209     mfc_context->bit_rate_control_context[HEVC_SLICE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_B].TargetSizeInWord * 1.5;
2210 }
2211
2212 static void intel_hcpe_brc_init(struct encode_state *encode_state,
2213                                 struct intel_encoder_context* encoder_context)
2214 {
2215     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2216     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2217     VAEncMiscParameterHRD* pParameterHRD = NULL;
2218     VAEncMiscParameterBuffer* pMiscParamHRD = NULL;
2219
2220     double bitrate = pSequenceParameter->bits_per_second * 1.0;
2221     double framerate = (double)pSequenceParameter->vui_time_scale / (double)pSequenceParameter->vui_num_units_in_tick;
2222     int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
2223     int intra_period = pSequenceParameter->intra_period;
2224     int ip_period = pSequenceParameter->ip_period;
2225     double qp1_size = 0.1 * 8 * 3 * pSequenceParameter->pic_width_in_luma_samples * pSequenceParameter->pic_height_in_luma_samples / 2;
2226     double qp51_size = 0.001 * 8 * 3 * pSequenceParameter->pic_width_in_luma_samples * pSequenceParameter->pic_height_in_luma_samples / 2;
2227     double bpf;
2228     int ratio_min = 1;
2229     int ratio_max = 32;
2230     int ratio = 8;
2231     double buffer_size = 0;
2232     int bpp = 1;
2233
2234     if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0) ||
2235         (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
2236         bpp = 2;
2237
2238     qp1_size = qp1_size * bpp;
2239     qp51_size = qp51_size * bpp;
2240
2241     if (!encode_state->misc_param[VAEncMiscParameterTypeHRD][0] || !encode_state->misc_param[VAEncMiscParameterTypeHRD][0]->buffer)
2242         return;
2243
2244     pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD][0]->buffer;
2245     pParameterHRD = (VAEncMiscParameterHRD*)pMiscParamHRD->data;
2246
2247     if (pSequenceParameter->ip_period) {
2248         pnum = (intra_period + ip_period - 1) / ip_period - 1;
2249         bnum = intra_period - inum - pnum;
2250     }
2251
2252     mfc_context->brc.mode = encoder_context->rate_control_mode;
2253
2254     mfc_context->brc.target_frame_size[HEVC_SLICE_I] = (int)((double)((bitrate * intra_period) / framerate) /
2255             (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
2256     mfc_context->brc.target_frame_size[HEVC_SLICE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[HEVC_SLICE_I];
2257     mfc_context->brc.target_frame_size[HEVC_SLICE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[HEVC_SLICE_I];
2258
2259     mfc_context->brc.gop_nums[HEVC_SLICE_I] = inum;
2260     mfc_context->brc.gop_nums[HEVC_SLICE_P] = pnum;
2261     mfc_context->brc.gop_nums[HEVC_SLICE_B] = bnum;
2262
2263     bpf = mfc_context->brc.bits_per_frame = bitrate / framerate;
2264
2265     if (!pParameterHRD || pParameterHRD->buffer_size <= 0)
2266     {
2267         mfc_context->hrd.buffer_size = bitrate * ratio;
2268         mfc_context->hrd.current_buffer_fullness =
2269             (double)(bitrate * ratio/2 < mfc_context->hrd.buffer_size) ?
2270             bitrate * ratio/2 : mfc_context->hrd.buffer_size / 2.;
2271     }else
2272     {
2273         buffer_size = (double)pParameterHRD->buffer_size ;
2274         if(buffer_size < bitrate * ratio_min)
2275         {
2276             buffer_size = bitrate * ratio_min;
2277         }else if (buffer_size > bitrate * ratio_max)
2278         {
2279             buffer_size = bitrate * ratio_max ;
2280         }
2281         mfc_context->hrd.buffer_size =buffer_size;
2282         if(pParameterHRD->initial_buffer_fullness > 0)
2283         {
2284             mfc_context->hrd.current_buffer_fullness =
2285                 (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size) ?
2286                 pParameterHRD->initial_buffer_fullness : mfc_context->hrd.buffer_size / 2.;
2287         }else
2288         {
2289             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size / 2.;
2290
2291         }
2292     }
2293
2294     mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size / 2.;
2295     mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size / qp1_size;
2296     mfc_context->hrd.violation_noted = 0;
2297
2298     if ((bpf > qp51_size) && (bpf < qp1_size)) {
2299         mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 51 - 50 * (bpf - qp51_size) / (qp1_size - qp51_size);
2300     } else if (bpf >= qp1_size)
2301         mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 1;
2302     else if (bpf <= qp51_size)
2303         mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 51;
2304
2305     mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2306     mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY = mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY;
2307
2308     BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY, 1, 36);
2309     BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY, 1, 40);
2310     BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY, 1, 45);
2311 }
2312
2313 int intel_hcpe_update_hrd(struct encode_state *encode_state,
2314                           struct gen9_hcpe_context *mfc_context,
2315                           int frame_bits)
2316 {
2317     double prev_bf = mfc_context->hrd.current_buffer_fullness;
2318
2319     mfc_context->hrd.current_buffer_fullness -= frame_bits;
2320
2321     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
2322         mfc_context->hrd.current_buffer_fullness = prev_bf;
2323         return BRC_UNDERFLOW;
2324     }
2325
2326     mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
2327     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
2328         if (mfc_context->brc.mode == VA_RC_VBR)
2329             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
2330         else {
2331             mfc_context->hrd.current_buffer_fullness = prev_bf;
2332             return BRC_OVERFLOW;
2333         }
2334     }
2335     return BRC_NO_HRD_VIOLATION;
2336 }
2337
/*
 * Post-encode BRC step: from the actual size of the frame just encoded,
 * predict the QP for the next frame of the same slice type, check HRD
 * compliance, and cross-correct the QPs of the other slice types.
 *
 * Returns a gen6_brc_status code.  BRC_UNDERFLOW / BRC_OVERFLOW tell
 * the caller to re-encode the frame with the adjusted QP; the
 * *_WITH_MAX_QP / *_WITH_MIN_QP variants mean the QP is already at its
 * limit and no re-encode can help.
 */
int intel_hcpe_brc_postpack(struct encode_state *encode_state,
                            struct gen9_hcpe_context *mfc_context,
                            int frame_bits)
{
    gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
    int slicetype = pSliceParameter->slice_type;
    int qpi = mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY;
    int qpp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
    int qpb = mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY;
    int qp; // quantizer of previously encoded slice of current type
    int qpn; // predicted quantizer for next frame of current type in integer format
    double qpf; // predicted quantizer for next frame of current type in float format
    double delta_qp; // QP correction
    int target_frame_size, frame_size_next;
    /* Notes:
     *  x - how far we are from HRD buffer borders
     *  y - how far we are from target HRD buffer fullness
     */
    double x, y;
    double frame_size_alpha;

    /* B frames that land on a P position in the GOP are budgeted as P */
    if(slicetype == HEVC_SLICE_B) {
        if(pSequenceParameter->ip_period == 1)
        {
            slicetype = HEVC_SLICE_P;
        }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
            slicetype = HEVC_SLICE_P;
        }
    }

    qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;

    target_frame_size = mfc_context->brc.target_frame_size[slicetype];
    /* frame_size_alpha damps the size correction: more frames of this
     * type in the GOP -> slower reaction, capped at 30 */
    if (mfc_context->hrd.buffer_capacity < 5)
        frame_size_alpha = 0;
    else
        frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
    if (frame_size_alpha > 30) frame_size_alpha = 30;
    frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
                      (double)(frame_size_alpha + 1.);

    /* frame_size_next: avoiding negative number and too small value */
    if ((double)frame_size_next < (double)(target_frame_size * 0.25))
        frame_size_next = (int)((double)target_frame_size * 0.25);

    /* Scale QP by the ratio of target size to the desired next size */
    qpf = (double)qp * target_frame_size / frame_size_next;
    qpn = (int)(qpf + 0.5);

    if (qpn == qp) {
        /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
        mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
        if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
            qpn++;
            mfc_context->brc.qpf_rounding_accumulator = 0.;
        } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
            qpn--;
            mfc_context->brc.qpf_rounding_accumulator = 0.;
        }
    }
    /* making sure that QP is not changing too fast */
    if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
    else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
    /* making sure that with QP predictions we do not leave the QP range */
    BRC_CLIP(qpn, 1, 51);

    /* checking whether HRD compliance is still met */
    sts = intel_hcpe_update_hrd(encode_state, mfc_context, frame_bits);

    /* calculating QP delta as some function of buffer fullness:
     * x in [-1, 1] measures distance from the target fullness, y > 0
     * measures distance from the nearer buffer border */
    x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
    if (x > 0) {
        x /= mfc_context->hrd.target_buffer_fullness;
        y = mfc_context->hrd.current_buffer_fullness;
    } else {
        x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
        y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
    }
    if (y < 0.01) y = 0.01;
    if (x > 1) x = 1;
    else if (x < -1) x = -1;

    /* smooth sinusoidal correction, bounded by BRC_QP_MAX_CHANGE,
     * growing as the buffer approaches either border (exp(-1/y)) */
    delta_qp = BRC_QP_MAX_CHANGE * exp(-1 / y) * sin(BRC_PI_0_5 * x);
    qpn = (int)(qpn + delta_qp + 0.5);

    /* making sure that with QP predictions we do not leave the QP range */
    BRC_CLIP(qpn, 1, 51);

    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* correcting QPs of slices of other types */
        if (slicetype == HEVC_SLICE_P) {
            if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
            if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
        } else if (slicetype == HEVC_SLICE_I) {
            if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
                mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
            if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
        } else { // HEVC_SLICE_B
            if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
            if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
                mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
        }
        BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY, 1, 51);
        BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY, 1, 51);
        BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY, 1, 51);
    } else if (sts == BRC_UNDERFLOW) { // underflow
        /* force a strictly larger QP; at QP 51 the frame is unrepairable */
        if (qpn <= qp) qpn = qp + 1;
        if (qpn > 51) {
            qpn = 51;
            sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
        }
    } else if (sts == BRC_OVERFLOW) {
        /* force a strictly smaller QP; at QP 1 the frame is unrepairable */
        if (qpn >= qp) qpn = qp - 1;
        if (qpn < 1) { // < 0 (?) overflow with minQP
            qpn = 1;
            sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
        }
    }

    mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;

    return sts;
}
2466
2467 static void intel_hcpe_hrd_context_init(struct encode_state *encode_state,
2468                                         struct intel_encoder_context *encoder_context)
2469 {
2470     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2471     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2472     unsigned int rate_control_mode = encoder_context->rate_control_mode;
2473     int target_bit_rate = pSequenceParameter->bits_per_second;
2474
2475     // current we only support CBR mode.
2476     if (rate_control_mode == VA_RC_CBR) {
2477         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
2478         mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
2479         mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
2480         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
2481         mfc_context->vui_hrd.i_frame_number = 0;
2482
2483         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
2484         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
2485         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
2486     }
2487
2488 }
2489
2490 void
2491 intel_hcpe_hrd_context_update(struct encode_state *encode_state,
2492                               struct gen9_hcpe_context *mfc_context)
2493 {
2494     mfc_context->vui_hrd.i_frame_number++;
2495 }
2496
2497 int intel_hcpe_interlace_check(VADriverContextP ctx,
2498                                struct encode_state *encode_state,
2499                                struct intel_encoder_context *encoder_context)
2500 {
2501     VAEncSliceParameterBufferHEVC *pSliceParameter;
2502     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2503     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
2504     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
2505     int ctb_size = 1 << log2_ctb_size;
2506     int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
2507     int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
2508     int i;
2509     int ctbCount = 0;
2510
2511     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2512         pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[i]->buffer;
2513         ctbCount += pSliceParameter->num_ctu_in_slice;
2514     }
2515
2516     if (ctbCount == (width_in_ctb * height_in_ctb))
2517         return 0;
2518
2519     return 1;
2520 }
2521
2522 /*
2523  * Check whether the parameters related with CBR are updated and decide whether
2524  * it needs to reinitialize the configuration related with CBR.
2525  * Currently it will check the following parameters:
2526  *      bits_per_second
2527  *      frame_rate
2528  *      gop_configuration(intra_period, ip_period, intra_idr_period)
2529  */
2530 static bool intel_hcpe_brc_updated_check(struct encode_state *encode_state,
2531         struct intel_encoder_context *encoder_context)
2532 {
2533     /* to do */
2534     unsigned int rate_control_mode = encoder_context->rate_control_mode;
2535     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2536     double cur_fps, cur_bitrate;
2537     VAEncSequenceParameterBufferHEVC *pSequenceParameter;
2538
2539
2540     if (rate_control_mode != VA_RC_CBR) {
2541         return false;
2542     }
2543
2544     pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2545
2546     cur_bitrate = pSequenceParameter->bits_per_second;
2547     cur_fps = (double)pSequenceParameter->vui_time_scale /
2548               (double)pSequenceParameter->vui_num_units_in_tick;
2549
2550     if ((cur_bitrate == mfc_context->brc.saved_bps) &&
2551         (cur_fps == mfc_context->brc.saved_fps) &&
2552         (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period) &&
2553         (pSequenceParameter->intra_idr_period == mfc_context->brc.saved_idr_period) &&
2554         (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period)) {
2555         /* the parameters related with CBR are not updaetd */
2556         return false;
2557     }
2558
2559     mfc_context->brc.saved_ip_period = pSequenceParameter->ip_period;
2560     mfc_context->brc.saved_intra_period = pSequenceParameter->intra_period;
2561     mfc_context->brc.saved_idr_period = pSequenceParameter->intra_idr_period;
2562     mfc_context->brc.saved_fps = cur_fps;
2563     mfc_context->brc.saved_bps = cur_bitrate;
2564     return true;
2565 }
2566
2567 void intel_hcpe_brc_prepare(struct encode_state *encode_state,
2568                             struct intel_encoder_context *encoder_context)
2569 {
2570     unsigned int rate_control_mode = encoder_context->rate_control_mode;
2571     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2572
2573     if (rate_control_mode == VA_RC_CBR) {
2574         bool brc_updated;
2575         assert(encoder_context->codec != CODEC_MPEG2);
2576
2577         brc_updated = intel_hcpe_brc_updated_check(encode_state, encoder_context);
2578
2579         /*Programing bit rate control */
2580         if ((mfc_context->bit_rate_control_context[HEVC_SLICE_I].MaxSizeInWord == 0) ||
2581             brc_updated) {
2582             intel_hcpe_bit_rate_control_context_init(encode_state, mfc_context);
2583             intel_hcpe_brc_init(encode_state, encoder_context);
2584         }
2585
2586         /*Programing HRD control */
2587         if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated)
2588             intel_hcpe_hrd_context_init(encode_state, encoder_context);
2589     }
2590 }
2591
2592 /* HEVC interface API for encoder */
2593
2594 static VAStatus
2595 gen9_hcpe_hevc_encode_picture(VADriverContextP ctx,
2596                               struct encode_state *encode_state,
2597                               struct intel_encoder_context *encoder_context)
2598 {
2599     struct gen9_hcpe_context *hcpe_context = encoder_context->mfc_context;
2600     unsigned int rate_control_mode = encoder_context->rate_control_mode;
2601     int current_frame_bits_size;
2602     int sts;
2603
2604     for (;;) {
2605         gen9_hcpe_init(ctx, encode_state, encoder_context);
2606         intel_hcpe_hevc_prepare(ctx, encode_state, encoder_context);
2607         /*Programing bcs pipeline*/
2608         gen9_hcpe_hevc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
2609         gen9_hcpe_run(ctx, encode_state, encoder_context);
2610         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
2611             gen9_hcpe_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
2612             sts = intel_hcpe_brc_postpack(encode_state, hcpe_context, current_frame_bits_size);
2613             if (sts == BRC_NO_HRD_VIOLATION) {
2614                 intel_hcpe_hrd_context_update(encode_state, hcpe_context);
2615                 break;
2616             } else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
2617                 if (!hcpe_context->hrd.violation_noted) {
2618                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP) ? "overflow" : "underflow");
2619                     hcpe_context->hrd.violation_noted = 1;
2620                 }
2621                 return VA_STATUS_SUCCESS;
2622             }
2623         } else {
2624             break;
2625         }
2626     }
2627
2628     return VA_STATUS_SUCCESS;
2629 }
2630
2631 void
2632 gen9_hcpe_context_destroy(void *context)
2633 {
2634     struct gen9_hcpe_context *hcpe_context = context;
2635     int i;
2636
2637     dri_bo_unreference(hcpe_context->deblocking_filter_line_buffer.bo);
2638     hcpe_context->deblocking_filter_line_buffer.bo = NULL;
2639
2640     dri_bo_unreference(hcpe_context->deblocking_filter_tile_line_buffer.bo);
2641     hcpe_context->deblocking_filter_tile_line_buffer.bo = NULL;
2642
2643     dri_bo_unreference(hcpe_context->deblocking_filter_tile_column_buffer.bo);
2644     hcpe_context->deblocking_filter_tile_column_buffer.bo = NULL;
2645
2646     dri_bo_unreference(hcpe_context->uncompressed_picture_source.bo);
2647     hcpe_context->uncompressed_picture_source.bo = NULL;
2648
2649     dri_bo_unreference(hcpe_context->metadata_line_buffer.bo);
2650     hcpe_context->metadata_line_buffer.bo = NULL;
2651
2652     dri_bo_unreference(hcpe_context->metadata_tile_line_buffer.bo);
2653     hcpe_context->metadata_tile_line_buffer.bo = NULL;
2654
2655     dri_bo_unreference(hcpe_context->metadata_tile_column_buffer.bo);
2656     hcpe_context->metadata_tile_column_buffer.bo = NULL;
2657
2658     dri_bo_unreference(hcpe_context->sao_line_buffer.bo);
2659     hcpe_context->sao_line_buffer.bo = NULL;
2660
2661     dri_bo_unreference(hcpe_context->sao_tile_line_buffer.bo);
2662     hcpe_context->sao_tile_line_buffer.bo = NULL;
2663
2664     dri_bo_unreference(hcpe_context->sao_tile_column_buffer.bo);
2665     hcpe_context->sao_tile_column_buffer.bo = NULL;
2666
2667     /* mv temporal buffer */
2668     for (i = 0; i < NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS; i++) {
2669         if (hcpe_context->current_collocated_mv_temporal_buffer[i].bo != NULL)
2670             dri_bo_unreference(hcpe_context->current_collocated_mv_temporal_buffer[i].bo);
2671         hcpe_context->current_collocated_mv_temporal_buffer[i].bo = NULL;
2672     }
2673
2674     for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
2675         dri_bo_unreference(hcpe_context->reference_surfaces[i].bo);
2676         hcpe_context->reference_surfaces[i].bo = NULL;
2677     }
2678
2679     dri_bo_unreference(hcpe_context->hcp_indirect_cu_object.bo);
2680     hcpe_context->hcp_indirect_cu_object.bo = NULL;
2681
2682     dri_bo_unreference(hcpe_context->hcp_indirect_pak_bse_object.bo);
2683     hcpe_context->hcp_indirect_pak_bse_object.bo = NULL;
2684
2685     dri_bo_unreference(hcpe_context->hcp_batchbuffer_surface.bo);
2686     hcpe_context->hcp_batchbuffer_surface.bo = NULL;
2687
2688     dri_bo_unreference(hcpe_context->aux_batchbuffer_surface.bo);
2689     hcpe_context->aux_batchbuffer_surface.bo = NULL;
2690
2691     if (hcpe_context->aux_batchbuffer)
2692         intel_batchbuffer_free(hcpe_context->aux_batchbuffer);
2693
2694     hcpe_context->aux_batchbuffer = NULL;
2695
2696     free(hcpe_context);
2697 }
2698
2699 VAStatus gen9_hcpe_pipeline(VADriverContextP ctx,
2700                             VAProfile profile,
2701                             struct encode_state *encode_state,
2702                             struct intel_encoder_context *encoder_context)
2703 {
2704     VAStatus vaStatus;
2705
2706     switch (profile) {
2707     case VAProfileHEVCMain:
2708     case VAProfileHEVCMain10:
2709         vaStatus = gen9_hcpe_hevc_encode_picture(ctx, encode_state, encoder_context);
2710         break;
2711
2712     default:
2713         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2714         break;
2715     }
2716
2717     return vaStatus;
2718 }
2719
2720 Bool gen9_hcpe_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2721 {
2722     struct gen9_hcpe_context *hcpe_context = calloc(1, sizeof(struct gen9_hcpe_context));
2723
2724     assert(hcpe_context);
2725     hcpe_context->pipe_mode_select = gen9_hcpe_pipe_mode_select;
2726     hcpe_context->set_surface_state = gen9_hcpe_surface_state;
2727     hcpe_context->ind_obj_base_addr_state = gen9_hcpe_ind_obj_base_addr_state;
2728     hcpe_context->pic_state = gen9_hcpe_hevc_pic_state;
2729     hcpe_context->qm_state = gen9_hcpe_hevc_qm_state;
2730     hcpe_context->fqm_state = gen9_hcpe_hevc_fqm_state;
2731     hcpe_context->insert_object = gen9_hcpe_hevc_insert_object;
2732     hcpe_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
2733
2734     encoder_context->mfc_context = hcpe_context;
2735     encoder_context->mfc_context_destroy = gen9_hcpe_context_destroy;
2736     encoder_context->mfc_pipeline = gen9_hcpe_pipeline;
2737     encoder_context->mfc_brc_prepare = intel_hcpe_brc_prepare;
2738
2739     hevc_gen_default_iq_matrix_encoder(&hcpe_context->iq_matrix_hevc);
2740
2741     return True;
2742 }