OSDN Git Service

Update README.md
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_mfc_hevc.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Qu Pengfei <Pengfei.Qu@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41 #include "gen9_mfc.h"
42 #include "gen6_vme.h"
43 #include "intel_media.h"
44
/* HRD (hypothetical reference decoder) buffer status codes produced by
 * the BRC (bit-rate control) checks. */
typedef enum _gen6_brc_status {
    BRC_NO_HRD_VIOLATION = 0,
    BRC_UNDERFLOW = 1,
    BRC_OVERFLOW = 2,
    BRC_UNDERFLOW_WITH_MAX_QP = 3,  /* underflow while QP is already at its maximum */
    BRC_OVERFLOW_WITH_MIN_QP = 4,   /* overflow while QP is already at its minimum */
} gen6_brc_status;
52
/* BRC define */

/* Clamp x into [min, max].  Wrapped in do { } while (0) so the macro
 * expands to exactly one statement and is safe inside unbraced
 * if/else bodies (the original bare-brace form left a dangling ';'
 * after the block, which breaks "if (...) BRC_CLIP(...); else ..."). */
#define BRC_CLIP(x, min, max)                                   \
    do {                                                        \
        x = ((x > (max)) ? (max) : ((x < (min)) ? (min) : x));  \
    } while (0)

#define BRC_P_B_QP_DIFF 4   /* QP delta between P and B slices */
#define BRC_I_P_QP_DIFF 2   /* QP delta between I and P slices */
#define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF)

#define BRC_PWEIGHT 0.6  /* weight if P slice with comparison to I slice */
#define BRC_BWEIGHT 0.25 /* weight if B slice with comparison to I slice */

#define BRC_QP_MAX_CHANGE 5 /* maximum qp modification */
#define BRC_CY 0.1 /* weight for */ /* NOTE(review): original comment is truncated — confirm what this weights */
#define BRC_CX_UNDERFLOW 5.
#define BRC_CX_OVERFLOW -4.

#define BRC_PI_0_5 1.5707963267948966192313216916398 /* 0.5 * pi */
72
/* intel buffer write */
/* (Re)allocate an encoder buffer: drop any previous BO reference and
 * allocate a fresh 0x1000-aligned BO of the requested size.  Expects a
 * local "i965" driver-data pointer in the calling scope.
 * Fix: removed the stray ';' after "while (0)" — the trailing
 * semicolon defeated the do/while(0) single-statement idiom and made
 * the macro unsafe in unbraced if/else bodies. */
#define ALLOC_ENCODER_BUFFER(gen_buffer, string, size) do {     \
        dri_bo_unreference(gen_buffer->bo);                     \
        gen_buffer->bo = dri_bo_alloc(i965->intel.bufmgr,       \
                                      string,                   \
                                      size,                     \
                                      0x1000);                  \
        assert(gen_buffer->bo);                                 \
    } while (0)
82
83
/* Emit a 64-bit buffer address (2 DWs) into the BCS batch, optionally
 * followed by a third DW carrying the memory address attributes
 * (MOCS).  A NULL bo emits two zero DWs instead of a relocation;
 * "is_target" selects the relocation write domain.  Expects "batch"
 * and "i965" in the calling scope. */
#define OUT_BUFFER_X(buf_bo, is_target, ma)  do {                         \
        if (buf_bo) {                                                   \
            OUT_BCS_RELOC64(batch,                                        \
                          buf_bo,                                       \
                          I915_GEM_DOMAIN_INSTRUCTION,                       \
                          is_target ? I915_GEM_DOMAIN_INSTRUCTION : 0,       \
                          0);                                           \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
        if (ma)                                                         \
            OUT_BCS_BATCH(batch, i965->intel.mocs_state);                                    \
    } while (0)

/* MA = address followed by the memory-attributes DW (3 DWs total);
 * NMA = address only (2 DWs).  TARGET marks the bo as written. */
#define OUT_BUFFER_MA_TARGET(buf_bo)       OUT_BUFFER_X(buf_bo, 1, 1)
#define OUT_BUFFER_MA_REFERENCE(buf_bo)    OUT_BUFFER_X(buf_bo, 0, 1)
#define OUT_BUFFER_NMA_TARGET(buf_bo)      OUT_BUFFER_X(buf_bo, 1, 0)
#define OUT_BUFFER_NMA_REFERENCE(buf_bo)   OUT_BUFFER_X(buf_bo, 0, 0)
103
104
/* Binding-table layout: surface states padded to the gen8 size, with
 * the binding-table DWords placed after MAX_MEDIA_SURFACES_GEN6
 * surface-state slots. */
#define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)

#define HCP_SOFTWARE_SKYLAKE    1

#define NUM_HCPE_KERNEL 2

/* VME inter-prediction mode and sub-macroblock shape encodings */
#define     INTER_MODE_MASK     0x03
#define     INTER_8X8       0x03
#define     INTER_16X8      0x01
#define     INTER_8X16      0x02
#define     SUBMB_SHAPE_MASK    0x00FF00

#define     INTER_MV8       (4 << 20)
#define     INTER_MV32      (6 << 20)
121
122
123 /* HEVC */
124
125 /* utils */
126 static void
127 hevc_gen_default_iq_matrix_encoder(VAQMatrixBufferHEVC *iq_matrix)
128 {
129     /* Flat_4x4_16 */
130     memset(&iq_matrix->scaling_lists_4x4, 16, sizeof(iq_matrix->scaling_lists_4x4));
131
132     /* Flat_8x8_16 */
133     memset(&iq_matrix->scaling_lists_8x8, 16, sizeof(iq_matrix->scaling_lists_8x8));
134
135     /* Flat_16x16_16 */
136     memset(&iq_matrix->scaling_lists_16x16, 16, sizeof(iq_matrix->scaling_lists_16x16));
137
138     /* Flat_32x32_16 */
139     memset(&iq_matrix->scaling_lists_32x32, 16, sizeof(iq_matrix->scaling_lists_32x32));
140
141     /* Flat_16x16_dc_16 */
142     memset(&iq_matrix->scaling_list_dc_16x16, 16, sizeof(iq_matrix->scaling_list_dc_16x16));
143
144     /* Flat_32x32_dc_16 */
145     memset(&iq_matrix->scaling_list_dc_32x32, 16, sizeof(iq_matrix->scaling_list_dc_32x32));
146 }
147
148 /* HEVC picture and slice state related */
149
150 static void
151 gen9_hcpe_pipe_mode_select(VADriverContextP ctx,
152                            int standard_select,
153                            struct intel_encoder_context *encoder_context)
154 {
155     struct i965_driver_data *i965 = i965_driver_data(ctx);
156     struct intel_batchbuffer *batch = encoder_context->base.batch;
157
158     assert(standard_select == HCP_CODEC_HEVC);
159
160     if (IS_KBL(i965->intel.device_info) ||
161         IS_GLK(i965->intel.device_info)) {
162         BEGIN_BCS_BATCH(batch, 6);
163
164         OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
165     } else {
166         BEGIN_BCS_BATCH(batch, 4);
167
168         OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (4 - 2));
169     }
170
171     OUT_BCS_BATCH(batch,
172                   (standard_select << 5) |
173                   (0 << 3) | /* disable Pic Status / Error Report */
174                   HCP_CODEC_SELECT_ENCODE);
175     OUT_BCS_BATCH(batch, 0);
176     OUT_BCS_BATCH(batch, 0);
177
178     if (IS_KBL(i965->intel.device_info) ||
179         IS_GLK(i965->intel.device_info)) {
180         OUT_BCS_BATCH(batch, 0);
181         OUT_BCS_BATCH(batch, 0);
182     }
183
184     ADVANCE_BCS_BATCH(batch);
185 }
186
187 static void
188 gen9_hcpe_surface_state(VADriverContextP ctx, struct encode_state *encode_state,
189                         struct intel_encoder_context *encoder_context)
190 {
191     struct intel_batchbuffer *batch = encoder_context->base.batch;
192     struct object_surface *obj_surface = encode_state->reconstructed_object;
193     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
194     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
195     unsigned int surface_format = SURFACE_FORMAT_PLANAR_420_8;
196
197     /* to do */
198     unsigned int y_cb_offset;
199
200     assert(obj_surface);
201
202     if ((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
203         || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0)) {
204         assert(obj_surface->fourcc == VA_FOURCC_P010);
205         surface_format = SURFACE_FORMAT_P010;
206     }
207
208     y_cb_offset = obj_surface->y_cb_offset;
209
210     BEGIN_BCS_BATCH(batch, 3);
211     OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
212     OUT_BCS_BATCH(batch,
213                   (1 << 28) |                   /* surface id */
214                   (mfc_context->surface_state.w_pitch - 1));    /* pitch - 1 */
215     OUT_BCS_BATCH(batch,
216                   surface_format << 28 |
217                   y_cb_offset);
218     ADVANCE_BCS_BATCH(batch);
219
220     BEGIN_BCS_BATCH(batch, 3);
221     OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
222     OUT_BCS_BATCH(batch,
223                   (0 << 28) |                   /* surface id */
224                   (mfc_context->surface_state.w_pitch - 1));    /* pitch - 1 */
225     OUT_BCS_BATCH(batch,
226                   surface_format << 28 |
227                   y_cb_offset);
228     ADVANCE_BCS_BATCH(batch);
229 }
230
/* Emit HCP_PIPE_BUF_ADDR_STATE: programs every buffer address the HCP
 * pipe needs for HEVC encoding — reconstructed picture, deblocking /
 * metadata / SAO row-store buffers, the per-picture MV temporal
 * buffer, the reference pictures and their collocated MV buffers.
 * The command is 104 DWs on KBL/GLK (trailing DWs zeroed) and 95 DWs
 * on other gen9 parts; the DW comments below track the layout and
 * must stay in sync with the batch length. */
static void
gen9_hcpe_pipe_buf_addr_state(VADriverContextP ctx, struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    GenHevcSurface *hcpe_hevc_surface;
    dri_bo *bo;
    unsigned int i;

    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        BEGIN_BCS_BATCH(batch, 104);

        OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (104 - 2));
    } else {
        BEGIN_BCS_BATCH(batch, 95);

        OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (95 - 2));
    }

    obj_surface = encode_state->reconstructed_object;
    assert(obj_surface && obj_surface->bo);
    hcpe_hevc_surface = obj_surface->private_data;
    assert(hcpe_hevc_surface && hcpe_hevc_surface->motion_vector_temporal_bo);

    OUT_BUFFER_MA_TARGET(obj_surface->bo); /* DW 1..3 */
    OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_line_buffer.bo);/* DW 4..6 */
    OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_tile_line_buffer.bo); /* DW 7..9 */
    OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_tile_column_buffer.bo); /* DW 10..12 */
    OUT_BUFFER_MA_TARGET(mfc_context->metadata_line_buffer.bo);         /* DW 13..15 */
    OUT_BUFFER_MA_TARGET(mfc_context->metadata_tile_line_buffer.bo);    /* DW 16..18 */
    OUT_BUFFER_MA_TARGET(mfc_context->metadata_tile_column_buffer.bo);  /* DW 19..21 */
    OUT_BUFFER_MA_TARGET(mfc_context->sao_line_buffer.bo);              /* DW 22..24 */
    OUT_BUFFER_MA_TARGET(mfc_context->sao_tile_line_buffer.bo);         /* DW 25..27 */
    OUT_BUFFER_MA_TARGET(mfc_context->sao_tile_column_buffer.bo);       /* DW 28..30 */
    OUT_BUFFER_MA_TARGET(hcpe_hevc_surface->motion_vector_temporal_bo); /* DW 31..33 */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 34..36, reserved */

    /* here only max 8 reference allowed; DW 37..52 (2 DWs each).
     * The macro already emits zeros for NULL, so both branches emit
     * the same number of DWs. */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        bo = mfc_context->reference_surfaces[i].bo;

        if (bo) {
            OUT_BUFFER_NMA_REFERENCE(bo);
        } else
            OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    OUT_BCS_BATCH(batch, 0);    /* DW 53, memory address attributes */

    OUT_BUFFER_MA_TARGET(mfc_context->uncompressed_picture_source.bo); /* DW 54..56, uncompressed picture source */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 57..59, ignore  */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 60..62, ignore  */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 63..65, ignore  */

    /* DW 66..81: collocated MV temporal buffers, one per reference */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->current_collocated_mv_temporal_buffer) - 1; i++) {
        bo = mfc_context->current_collocated_mv_temporal_buffer[i].bo;

        if (bo) {
            OUT_BUFFER_NMA_REFERENCE(bo);
        } else
            OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    OUT_BCS_BATCH(batch, 0);    /* DW 82, memory address attributes */

    OUT_BUFFER_MA_TARGET(NULL);    /* DW 83..85, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 86..88, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 89..91, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 92..94, ignore for HEVC */

    /* DW 95..103, KBL/GLK only */
    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        for (i = 0; i < 9; i++)
            OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
311
/* Emit HCP_IND_OBJ_BASE_ADDR_STATE (14 DWs): indirect object base
 * addresses for encoding.  The decoder-only bitstream fields are
 * zeroed; the CU record buffer and the PAK-BSE output buffer (with
 * its start and end offsets) are programmed. */
static void
gen9_hcpe_ind_obj_base_addr_state(VADriverContextP ctx,
                                  struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 14);

    OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (14 - 2));
    OUT_BUFFER_MA_REFERENCE(NULL);                 /* DW 1..3, ignore for encoder */
    OUT_BUFFER_NMA_REFERENCE(NULL);                /* DW 4..5, Upper Bound */
    OUT_BUFFER_MA_TARGET(mfc_context->hcp_indirect_cu_object.bo);                 /* DW 6..8, CU */
    /* DW 9..11, PAK-BSE start address */
    OUT_BCS_RELOC64(batch,
                    mfc_context->hcp_indirect_pak_bse_object.bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    mfc_context->hcp_indirect_pak_bse_object.offset);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* DW 12..13, PAK-BSE upper bound (end offset in the same bo) */
    OUT_BCS_RELOC64(batch,
                    mfc_context->hcp_indirect_pak_bse_object.bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    mfc_context->hcp_indirect_pak_bse_object.end_offset);

    ADVANCE_BCS_BATCH(batch);
}
340
341 static void
342 gen9_hcpe_fqm_state(VADriverContextP ctx,
343                     int size_id,
344                     int color_component,
345                     int pred_type,
346                     int dc,
347                     unsigned int *fqm,
348                     int fqm_length,
349                     struct intel_encoder_context *encoder_context)
350 {
351     struct intel_batchbuffer *batch = encoder_context->base.batch;
352     unsigned int fqm_buffer[32];
353
354     assert(fqm_length <= 32);
355     assert(sizeof(*fqm) == 4);
356     memset(fqm_buffer, 0, sizeof(fqm_buffer));
357     memcpy(fqm_buffer, fqm, fqm_length * 4);
358
359     BEGIN_BCS_BATCH(batch, 34);
360
361     OUT_BCS_BATCH(batch, HCP_FQM_STATE | (34 - 2));
362     OUT_BCS_BATCH(batch,
363                   dc << 16 |
364                   color_component << 3 |
365                   size_id << 1 |
366                   pred_type);
367     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
368
369     ADVANCE_BCS_BATCH(batch);
370 }
371
372
373 static void
374 gen9_hcpe_hevc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
375 {
376     unsigned int qm[32] = {
377         0x10001000, 0x10001000, 0x10001000, 0x10001000,
378         0x10001000, 0x10001000, 0x10001000, 0x10001000,
379         0x10001000, 0x10001000, 0x10001000, 0x10001000,
380         0x10001000, 0x10001000, 0x10001000, 0x10001000,
381         0x10001000, 0x10001000, 0x10001000, 0x10001000,
382         0x10001000, 0x10001000, 0x10001000, 0x10001000,
383         0x10001000, 0x10001000, 0x10001000, 0x10001000,
384         0x10001000, 0x10001000, 0x10001000, 0x10001000
385     };
386
387     gen9_hcpe_fqm_state(ctx,
388                         0, 0, 0, 0,
389                         qm, 8,
390                         encoder_context);
391     gen9_hcpe_fqm_state(ctx,
392                         0, 0, 1, 0,
393                         qm, 8,
394                         encoder_context);
395     gen9_hcpe_fqm_state(ctx,
396                         1, 0, 0, 0,
397                         qm, 32,
398                         encoder_context);
399     gen9_hcpe_fqm_state(ctx,
400                         1, 0, 1, 0,
401                         qm, 32,
402                         encoder_context);
403     gen9_hcpe_fqm_state(ctx,
404                         2, 0, 0, 0x1000,
405                         qm, 0,
406                         encoder_context);
407     gen9_hcpe_fqm_state(ctx,
408                         2, 0, 1, 0x1000,
409                         qm, 0,
410                         encoder_context);
411     gen9_hcpe_fqm_state(ctx,
412                         3, 0, 0, 0x1000,
413                         qm, 0,
414                         encoder_context);
415     gen9_hcpe_fqm_state(ctx,
416                         3, 0, 1, 0x1000,
417                         qm, 0,
418                         encoder_context);
419 }
420
421 static void
422 gen9_hcpe_qm_state(VADriverContextP ctx,
423                    int size_id,
424                    int color_component,
425                    int pred_type,
426                    int dc,
427                    unsigned int *qm,
428                    int qm_length,
429                    struct intel_encoder_context *encoder_context)
430 {
431     struct intel_batchbuffer *batch = encoder_context->base.batch;
432     unsigned int qm_buffer[16];
433
434     assert(qm_length <= 16);
435     assert(sizeof(*qm) == 4);
436     memset(qm_buffer, 0, sizeof(qm_buffer));
437     memcpy(qm_buffer, qm, qm_length * 4);
438
439     BEGIN_BCS_BATCH(batch, 18);
440
441     OUT_BCS_BATCH(batch, HCP_QM_STATE | (18 - 2));
442     OUT_BCS_BATCH(batch,
443                   dc << 5 |
444                   color_component << 3 |
445                   size_id << 1 |
446                   pred_type);
447     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
448
449     ADVANCE_BCS_BATCH(batch);
450 }
451
452 static void
453 gen9_hcpe_hevc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
454 {
455
456     int i;
457
458     unsigned int qm[16] = {
459         0x10101010, 0x10101010, 0x10101010, 0x10101010,
460         0x10101010, 0x10101010, 0x10101010, 0x10101010,
461         0x10101010, 0x10101010, 0x10101010, 0x10101010,
462         0x10101010, 0x10101010, 0x10101010, 0x10101010
463     };
464
465     for (i = 0; i < 6; i++) {
466         gen9_hcpe_qm_state(ctx,
467                            0, i % 3, i / 3, 0,
468                            qm, 4,
469                            encoder_context);
470     }
471
472     for (i = 0; i < 6; i++) {
473         gen9_hcpe_qm_state(ctx,
474                            1, i % 3, i / 3, 0,
475                            qm, 16,
476                            encoder_context);
477     }
478
479     for (i = 0; i < 6; i++) {
480         gen9_hcpe_qm_state(ctx,
481                            2, i % 3, i / 3, 16,
482                            qm, 16,
483                            encoder_context);
484     }
485
486     for (i = 0; i < 2; i++) {
487         gen9_hcpe_qm_state(ctx,
488                            3, 0, i % 2, 16,
489                            qm, 16,
490                            encoder_context);
491     }
492 }
493
/* Emit HCP_PIC_STATE for HEVC encoding: picture geometry, coding-tool
 * flags derived from the VAAPI sequence/picture parameters, and the
 * BRC frame-size / QP-delta guard values.  31 DWs on KBL/GLK
 * (trailing DWs zeroed), 19 DWs otherwise. */
static void
gen9_hcpe_hevc_pic_state(VADriverContextP ctx, struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferHEVC *pic_param ;
    VAEncSequenceParameterBufferHEVC *seq_param ;

    int max_pcm_size_minus3 = 0, min_pcm_size_minus3 = 0;
    int pcm_sample_bit_depth_luma_minus1 = 7, pcm_sample_bit_depth_chroma_minus1 = 7;
    /*
     * 7.4.3.1
     *
     * When not present, the value of loop_filter_across_tiles_enabled_flag
     * is inferred to be equal to 1.
     *
     * NOTE(review): despite the inference rule above, this is
     * initialized to 0 and forced back to 0 below ("set zero for
     * encoder") — the PPS value is read but never used.
     */
    int loop_filter_across_tiles_enabled_flag = 0;
    pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
    seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;

    int log2_cu_size = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size =  seq_param->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
    int ctb_size = 1 << log2_ctb_size;
    /* raw bits of one 4:2:0 8-bit CTU; the per-LCU PAK limit is 5/3 of it */
    double rawctubits = 8 * 3 * ctb_size * ctb_size / 2.0;
    int maxctubits = (int)(5 * rawctubits / 3) ;
    double bitrate = (double)encoder_context->brc.bits_per_second[0];
    double framebitrate = bitrate / 32 / 8; //32 byte unit
    int minframebitrate = 0;//(int) (framebitrate * 3 / 10);
    int maxframebitrate = (int)(framebitrate * 10 / 10);
    int maxdeltaframebitrate = 0x1c5c; //(int) (framebitrate * 1/ 10);
    int mindeltaframebitrate = 0; //(int) (framebitrate * 1/ 10);
    int minframesize = 0;//(int)(rawframebits * 1/50);

    if (seq_param->seq_fields.bits.pcm_enabled_flag) {
        max_pcm_size_minus3 = seq_param->log2_max_pcm_luma_coding_block_size_minus3;
        min_pcm_size_minus3 = seq_param->log2_min_pcm_luma_coding_block_size_minus3;
        pcm_sample_bit_depth_luma_minus1 = (seq_param->pcm_sample_bit_depth_luma_minus1 & 0x0f);
        pcm_sample_bit_depth_chroma_minus1 = (seq_param->pcm_sample_bit_depth_chroma_minus1 & 0x0f);
    } else {
        /* PCM off: derive a default from the CTB size, capped at 2 (32x32) */
        max_pcm_size_minus3 = MIN(seq_param->log2_min_luma_coding_block_size_minus3 + seq_param->log2_diff_max_min_luma_coding_block_size, 2);
    }

    if (pic_param->pic_fields.bits.tiles_enabled_flag)
        loop_filter_across_tiles_enabled_flag = pic_param->pic_fields.bits.loop_filter_across_tiles_enabled_flag;

    /* set zero for encoder */
    loop_filter_across_tiles_enabled_flag = 0;

    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        BEGIN_BCS_BATCH(batch, 31);

        OUT_BCS_BATCH(batch, HCP_PIC_STATE | (31 - 2));
    } else {
        BEGIN_BCS_BATCH(batch, 19);

        OUT_BCS_BATCH(batch, HCP_PIC_STATE | (19 - 2));
    }

    /* DW 1, frame size in min-CB units */
    OUT_BCS_BATCH(batch,
                  mfc_context->pic_size.picture_height_in_min_cb_minus1 << 16 |
                  0 << 14 |
                  mfc_context->pic_size.picture_width_in_min_cb_minus1);
    /* DW 2, PCM / transform / coding block size limits */
    OUT_BCS_BATCH(batch,
                  max_pcm_size_minus3 << 10 |
                  min_pcm_size_minus3 << 8 |
                  (seq_param->log2_min_transform_block_size_minus2 +
                   seq_param->log2_diff_max_min_transform_block_size) << 6 |
                  seq_param->log2_min_transform_block_size_minus2 << 4 |
                  (seq_param->log2_min_luma_coding_block_size_minus3 +
                   seq_param->log2_diff_max_min_luma_coding_block_size) << 2 |
                  seq_param->log2_min_luma_coding_block_size_minus3);
    OUT_BCS_BATCH(batch, 0); /* DW 3, ignored */
    /* DW 4, coding-tool enable flags */
    OUT_BCS_BATCH(batch,
                  ((IS_KBL(i965->intel.device_info) || IS_GLK(i965->intel.device_info)) ?
                   1 : 0) << 27 | /* CU packet structure is 0 for SKL */
                  seq_param->seq_fields.bits.strong_intra_smoothing_enabled_flag << 26 |
                  pic_param->pic_fields.bits.transquant_bypass_enabled_flag << 25 |
                  seq_param->seq_fields.bits.amp_enabled_flag << 23 |
                  pic_param->pic_fields.bits.transform_skip_enabled_flag << 22 |
                  0 << 21 | /* 0 for encoder !(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_BOTTOM_FIELD)*/
                  0 << 20 |     /* 0 for encoder !!(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_FIELD_PIC)*/
                  pic_param->pic_fields.bits.weighted_pred_flag << 19 |
                  pic_param->pic_fields.bits.weighted_bipred_flag << 18 |
                  pic_param->pic_fields.bits.tiles_enabled_flag << 17 |                 /* 0 for encoder */
                  pic_param->pic_fields.bits.entropy_coding_sync_enabled_flag << 16 |
                  loop_filter_across_tiles_enabled_flag << 15 |
                  pic_param->pic_fields.bits.sign_data_hiding_enabled_flag << 13 |  /* 0 for encoder */
                  pic_param->log2_parallel_merge_level_minus2 << 10 |               /* 0 for encoder */
                  pic_param->pic_fields.bits.constrained_intra_pred_flag << 9 |     /* 0 for encoder */
                  seq_param->seq_fields.bits.pcm_loop_filter_disabled_flag << 8 |
                  (pic_param->diff_cu_qp_delta_depth & 0x03) << 6 |                 /* 0 for encoder */
                  pic_param->pic_fields.bits.cu_qp_delta_enabled_flag << 5 |        /* 0 for encoder */
                  seq_param->seq_fields.bits.pcm_enabled_flag << 4 |
                  seq_param->seq_fields.bits.sample_adaptive_offset_enabled_flag << 3 | /* 0 for encoder */
                  0);
    /* DW 5, bit depths, transform hierarchy depths, chroma QP offsets */
    OUT_BCS_BATCH(batch,
                  seq_param->seq_fields.bits.bit_depth_luma_minus8 << 27 |                 /* 10 bit for KBL+*/
                  seq_param->seq_fields.bits.bit_depth_chroma_minus8 << 24 |                 /* 10 bit for KBL+ */
                  pcm_sample_bit_depth_luma_minus1 << 20 |
                  pcm_sample_bit_depth_chroma_minus1 << 16 |
                  seq_param->max_transform_hierarchy_depth_inter << 13 |    /*  for encoder */
                  seq_param->max_transform_hierarchy_depth_intra << 10 |    /*  for encoder */
                  (pic_param->pps_cr_qp_offset & 0x1f) << 5 |
                  (pic_param->pps_cb_qp_offset & 0x1f));
    OUT_BCS_BATCH(batch,
                  0 << 29 | /* must be 0 for encoder */
                  maxctubits); /* DW 6, max LCU bit size allowed for encoder  */
    OUT_BCS_BATCH(batch,
                  0 << 31 | /* frame bitrate max unit */
                  maxframebitrate); /* DW 7, frame bitrate max 0:13   */
    OUT_BCS_BATCH(batch,
                  0 << 31 | /* frame bitrate min unit */
                  minframebitrate); /* DW 8, frame bitrate min 0:13   */
    OUT_BCS_BATCH(batch,
                  maxdeltaframebitrate << 16 | /* frame bitrate max delta ,help to select deltaQP of slice*/
                  mindeltaframebitrate); /* DW 9,(0,14) frame bitrate min delta ,help to select deltaQP of slice*/
    OUT_BCS_BATCH(batch, 0x07050402);   /* DW 10, frame delta qp max */
    OUT_BCS_BATCH(batch, 0x0d0b0908);   /* DW 11 */
    OUT_BCS_BATCH(batch, 0);    /* DW 12, frame delta qp min */
    OUT_BCS_BATCH(batch, 0);    /* DW 13 */
    OUT_BCS_BATCH(batch, 0x04030200);   /* DW 14, frame delta qp max range  */
    OUT_BCS_BATCH(batch, 0x100c0806);   /* DW 15 */
    OUT_BCS_BATCH(batch, 0x04030200);   /* DW 16, frame delta qp min range  */
    OUT_BCS_BATCH(batch, 0x100c0806);   /* DW 17 */
    OUT_BCS_BATCH(batch,
                  0 << 30 |
                  minframesize);    /* DW 18, min frame size units */

    /* DW 19..30, KBL/GLK only */
    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        int i = 0;

        for (i = 0; i < 12; i++)
            OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
635
636
637 static void
638 gen9_hcpe_hevc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
639                              unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
640                              int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
641                              struct intel_batchbuffer *batch)
642 {
643     if (batch == NULL)
644         batch = encoder_context->base.batch;
645
646     if (data_bits_in_last_dw == 0)
647         data_bits_in_last_dw = 32;
648
649     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
650
651     OUT_BCS_BATCH(batch, HCP_INSERT_PAK_OBJECT | (lenght_in_dws + 2 - 2));
652     OUT_BCS_BATCH(batch,
653                   (0 << 31) |   /* inline payload */
654                   (0 << 16) |   /* always start at offset 0 */
655                   (0 << 15) |   /* HeaderLengthExcludeFrmSize */
656                   (data_bits_in_last_dw << 8) |
657                   (skip_emul_byte_count << 4) |
658                   (!!emulation_flag << 3) |
659                   ((!!is_last_header) << 2) |
660                   ((!!is_end_of_slice) << 1) |
661                   (0 << 0));    /* Reserved */
662     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
663
664     ADVANCE_BCS_BATCH(batch);
665 }
666 /*
667 // To be do: future
668 static uint8_t
669 intel_get_ref_idx_state_1(VAPictureHEVC *va_pic, unsigned int frame_store_id)
670 {
671     unsigned int is_long_term =
672         !!(va_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE);
673     unsigned int is_top_field =
674         !!!(va_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD);
675     unsigned int is_bottom_field =
676         !!(va_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD);
677
678     return ((is_long_term                         << 6) |
679             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
680             (frame_store_id                       << 1) |
681             ((is_top_field ^ 1) & is_bottom_field));
682 }
683 */
/* Emit HCP_REF_IDX_STATE for one reference list (list 0 or 1): each
 * active entry carries the DPB frame-store index of the reference and
 * its POC delta (clamped to signed 8 bits) relative to the current
 * picture.  The remaining entries of the fixed 16-slot table are
 * zeroed. */
static void
gen9_hcpe_ref_idx_state_1(struct intel_batchbuffer *batch,
                          int list,
                          struct intel_encoder_context *encoder_context,
                          struct encode_state *encode_state)
{
    int i;
    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
    uint8_t num_ref_minus1 = (list ? slice_param->num_ref_idx_l1_active_minus1 : slice_param->num_ref_idx_l0_active_minus1);
    VAPictureHEVC *ref_list = (list ? slice_param->ref_pic_list1 : slice_param->ref_pic_list0);
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct object_surface *obj_surface;
    int frame_index;

    /* NOTE(review): ref_idx_l0 is range-checked but never used below —
     * confirm whether it was intended to index the reference list. */
    int ref_idx_l0 = (vme_context->ref_index_in_mb[list] & 0xff);

    if (ref_idx_l0 > 3) {
        WARN_ONCE("ref_idx_l0 is out of range\n");
        ref_idx_l0 = 0;
    }

    /* Locate the VME-selected reference surface in the DPB to recover
     * its frame-store index. */
    obj_surface = vme_context->used_reference_objects[list];
    frame_index = -1;
    for (i = 0; i < 16; i++) {
        if (obj_surface &&
            obj_surface == encode_state->reference_objects[i]) {
            frame_index = i;
            break;
        }
    }
    if (frame_index == -1) {
        WARN_ONCE("RefPicList 0 or 1 is not found in DPB!\n");
    }

    BEGIN_BCS_BATCH(batch, 18);

    OUT_BCS_BATCH(batch, HCP_REF_IDX_STATE | (18 - 2));
    OUT_BCS_BATCH(batch,
                  num_ref_minus1 << 1 |
                  list);

    for (i = 0; i < 16; i++) {
        if (i < MIN((num_ref_minus1 + 1), 15)) {
            VAPictureHEVC *ref_pic = &ref_list[i];
            VAPictureHEVC *curr_pic = &pic_param->decoded_curr_pic;

            OUT_BCS_BATCH(batch,
                          1 << 15 |         /* bottom_field_flag 0 */
                          0 << 14 |         /* field_pic_flag 0 */
                          !!(ref_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE) << 13 |  /* short term is 1 */
                          0 << 12 | /* disable WP */
                          0 << 11 | /* disable WP */
                          frame_index << 8 |
                          (CLAMP(-128, 127, curr_pic->pic_order_cnt - ref_pic->pic_order_cnt) & 0xff));
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    ADVANCE_BCS_BATCH(batch);
}
746
747 void
748 intel_hcpe_hevc_ref_idx_state(VADriverContextP ctx,
749                               struct encode_state *encode_state,
750                               struct intel_encoder_context *encoder_context
751                              )
752 {
753     struct intel_batchbuffer *batch = encoder_context->base.batch;
754     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
755
756     if (slice_param->slice_type == HEVC_SLICE_I)
757         return;
758
759     gen9_hcpe_ref_idx_state_1(batch, 0, encoder_context, encode_state);
760
761     if (slice_param->slice_type == HEVC_SLICE_P)
762         return;
763
764     gen9_hcpe_ref_idx_state_1(batch, 1, encoder_context, encode_state);
765 }
766
/*
 * Emit HCP_SLICE_STATE for one slice of the current frame.  Computes
 * the CTB coordinates of this slice and of the next slice from the
 * slice segment address, then packs slice QP/filter/entropy settings
 * into the command dwords.  KBL/GLK use an 11-dword layout; earlier
 * gen9 parts use 9 dwords.
 */
static void
gen9_hcpe_hevc_slice_state(VADriverContextP ctx,
                           VAEncPictureParameterBufferHEVC *pic_param,
                           VAEncSliceParameterBufferHEVC *slice_param,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context,
                           struct intel_batchbuffer *batch)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    int slice_type = slice_param->slice_type;

    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
    int ctb_size = 1 << log2_ctb_size;
    int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
    int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
    /* this is the last slice iff it ends exactly at the frame's last CTB */
    int last_slice = (((slice_param->slice_segment_address + slice_param->num_ctu_in_slice) == (width_in_ctb * height_in_ctb)) ? 1 : 0);

    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;

    /* CTB coordinates of this slice's first CTB */
    slice_hor_pos = slice_param->slice_segment_address % width_in_ctb;
    slice_ver_pos = slice_param->slice_segment_address / width_in_ctb;

    /* CTB coordinates of the following slice's first CTB */
    next_slice_hor_pos = (slice_param->slice_segment_address + slice_param->num_ctu_in_slice) % width_in_ctb;
    next_slice_ver_pos = (slice_param->slice_segment_address + slice_param->num_ctu_in_slice) / width_in_ctb;

    /* only support multi slice begin from row start address */
    assert((slice_param->slice_segment_address % width_in_ctb) == 0);

    /* For the last slice the "next slice" position wraps: a single-slice
     * frame reports (0, height_in_ctb); otherwise (0, 0). */
    if (last_slice == 1) {
        if (slice_param->slice_segment_address == 0) {
            next_slice_hor_pos = 0;
            next_slice_ver_pos = height_in_ctb;
        } else {
            next_slice_hor_pos = 0;
            next_slice_ver_pos = 0;
        }
    }

    /* KBL/GLK extend HCP_SLICE_STATE by two trailing dwords. */
    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        BEGIN_BCS_BATCH(batch, 11);

        OUT_BCS_BATCH(batch, HCP_SLICE_STATE | (11 - 2));
    } else {
        BEGIN_BCS_BATCH(batch, 9);

        OUT_BCS_BATCH(batch, HCP_SLICE_STATE | (9 - 2));
    }

    /* DW1: start CTB position of this slice */
    OUT_BCS_BATCH(batch,
                  slice_ver_pos << 16 |
                  slice_hor_pos);
    /* DW2: start CTB position of the next slice */
    OUT_BCS_BATCH(batch,
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);
    /* DW3: slice QP, chroma QP offsets, MVP/dependent/last-slice flags, type */
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_cr_qp_offset & 0x1f) << 17 |
                  (slice_param->slice_cb_qp_offset & 0x1f) << 12 |
                  (pic_param->pic_init_qp + slice_param->slice_qp_delta) << 6 |
                  slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag << 5 |
                  slice_param->slice_fields.bits.dependent_slice_segment_flag << 4 |
                  last_slice << 2 |
                  slice_type);
    /* DW4: merge candidates, CABAC init, weight denominators, SAO and
     * deblocking controls.  low_delay (bit 14) is set for non-B slices. */
    OUT_BCS_BATCH(batch,
                  0 << 26 |
                  (slice_param->max_num_merge_cand - 1)  << 23 |
                  slice_param->slice_fields.bits.cabac_init_flag << 22 |
                  slice_param->luma_log2_weight_denom << 19 |
                  (slice_param->luma_log2_weight_denom + slice_param->delta_chroma_log2_weight_denom) << 16 |
                  slice_param->slice_fields.bits.collocated_from_l0_flag << 15 |
                  (slice_type != HEVC_SLICE_B) << 14 |
                  slice_param->slice_fields.bits.mvd_l1_zero_flag << 13 |
                  slice_param->slice_fields.bits.slice_sao_luma_flag << 12 |
                  slice_param->slice_fields.bits.slice_sao_chroma_flag << 11 |
                  slice_param->slice_fields.bits.slice_loop_filter_across_slices_enabled_flag << 10 |
                  (slice_param->slice_beta_offset_div2 & 0xf) << 5 |
                  (slice_param->slice_tc_offset_div2 & 0xf) << 1 |
                  slice_param->slice_fields.bits.slice_deblocking_filter_disabled_flag);
    OUT_BCS_BATCH(batch, 0); /* DW 5 ,ignore for encoder.*/
    /* DW6: rounding controls (intra/inter set to 4) */
    OUT_BCS_BATCH(batch,
                  4 << 26 |
                  4 << 20 |
                  0);
    /* DW7: PAK output controls */
    OUT_BCS_BATCH(batch,
                  1 << 10 |  /* header insertion enable */
                  1 << 9  |  /* slice data enable */
                  1 << 8  |  /* tail insertion enable, must at end of frame, not slice */
                  1 << 2  |  /* RBSP or EBSP, EmulationByteSliceInsertEnable */
                  1 << 1  |  /* cabacZeroWordInsertionEnable */
                  0);        /* Ignored for decoding */
    OUT_BCS_BATCH(batch, 0); /* PAK-BSE data start offset */

    /* DW9-10: KBL/GLK-only padding dwords */
    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
869
870 /* HEVC pipe line related */
871 static void gen9_hcpe_hevc_pipeline_picture_programing(VADriverContextP ctx,
872                                                        struct encode_state *encode_state,
873                                                        struct intel_encoder_context *encoder_context)
874 {
875     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
876
877     mfc_context->pipe_mode_select(ctx, HCP_CODEC_HEVC, encoder_context);
878     mfc_context->set_surface_state(ctx, encode_state, encoder_context);
879     gen9_hcpe_pipe_buf_addr_state(ctx, encode_state, encoder_context);
880     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
881
882     mfc_context->qm_state(ctx, encoder_context);
883     mfc_context->fqm_state(ctx, encoder_context);
884     mfc_context->pic_state(ctx, encode_state, encoder_context);
885     intel_hcpe_hevc_ref_idx_state(ctx, encode_state, encoder_context);
886 }
887
888 static void gen9_hcpe_init(VADriverContextP ctx,
889                            struct encode_state *encode_state,
890                            struct intel_encoder_context *encoder_context)
891 {
892     /* to do */
893     struct i965_driver_data *i965 = i965_driver_data(ctx);
894     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
895     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
896     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
897     dri_bo *bo;
898     int i, size = 0;
899     int slice_batchbuffer_size;
900     int slice_type = slice_param->slice_type;
901     int is_inter = (slice_type != HEVC_SLICE_I);
902
903     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
904     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
905     int ctb_size = 1 << log2_ctb_size;
906     int cu_size  = 1 << log2_cu_size;
907
908     int width_in_ctb  = ALIGN(pSequenceParameter->pic_width_in_luma_samples , ctb_size) / ctb_size;
909     int height_in_ctb = ALIGN(pSequenceParameter->pic_height_in_luma_samples, ctb_size) / ctb_size;
910     int width_in_cu  = ALIGN(pSequenceParameter->pic_width_in_luma_samples , cu_size) / cu_size;
911     int height_in_cu = ALIGN(pSequenceParameter->pic_height_in_luma_samples, cu_size) / cu_size;
912     int width_in_mb  = ALIGN(pSequenceParameter->pic_width_in_luma_samples , 16) / 16;
913     int height_in_mb = ALIGN(pSequenceParameter->pic_height_in_luma_samples, 16) / 16;
914
915     int num_cu_record = 64;
916     int size_shift = 3;
917
918     if ((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
919         || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
920         size_shift = 2;
921
922     if (log2_ctb_size == 5) num_cu_record = 16;
923     else if (log2_ctb_size == 4) num_cu_record = 4;
924     else if (log2_ctb_size == 6) num_cu_record = 64;
925
926     /* frame size in samples, cu,ctu, mb */
927     mfc_context->pic_size.picture_width_in_samples = pSequenceParameter->pic_width_in_luma_samples;
928     mfc_context->pic_size.picture_height_in_samples = pSequenceParameter->pic_height_in_luma_samples;
929     mfc_context->pic_size.ctb_size = ctb_size;
930     mfc_context->pic_size.picture_width_in_ctbs = width_in_ctb;
931     mfc_context->pic_size.picture_height_in_ctbs = height_in_ctb;
932     mfc_context->pic_size.min_cb_size = cu_size;
933     mfc_context->pic_size.picture_width_in_min_cb_minus1 = width_in_cu - 1;
934     mfc_context->pic_size.picture_height_in_min_cb_minus1 = height_in_cu - 1;
935     mfc_context->pic_size.picture_width_in_mbs = width_in_mb;
936     mfc_context->pic_size.picture_height_in_mbs = height_in_mb;
937
938     slice_batchbuffer_size = 64 * width_in_ctb * width_in_ctb + 4096 +
939                              (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
940
941     /*Encode common setup for HCP*/
942     /*deblocking */
943     dri_bo_unreference(mfc_context->deblocking_filter_line_buffer.bo);
944     mfc_context->deblocking_filter_line_buffer.bo = NULL;
945
946     dri_bo_unreference(mfc_context->deblocking_filter_tile_line_buffer.bo);
947     mfc_context->deblocking_filter_tile_line_buffer.bo = NULL;
948
949     dri_bo_unreference(mfc_context->deblocking_filter_tile_column_buffer.bo);
950     mfc_context->deblocking_filter_tile_column_buffer.bo = NULL;
951
952     /* input source */
953     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
954     mfc_context->uncompressed_picture_source.bo = NULL;
955
956     /* metadata */
957     dri_bo_unreference(mfc_context->metadata_line_buffer.bo);
958     mfc_context->metadata_line_buffer.bo = NULL;
959
960     dri_bo_unreference(mfc_context->metadata_tile_line_buffer.bo);
961     mfc_context->metadata_tile_line_buffer.bo = NULL;
962
963     dri_bo_unreference(mfc_context->metadata_tile_column_buffer.bo);
964     mfc_context->metadata_tile_column_buffer.bo = NULL;
965
966     /* sao */
967     dri_bo_unreference(mfc_context->sao_line_buffer.bo);
968     mfc_context->sao_line_buffer.bo = NULL;
969
970     dri_bo_unreference(mfc_context->sao_tile_line_buffer.bo);
971     mfc_context->sao_tile_line_buffer.bo = NULL;
972
973     dri_bo_unreference(mfc_context->sao_tile_column_buffer.bo);
974     mfc_context->sao_tile_column_buffer.bo = NULL;
975
976     /* mv temporal buffer */
977     for (i = 0; i < NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS; i++) {
978         if (mfc_context->current_collocated_mv_temporal_buffer[i].bo != NULL)
979             dri_bo_unreference(mfc_context->current_collocated_mv_temporal_buffer[i].bo);
980         mfc_context->current_collocated_mv_temporal_buffer[i].bo = NULL;
981     }
982
983     /* reference */
984     for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
985         if (mfc_context->reference_surfaces[i].bo != NULL)
986             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
987         mfc_context->reference_surfaces[i].bo = NULL;
988     }
989
990     /* indirect data CU recording */
991     dri_bo_unreference(mfc_context->hcp_indirect_cu_object.bo);
992     mfc_context->hcp_indirect_cu_object.bo = NULL;
993
994     dri_bo_unreference(mfc_context->hcp_indirect_pak_bse_object.bo);
995     mfc_context->hcp_indirect_pak_bse_object.bo = NULL;
996
997     /* Current internal buffer for HCP */
998
999     size = ALIGN(pSequenceParameter->pic_width_in_luma_samples, 32) >> size_shift;
1000     size <<= 6;
1001     ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_line_buffer), "line buffer", size);
1002     ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_tile_line_buffer), "tile line buffer", size);
1003
1004     size = ALIGN(pSequenceParameter->pic_height_in_luma_samples + 6 * width_in_ctb, 32) >> size_shift;
1005     size <<= 6;
1006     ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_tile_column_buffer), "tile column buffer", size);
1007
1008     if (is_inter) {
1009         size = (((pSequenceParameter->pic_width_in_luma_samples + 15) >> 4) * 188 + 9 * width_in_ctb + 1023) >> 9;
1010         size <<= 6;
1011         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_line_buffer), "metadata line buffer", size);
1012
1013         size = (((pSequenceParameter->pic_width_in_luma_samples + 15) >> 4) * 172 + 9 * width_in_ctb + 1023) >> 9;
1014         size <<= 6;
1015         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_line_buffer), "metadata tile line buffer", size);
1016
1017         size = (((pSequenceParameter->pic_height_in_luma_samples + 15) >> 4) * 176 + 89 * width_in_ctb + 1023) >> 9;
1018         size <<= 6;
1019         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_column_buffer), "metadata tile column buffer", size);
1020     } else {
1021         size = (pSequenceParameter->pic_width_in_luma_samples + 8 * width_in_ctb + 1023) >> 9;
1022         size <<= 6;
1023         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_line_buffer), "metadata line buffer", size);
1024
1025         size = (pSequenceParameter->pic_width_in_luma_samples + 16 * width_in_ctb + 1023) >> 9;
1026         size <<= 6;
1027         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_line_buffer), "metadata tile line buffer", size);
1028
1029         size = (pSequenceParameter->pic_height_in_luma_samples + 8 * height_in_ctb + 1023) >> 9;
1030         size <<= 6;
1031         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_column_buffer), "metadata tile column buffer", size);
1032     }
1033
1034     size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 3 * width_in_ctb), 16) >> size_shift;
1035     size <<= 6;
1036     ALLOC_ENCODER_BUFFER((&mfc_context->sao_line_buffer), "sao line buffer", size);
1037
1038     size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 6 * width_in_ctb), 16) >> size_shift;
1039     size <<= 6;
1040     ALLOC_ENCODER_BUFFER((&mfc_context->sao_tile_line_buffer), "sao tile line buffer", size);
1041
1042     size = ALIGN(((pSequenceParameter->pic_height_in_luma_samples >> 1) + 6 * height_in_ctb), 16) >> size_shift;
1043     size <<= 6;
1044     ALLOC_ENCODER_BUFFER((&mfc_context->sao_tile_column_buffer), "sao tile column buffer", size);
1045
1046     /////////////////////
1047     dri_bo_unreference(mfc_context->hcp_indirect_cu_object.bo);
1048     bo = dri_bo_alloc(i965->intel.bufmgr,
1049                       "Indirect data CU Buffer",
1050                       width_in_ctb * height_in_ctb * num_cu_record * 16 * 4,
1051                       0x1000);
1052     assert(bo);
1053     mfc_context->hcp_indirect_cu_object.bo = bo;
1054
1055     /* to do pak bse object buffer */
1056     /* to do current collocated mv temporal buffer */
1057
1058     dri_bo_unreference(mfc_context->hcp_batchbuffer_surface.bo);
1059     mfc_context->hcp_batchbuffer_surface.bo = NULL;
1060
1061     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
1062     mfc_context->aux_batchbuffer_surface.bo = NULL;
1063
1064     if (mfc_context->aux_batchbuffer)
1065         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
1066
1067     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
1068     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
1069     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
1070     mfc_context->aux_batchbuffer_surface.pitch = 16;
1071     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
1072     mfc_context->aux_batchbuffer_surface.size_block = 16;
1073 }
1074
1075 static VAStatus gen9_hcpe_run(VADriverContextP ctx,
1076                               struct encode_state *encode_state,
1077                               struct intel_encoder_context *encoder_context)
1078 {
1079     struct intel_batchbuffer *batch = encoder_context->base.batch;
1080
1081     intel_batchbuffer_flush(batch);     //run the pipeline
1082
1083     return VA_STATUS_SUCCESS;
1084 }
1085
1086
1087 static VAStatus
1088 gen9_hcpe_stop(VADriverContextP ctx,
1089                struct encode_state *encode_state,
1090                struct intel_encoder_context *encoder_context,
1091                int *encoded_bits_size)
1092 {
1093     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
1094     VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1095     VACodedBufferSegment *coded_buffer_segment;
1096
1097     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
1098     assert(vaStatus == VA_STATUS_SUCCESS);
1099     *encoded_bits_size = coded_buffer_segment->size * 8;
1100     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
1101
1102     return VA_STATUS_SUCCESS;
1103 }
1104
1105
/*
 * Compute how many leading bytes of a packed header the hardware must
 * copy without applying emulation-prevention: any padding zeros, the
 * 00 00 (00) 01 start code, and the two-byte HEVC NAL unit header.
 * Returns 0 (with a warning) if no start code is found.
 */
int intel_hevc_find_skipemulcnt(unsigned char *buf, int bits_length)
{
#define NAL_UNIT_TYPE_MASK 0x7e
#define HW_MAX_SKIP_LENGTH 15

    int total_bytes = ALIGN(bits_length, 32) >> 3;
    int prefix_pos = -1;
    int pos;
    int four_byte_code, nal_unit_type, skip_cnt;

    /* Scan for the first 00 00 01 or 00 00 00 01 start-code prefix;
     * bytes before it count as leading zeros to be skipped. */
    for (pos = 0; pos < total_bytes - 4; pos++) {
        unsigned char *p = buf + pos;

        if ((p[0] == 0 && p[1] == 0 && p[2] == 1) ||
            (p[0] == 0 && p[1] == 0 && p[2] == 0 && p[3] == 1)) {
            prefix_pos = pos;
            break;
        }
    }

    if (prefix_pos < 0) {
        /* warning message is complained. But anyway it will be inserted. */
        WARN_ONCE("Invalid packed header data. "
                  "Can't find the 000001 start_prefix code\n");
        return 0;
    }

    /* A 4-byte start code carries one extra zero_byte before 00 00 01. */
    four_byte_code = !(buf[prefix_pos] == 0 &&
                       buf[prefix_pos + 1] == 0 &&
                       buf[prefix_pos + 2] == 1);

    skip_cnt = prefix_pos + four_byte_code + 3;

    /* Account for the two-byte HEVC NAL unit header as well. */
    nal_unit_type = (buf[skip_cnt]) & NAL_UNIT_TYPE_MASK;
    skip_cnt += 2;

    if (nal_unit_type == 14 || nal_unit_type == 20 || nal_unit_type == 21) {
        /* more unit header bytes are accounted for MVC/SVC */
        //skip_cnt += 3;
    }

    if (skip_cnt > HW_MAX_SKIP_LENGTH) {
        WARN_ONCE("Too many leading zeros are padded for packed data. "
                  "It is beyond the HW range.!!!\n");
    }
    return skip_cnt;
}
1159
1160 #ifdef HCP_SOFTWARE_SKYLAKE
1161
1162 static int
1163 gen9_hcpe_hevc_pak_object(VADriverContextP ctx, int lcu_x, int lcu_y, int isLast_ctb,
1164                           struct intel_encoder_context *encoder_context,
1165                           int cu_count_in_lcu, unsigned int split_coding_unit_flag,
1166                           struct intel_batchbuffer *batch)
1167 {
1168     struct i965_driver_data *i965 = i965_driver_data(ctx);
1169     int len_in_dwords = 3;
1170
1171     if (IS_KBL(i965->intel.device_info) ||
1172         IS_GLK(i965->intel.device_info))
1173         len_in_dwords = 5;
1174
1175     if (batch == NULL)
1176         batch = encoder_context->base.batch;
1177
1178     BEGIN_BCS_BATCH(batch, len_in_dwords);
1179
1180     OUT_BCS_BATCH(batch, HCP_PAK_OBJECT | (len_in_dwords - 2));
1181     OUT_BCS_BATCH(batch,
1182                   (((isLast_ctb > 0) ? 1 : 0) << 31) |  /* last ctb?*/
1183                   ((cu_count_in_lcu - 1) << 24) |           /* No motion vector */
1184                   split_coding_unit_flag);
1185
1186     OUT_BCS_BATCH(batch, (lcu_y << 16) | lcu_x);        /* LCU  for Y*/
1187
1188     if (IS_KBL(i965->intel.device_info) ||
1189         IS_GLK(i965->intel.device_info)) {
1190         OUT_BCS_BATCH(batch, 0);
1191         OUT_BCS_BATCH(batch, 0);
1192     }
1193
1194     ADVANCE_BCS_BATCH(batch);
1195
1196     return len_in_dwords;
1197 }
1198
1199 #define     AVC_INTRA_RDO_OFFSET    4
1200 #define     AVC_INTER_RDO_OFFSET    10
1201 #define     AVC_INTER_MSG_OFFSET    8
1202 #define     AVC_INTER_MV_OFFSET     48
1203 #define     AVC_RDO_MASK            0xFFFF
1204
1205 #define     AVC_INTRA_MODE_MASK     0x30
1206 #define     AVC_INTRA_16X16         0x00
1207 #define     AVC_INTRA_8X8           0x01
1208 #define     AVC_INTRA_4X4           0x02
1209
1210 #define     AVC_INTER_MODE_MASK     0x03
1211 #define     AVC_INTER_8X8           0x03
1212 #define     AVC_INTER_8X16          0x02
1213 #define     AVC_INTER_16X8          0x01
1214 #define     AVC_INTER_16X16         0x00
1215 #define     AVC_SUBMB_SHAPE_MASK    0x00FF00
1216
1217 /* VME output message, write back message */
1218 #define     AVC_INTER_SUBMB_PRE_MODE_MASK       0x00ff0000
1219 #define     AVC_SUBMB_SHAPE_MASK    0x00FF00
1220
1221 /* here 1 MB = 1CU = 16x16 */
/*
 * Fill one 64-byte CU record in the indirect CU buffer for an intra
 * CU (here 1 MB == 1 CU == 16x16), translating the AVC-style VME
 * output message @msg into HEVC CU fields: partition mode, CU/TU
 * sizes and the four per-PU intra modes remapped from AVC intra
 * prediction modes.  Inter fields (MVs, ref indices) are zeroed.
 */
static void
gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int qp, unsigned int *msg,
                                      int ctb_x, int ctb_y,
                                      int mb_x, int mb_y,
                                      int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type, int cu_index, int index)
{
    /* here cu == mb, so we use mb address as the cu address */
    /* to fill the indirect cu by the vme out */
    /* lookup tables: AVC intra prediction mode -> HEVC intra mode */
    static int intra_mode_8x8_avc2hevc[9] = {26, 10, 1, 34, 18, 24, 13, 28, 8};
    static int intra_mode_16x16_avc2hevc[4] = {26, 10, 1, 34};
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    unsigned char * cu_record_ptr = NULL;
    unsigned int * cu_msg = NULL;
    /* byte offset of this CU's record: (CTB base + CU index) * 64 bytes */
    int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
    int mb_address_in_ctb = 0;
    int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
    int zero = 0;
    int is_inter = 0;
    int intraMbMode = 0;
    int cu_part_mode = 0;
    int intraMode[4];
    int inerpred_idc = 0;
    int intra_chroma_mode = 5;
    int cu_size = 1;
    int tu_size = 0x55;
    int tu_count = 4;
    int chroma_mode_remap[4] = {5, 4, 3, 2};

    /* is_inter is always 0 here, so inerpred_idc is always 0xff (no inter prediction) */
    if (!is_inter) inerpred_idc = 0xff;

    intraMbMode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4;

    intra_chroma_mode = (msg[3] & 0x3);
    intra_chroma_mode =  chroma_mode_remap[intra_chroma_mode];
    if (intraMbMode == AVC_INTRA_16X16) {
        cu_part_mode = 0; //2Nx2N
        cu_size = 1;
        tu_size = 0x55;
        tu_count = 4;
        /* all four PUs share the single 16x16 prediction mode */
        intraMode[0] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
        intraMode[1] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
        intraMode[2] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
        intraMode[3] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
    } else if (intraMbMode == AVC_INTRA_8X8) {
        cu_part_mode = 0; //2Nx2N
        cu_size = 0;
        tu_size = 0;
        tu_count = 4;
        /* NOTE(review): a 4-bit field indexes a 9-entry table here —
         * presumably VME only emits modes 0..8; confirm. */
        intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
        intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
        intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
        intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];

    } else { // for 4x4 to use 8x8 replace
        cu_part_mode = 3; //NxN
        cu_size = 0;
        tu_size = 0;
        tu_count = 4;
        /* four distinct 4-bit modes, one per 8x8 sub-partition */
        intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 0) & 0xf];
        intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 4) & 0xf];
        intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 8) & 0xf];
        intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 12) & 0xf];

    }

    /* assumes the indirect CU bo is already mapped (bo->virtual valid) —
     * mapping is done by the caller */
    cu_record_ptr = (unsigned char *)mfc_context->hcp_indirect_cu_object.bo->virtual;
    /* get the mb info from the vme out */
    cu_msg = (unsigned int *)(cu_record_ptr + cu_address);

    cu_msg[0] = (inerpred_idc << 24 |   /* interpred_idc[3:0][1:0] */
                 zero << 23 |   /* reserved */
                 qp << 16 | /* CU_qp */
                 zero << 11 |   /* reserved */
                 intra_chroma_mode << 8 |   /* intra_chroma_mode */
                 zero << 7 |    /* IPCM_enable , reserved for SKL*/
                 cu_part_mode << 4 |    /* cu_part_mode */
                 zero << 3 |    /* cu_transquant_bypass_flag */
                 is_inter << 2 |    /* cu_pred_mode :intra 1,inter 1*/
                 cu_size          /* cu_size */
                );
    cu_msg[1] = (zero << 30 |   /* reserved  */
                 intraMode[3] << 24 |   /* intra_mode */
                 zero << 22 |   /* reserved  */
                 intraMode[2] << 16 |   /* intra_mode */
                 zero << 14 |   /* reserved  */
                 intraMode[1] << 8 |    /* intra_mode */
                 zero << 6 |    /* reserved  */
                 intraMode[0]           /* intra_mode */
                );
    /* l0: 4 MV (x,y); l1; 4 MV (x,y) — all zero for an intra CU */
    cu_msg[2] = (zero << 16 |   /* mvx_l0[1]  */
                 zero           /* mvx_l0[0] */
                );
    cu_msg[3] = (zero << 16 |   /* mvx_l0[3]  */
                 zero           /* mvx_l0[2] */
                );
    cu_msg[4] = (zero << 16 |   /* mvy_l0[1]  */
                 zero           /* mvy_l0[0] */
                );
    cu_msg[5] = (zero << 16 |   /* mvy_l0[3]  */
                 zero           /* mvy_l0[2] */
                );

    cu_msg[6] = (zero << 16 |   /* mvx_l1[1]  */
                 zero           /* mvx_l1[0] */
                );
    cu_msg[7] = (zero << 16 |   /* mvx_l1[3]  */
                 zero           /* mvx_l1[2] */
                );
    cu_msg[8] = (zero << 16 |   /* mvy_l1[1]  */
                 zero           /* mvy_l1[0] */
                );
    cu_msg[9] = (zero << 16 |   /* mvy_l1[3]  */
                 zero           /* mvy_l1[2] */
                );

    cu_msg[10] = (zero << 28 |  /* ref_idx_l1[3]  */
                  zero << 24 |  /* ref_idx_l1[2] */
                  zero << 20 |  /* ref_idx_l1[1]  */
                  zero << 16 |  /* ref_idx_l1[0] */
                  zero << 12 |  /* ref_idx_l0[3]  */
                  zero << 8 |   /* ref_idx_l0[2] */
                  zero << 4 |   /* ref_idx_l0[1]  */
                  zero          /* ref_idx_l0[0] */
                 );

    cu_msg[11] = tu_size; /* tu_size 00000000 00000000 00000000 10101010  or 0x0*/
    cu_msg[12] = ((tu_count - 1) << 28 | /* tu count - 1 */
                  zero << 16 |  /* reserved  */
                  zero          /* tu_xform_Yskip[15:0] */
                 );
    cu_msg[13] = (zero << 16 |  /* tu_xform_Vskip[15:0]  */
                  zero          /* tu_xform_Uskip[15:0] */
                 );
    cu_msg[14] = zero ;
    cu_msg[15] = zero ;
}
1362
/* here 1 MB = 1CU = 16x16 */
/*
 * Fill one inter-predicted CU record (16 dwords / 64 bytes) in the HCP
 * indirect CU object, translating the AVC-style VME output message for a
 * 16x16 macroblock into the HEVC PAK CU-record layout.
 *
 * qp       - CU QP written into dword 0 of the record
 * msg      - VME output message (already advanced to the inter portion)
 * cu_index - running CU index within the current CTB (caller increments it
 *            across all MBs of the CTB)
 * index    - 8x8 sub-block index, only meaningful for the AVC_INTER_8X8 case
 *
 * NOTE(review): encode_state, mb_x/mb_y, ctb_width_in_mb and slice_type are
 * unused here — presumably kept so the signature mirrors the intra variant.
 */
static void
gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int qp, unsigned int *msg,
                                      int ctb_x, int ctb_y,
                                      int mb_x, int mb_y,
                                      int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type, int cu_index, int index)
{
    /* here cu == mb, so we use mb address as the cu address */
    /* to fill the indirect cu by the vme out */
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    unsigned char * cu_record_ptr = NULL;
    unsigned int * cu_msg = NULL;
    /* each CTB owns num_cu_record record slots in the indirect CU buffer */
    int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
    /* stays 0: the per-MB offset is already folded into cu_index by the caller */
    int mb_address_in_ctb = 0;
    /* byte offset: one CU record is 16 dwords * 4 bytes */
    int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
    int zero = 0;
    int cu_part_mode = 0;
    int submb_pre_mode = 0;
    int is_inter = 1;
    int cu_size = 1;
    int tu_size = 0x55;
    int tu_count = 4;
    int inter_mode = 0;

    unsigned int *mv_ptr;
    {
        inter_mode = (msg[0] & AVC_INTER_MODE_MASK);
        submb_pre_mode = (msg[1] & AVC_INTER_SUBMB_PRE_MODE_MASK) >> 16;
#define MSG_MV_OFFSET   4
        mv_ptr = msg + MSG_MV_OFFSET;
        /* MV of VME output is based on 16 sub-blocks. So it is necessary
        * to convert them to be compatible with the format of AVC_PAK
        * command.
        */
        /* 0/2/4/6/8... : l0, 1/3/5/7...: l1 ; now it only support 16x16,16x8,8x16,8x8*/

        if (inter_mode == AVC_INTER_16X16) {
            /* one MV pair replicated to all four partition slots */
            mv_ptr[4] = mv_ptr[0];
            mv_ptr[5] = mv_ptr[1];
            mv_ptr[2] = mv_ptr[0];
            mv_ptr[3] = mv_ptr[1];
            mv_ptr[6] = mv_ptr[0];
            mv_ptr[7] = mv_ptr[1];
            cu_part_mode = 0;
            cu_size = 1;
            tu_size = 0x55;
            tu_count = 4;
        } else if (inter_mode == AVC_INTER_8X16) {
            /* left half from sub-block 0, right half from sub-block at [8] */
            mv_ptr[4] = mv_ptr[0];
            mv_ptr[5] = mv_ptr[1];
            mv_ptr[2] = mv_ptr[8];
            mv_ptr[3] = mv_ptr[9];
            mv_ptr[6] = mv_ptr[8];
            mv_ptr[7] = mv_ptr[9];
            cu_part_mode = 1;
            cu_size = 1;
            tu_size = 0x55;
            tu_count = 4;
        } else if (inter_mode == AVC_INTER_16X8) {
            /* top half from sub-block 0, bottom half from sub-blocks [16]/[24] */
            mv_ptr[2] = mv_ptr[0];
            mv_ptr[3] = mv_ptr[1];
            mv_ptr[4] = mv_ptr[16];
            mv_ptr[5] = mv_ptr[17];
            mv_ptr[6] = mv_ptr[24];
            mv_ptr[7] = mv_ptr[25];
            cu_part_mode = 2;
            cu_size = 1;
            tu_size = 0x55;
            tu_count = 4;
        } else if (inter_mode == AVC_INTER_8X8) {
            /* pick the single 8x8 sub-block selected by 'index' and replicate it;
             * this CU then represents one 8x8 of the MB (cu_size = 0) */
            mv_ptr[0] = mv_ptr[index * 8 + 0 ];
            mv_ptr[1] = mv_ptr[index * 8 + 1 ];
            mv_ptr[2] = mv_ptr[index * 8 + 0 ];
            mv_ptr[3] = mv_ptr[index * 8 + 1 ];
            mv_ptr[4] = mv_ptr[index * 8 + 0 ];
            mv_ptr[5] = mv_ptr[index * 8 + 1 ];
            mv_ptr[6] = mv_ptr[index * 8 + 0 ];
            mv_ptr[7] = mv_ptr[index * 8 + 1 ];
            cu_part_mode = 0;
            cu_size = 0;
            tu_size = 0x0;
            tu_count = 4;

        } else {
            /* fallback: treat any other mode as 16x16 */
            mv_ptr[4] = mv_ptr[0];
            mv_ptr[5] = mv_ptr[1];
            mv_ptr[2] = mv_ptr[0];
            mv_ptr[3] = mv_ptr[1];
            mv_ptr[6] = mv_ptr[0];
            mv_ptr[7] = mv_ptr[1];
            cu_part_mode = 0;
            cu_size = 1;
            tu_size = 0x55;
            tu_count = 4;

        }
    }

    /* hcp_indirect_cu_object.bo is mapped by the caller before this runs */
    cu_record_ptr = (unsigned char *)mfc_context->hcp_indirect_cu_object.bo->virtual;
    /* get the mb info from the vme out */
    cu_msg = (unsigned int *)(cu_record_ptr + cu_address);

    cu_msg[0] = (submb_pre_mode << 24 | /* interpred_idc[3:0][1:0] */
                 zero << 23 |   /* reserved */
                 qp << 16 | /* CU_qp */
                 zero << 11 |   /* reserved */
                 5 << 8 |   /* intra_chroma_mode */
                 zero << 7 |    /* IPCM_enable , reserved for SKL*/
                 cu_part_mode << 4 |    /* cu_part_mode */
                 zero << 3 |    /* cu_transquant_bypass_flag */
                 is_inter << 2 |    /* cu_pred_mode: 1 = inter */
                 cu_size          /* cu_size */
                );
    /* intra modes are unused in an inter CU record */
    cu_msg[1] = (zero << 30 |   /* reserved  */
                 zero << 24 |   /* intra_mode */
                 zero << 22 |   /* reserved  */
                 zero << 16 |   /* intra_mode */
                 zero << 14 |   /* reserved  */
                 zero << 8 |    /* intra_mode */
                 zero << 6 |    /* reserved  */
                 zero           /* intra_mode */
                );
    /* l0: 4 MV (x,y); l1; 4 MV (x,y) */
    cu_msg[2] = ((mv_ptr[2] & 0xffff) << 16 |   /* mvx_l0[1]  */
                 (mv_ptr[0] & 0xffff)           /* mvx_l0[0] */
                );
    cu_msg[3] = ((mv_ptr[6] & 0xffff) << 16 |   /* mvx_l0[3]  */
                 (mv_ptr[4] & 0xffff)           /* mvx_l0[2] */
                );
    cu_msg[4] = ((mv_ptr[2] & 0xffff0000) |         /* mvy_l0[1]  */
                 (mv_ptr[0] & 0xffff0000) >> 16     /* mvy_l0[0] */
                );
    cu_msg[5] = ((mv_ptr[6] & 0xffff0000) |         /* mvy_l0[3]  */
                 (mv_ptr[4] & 0xffff0000) >> 16     /* mvy_l0[2] */
                );

    cu_msg[6] = ((mv_ptr[3] & 0xffff) << 16 |   /* mvx_l1[1]  */
                 (mv_ptr[1] & 0xffff)           /* mvx_l1[0] */
                );
    cu_msg[7] = ((mv_ptr[7] & 0xffff) << 16 |   /* mvx_l1[3]  */
                 (mv_ptr[5] & 0xffff)           /* mvx_l1[2] */
                );
    cu_msg[8] = ((mv_ptr[3] & 0xffff0000) |         /* mvy_l1[1]  */
                 (mv_ptr[1] & 0xffff0000) >> 16     /* mvy_l1[0] */
                );
    cu_msg[9] = ((mv_ptr[7] & 0xffff0000) |         /* mvy_l1[3]  */
                 (mv_ptr[5] & 0xffff0000) >> 16     /* mvy_l1[2] */
                );

    /* reference indices: 4 bits per partition, l1 in the high half */
    cu_msg[10] = (((vme_context->ref_index_in_mb[1] >> 24) & 0xf) << 28 |   /* ref_idx_l1[3]  */
                  ((vme_context->ref_index_in_mb[1] >> 16) & 0xf) << 24 |   /* ref_idx_l1[2] */
                  ((vme_context->ref_index_in_mb[1] >> 8) & 0xf) << 20 |    /* ref_idx_l1[1]  */
                  ((vme_context->ref_index_in_mb[1] >> 0) & 0xf) << 16 |    /* ref_idx_l1[0] */
                  ((vme_context->ref_index_in_mb[0] >> 24) & 0xf) << 12 |   /* ref_idx_l0[3]  */
                  ((vme_context->ref_index_in_mb[0] >> 16) & 0xf) << 8  |   /* ref_idx_l0[2] */
                  ((vme_context->ref_index_in_mb[0] >> 8) & 0xf) << 4 |     /* ref_idx_l0[1]  */
                  ((vme_context->ref_index_in_mb[0] >> 0) & 0xf)            /* ref_idx_l0[0] */
                 );

    cu_msg[11] = tu_size; /* tu_size 00000000 00000000 00000000 10101010  or 0x0*/
    cu_msg[12] = ((tu_count - 1) << 28 | /* tu count - 1 */
                  zero << 16 |  /* reserved  */
                  zero          /* tu_xform_Yskip[15:0] */
                 );
    cu_msg[13] = (zero << 16 |  /* tu_xform_Vskip[15:0]  */
                  zero          /* tu_xform_Uskip[15:0] */
                 );
    cu_msg[14] = zero ;
    cu_msg[15] = zero ;
}
1537
1538 #define HEVC_SPLIT_CU_FLAG_64_64 ((0x1<<20)|(0xf<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1539 #define HEVC_SPLIT_CU_FLAG_32_32 ((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1540 #define HEVC_SPLIT_CU_FLAG_16_16 ((0x0<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1541 #define HEVC_SPLIT_CU_FLAG_8_8   ((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1542
1543
1544 void
1545 intel_hevc_slice_insert_packed_data(VADriverContextP ctx,
1546                                     struct encode_state *encode_state,
1547                                     struct intel_encoder_context *encoder_context,
1548                                     int slice_index,
1549                                     struct intel_batchbuffer *slice_batch)
1550 {
1551     int count, i, start_index;
1552     unsigned int length_in_bits;
1553     VAEncPackedHeaderParameterBuffer *param = NULL;
1554     unsigned int *header_data = NULL;
1555     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1556     int slice_header_index;
1557
1558     if (encode_state->slice_header_index[slice_index] == 0)
1559         slice_header_index = -1;
1560     else
1561         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1562
1563     count = encode_state->slice_rawdata_count[slice_index];
1564     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1565
1566     for (i = 0; i < count; i++) {
1567         unsigned int skip_emul_byte_cnt;
1568
1569         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1570
1571         param = (VAEncPackedHeaderParameterBuffer *)
1572                 (encode_state->packed_header_params_ext[start_index + i]->buffer);
1573
1574         /* skip the slice header packed data type as it is lastly inserted */
1575         if (param->type == VAEncPackedHeaderSlice)
1576             continue;
1577
1578         length_in_bits = param->bit_length;
1579
1580         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1581
1582         /* as the slice header is still required, the last header flag is set to
1583          * zero.
1584          */
1585         mfc_context->insert_object(ctx,
1586                                    encoder_context,
1587                                    header_data,
1588                                    ALIGN(length_in_bits, 32) >> 5,
1589                                    length_in_bits & 0x1f,
1590                                    skip_emul_byte_cnt,
1591                                    0,
1592                                    0,
1593                                    !param->has_emulation_bytes,
1594                                    slice_batch);
1595     }
1596
1597     if (slice_header_index == -1) {
1598         unsigned char *slice_header = NULL;
1599         int slice_header_length_in_bits = 0;
1600         VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1601         VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1602         VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
1603
1604         /* For the Normal HEVC */
1605         slice_header_length_in_bits = build_hevc_slice_header(pSequenceParameter,
1606                                                               pPicParameter,
1607                                                               pSliceParameter,
1608                                                               &slice_header,
1609                                                               0);
1610         mfc_context->insert_object(ctx, encoder_context,
1611                                    (unsigned int *)slice_header,
1612                                    ALIGN(slice_header_length_in_bits, 32) >> 5,
1613                                    slice_header_length_in_bits & 0x1f,
1614                                    5,  /* first 6 bytes are start code + nal unit type */
1615                                    1, 0, 1, slice_batch);
1616         free(slice_header);
1617     } else {
1618         unsigned int skip_emul_byte_cnt;
1619
1620         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1621
1622         param = (VAEncPackedHeaderParameterBuffer *)
1623                 (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1624         length_in_bits = param->bit_length;
1625
1626         /* as the slice header is the last header data for one slice,
1627          * the last header flag is set to one.
1628          */
1629         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1630
1631         mfc_context->insert_object(ctx,
1632                                    encoder_context,
1633                                    header_data,
1634                                    ALIGN(length_in_bits, 32) >> 5,
1635                                    length_in_bits & 0x1f,
1636                                    skip_emul_byte_cnt,
1637                                    1,
1638                                    0,
1639                                    !param->has_emulation_bytes,
1640                                    slice_batch);
1641     }
1642
1643     return;
1644 }
1645
1646 static void
1647 gen9_hcpe_hevc_pipeline_slice_programing(VADriverContextP ctx,
1648                                          struct encode_state *encode_state,
1649                                          struct intel_encoder_context *encoder_context,
1650                                          int slice_index,
1651                                          struct intel_batchbuffer *slice_batch)
1652 {
1653     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1654     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1655     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1656     VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1657     VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
1658     int qp_slice = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1659     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1660     //unsigned char *slice_header = NULL;     // for future use
1661     //int slice_header_length_in_bits = 0;
1662     unsigned int tail_data[] = { 0x0, 0x0 };
1663     int slice_type = pSliceParameter->slice_type;
1664
1665     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
1666     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
1667     int ctb_size = 1 << log2_ctb_size;
1668     int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
1669     int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
1670     int last_slice = (pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice) == (width_in_ctb * height_in_ctb);
1671     int ctb_width_in_mb = (ctb_size + 15) / 16;
1672     int i_ctb, ctb_x, ctb_y;
1673     unsigned int split_coding_unit_flag = 0;
1674     int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15) / 16;
1675     int row_pad_flag = (pSequenceParameter->pic_height_in_luma_samples % ctb_size) > 0 ? 1 : 0;
1676     int col_pad_flag = (pSequenceParameter->pic_width_in_luma_samples % ctb_size) > 0 ? 1 : 0;
1677
1678     int is_intra = (slice_type == HEVC_SLICE_I);
1679     unsigned int *msg = NULL;
1680     unsigned char *msg_ptr = NULL;
1681     int macroblock_address = 0;
1682     int num_cu_record = 64;
1683     int cu_count = 1;
1684     int tmp_mb_mode = 0;
1685     int mb_x = 0, mb_y = 0;
1686     int mb_addr = 0;
1687     int cu_index = 0;
1688     int inter_rdo, intra_rdo;
1689     int qp;
1690     int drop_cu_row_in_last_mb = 0;
1691     int drop_cu_column_in_last_mb = 0;
1692
1693     if (log2_ctb_size == 5) num_cu_record = 16;
1694     else if (log2_ctb_size == 4) num_cu_record = 4;
1695     else if (log2_ctb_size == 6) num_cu_record = 64;
1696
1697     qp = qp_slice;
1698     if (rate_control_mode == VA_RC_CBR) {
1699         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1700         if (slice_type == HEVC_SLICE_B) {
1701             if (pSequenceParameter->ip_period == 1) {
1702                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
1703
1704             } else if (mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1) {
1705                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
1706             }
1707         }
1708         if (encode_state->slice_header_index[slice_index] == 0) {
1709             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1710         }
1711     }
1712
1713     /* only support for 8-bit pixel bit-depth */
1714     assert(pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 >= 0 && pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 <= 2);
1715     assert(pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 >= 0 && pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 <= 2);
1716     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1717     assert(qp >= 0 && qp < 52);
1718
1719     {
1720         gen9_hcpe_hevc_slice_state(ctx,
1721                                    pPicParameter,
1722                                    pSliceParameter,
1723                                    encode_state, encoder_context,
1724                                    slice_batch);
1725
1726         if (slice_index == 0)
1727             intel_hcpe_hevc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1728
1729         intel_hevc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1730
1731         /*
1732         slice_header_length_in_bits = build_hevc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header, slice_index);
1733         int skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)slice_header, slice_header_length_in_bits);
1734
1735         mfc_context->insert_object(ctx, encoder_context,
1736                                    (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1737                                     skip_emul_byte_cnt,
1738                                     1, 0, 1, slice_batch);
1739         free(slice_header);
1740         */
1741     }
1742
1743
1744
1745     split_coding_unit_flag = (ctb_width_in_mb == 4) ? HEVC_SPLIT_CU_FLAG_64_64 : ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
1746
1747     dri_bo_map(vme_context->vme_output.bo , 1);
1748     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1749     dri_bo_map(mfc_context->hcp_indirect_cu_object.bo , 1);
1750
1751     for (i_ctb = pSliceParameter->slice_segment_address; i_ctb < pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice; i_ctb++) {
1752         int last_ctb = (i_ctb == (pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice - 1));
1753         int ctb_height_in_mb_internal = ctb_width_in_mb;
1754         int ctb_width_in_mb_internal = ctb_width_in_mb;
1755         int max_cu_num_in_mb = 4;
1756
1757         ctb_x = i_ctb % width_in_ctb;
1758         ctb_y = i_ctb / width_in_ctb;
1759
1760         drop_cu_row_in_last_mb = 0;
1761         drop_cu_column_in_last_mb = 0;
1762
1763         if (ctb_y == (height_in_ctb - 1) && row_pad_flag) {
1764             ctb_height_in_mb_internal = (pSequenceParameter->pic_height_in_luma_samples - (ctb_y * ctb_size) + 15) / 16;
1765
1766             if ((log2_cu_size == 3) && (pSequenceParameter->pic_height_in_luma_samples % 16))
1767                 drop_cu_row_in_last_mb = (16 - (pSequenceParameter->pic_height_in_luma_samples % 16)) >> log2_cu_size;
1768         }
1769
1770         if (ctb_x == (width_in_ctb - 1) && col_pad_flag) {
1771             ctb_width_in_mb_internal = (pSequenceParameter->pic_width_in_luma_samples - (ctb_x * ctb_size) + 15) / 16;
1772
1773             if ((log2_cu_size == 3) && (pSequenceParameter->pic_width_in_luma_samples % 16))
1774                 drop_cu_column_in_last_mb = (16 - (pSequenceParameter->pic_width_in_luma_samples % 16)) >> log2_cu_size;
1775         }
1776
1777         mb_x = 0;
1778         mb_y = 0;
1779         macroblock_address = ctb_y * width_in_mbs * ctb_width_in_mb + ctb_x * ctb_width_in_mb;
1780         split_coding_unit_flag = ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
1781         cu_count = 1;
1782         cu_index = 0;
1783         mb_addr = 0;
1784         msg = NULL;
1785         for (mb_y = 0; mb_y < ctb_height_in_mb_internal; mb_y++) {
1786             mb_addr = macroblock_address + mb_y * width_in_mbs ;
1787             for (mb_x = 0; mb_x < ctb_width_in_mb_internal; mb_x++) {
1788                 max_cu_num_in_mb = 4;
1789                 if (drop_cu_row_in_last_mb && (mb_y == ctb_height_in_mb_internal - 1))
1790                     max_cu_num_in_mb /= 2;
1791
1792                 if (drop_cu_column_in_last_mb && (mb_x == ctb_width_in_mb_internal - 1))
1793                     max_cu_num_in_mb /= 2;
1794
1795                 /* get the mb info from the vme out */
1796                 msg = (unsigned int *)(msg_ptr + mb_addr * vme_context->vme_output.size_block);
1797
1798                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1799                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1800                 /*fill to indirect cu */
1801                 /*to do */
1802                 if (is_intra || intra_rdo < inter_rdo) {
1803                     /* fill intra cu */
1804                     tmp_mb_mode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4;
1805                     if (max_cu_num_in_mb < 4) {
1806                         if (tmp_mb_mode == AVC_INTRA_16X16) {
1807                             msg[0] = (msg[0] & !AVC_INTRA_MODE_MASK) | (AVC_INTRA_8X8 << 4);
1808                             tmp_mb_mode = AVC_INTRA_8X8;
1809                         }
1810
1811                         gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 0);
1812                         if (--max_cu_num_in_mb > 0)
1813                             gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 2);
1814
1815                         if (ctb_width_in_mb == 2)
1816                             split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1817                         else if (ctb_width_in_mb == 1)
1818                             split_coding_unit_flag |= 0x1 << 20;
1819                     } else if (tmp_mb_mode == AVC_INTRA_16X16) {
1820                         gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 0);
1821                     } else { // for 4x4 to use 8x8 replace
1822                         gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 0);
1823                         gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 1);
1824                         gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 2);
1825                         gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 3);
1826                         if (ctb_width_in_mb == 2)
1827                             split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1828                         else if (ctb_width_in_mb == 1)
1829                             split_coding_unit_flag |= 0x1 << 20;
1830                     }
1831                 } else {
1832                     msg += AVC_INTER_MSG_OFFSET;
1833                     /* fill inter cu */
1834                     tmp_mb_mode = msg[0] & AVC_INTER_MODE_MASK;
1835                     if (max_cu_num_in_mb < 4) {
1836                         if (tmp_mb_mode != AVC_INTER_8X8) {
1837                             msg[0] = (msg[0] & !AVC_INTER_MODE_MASK) | AVC_INTER_8X8;
1838                             tmp_mb_mode = AVC_INTER_8X8;
1839                         }
1840                         gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 0);
1841                         if (--max_cu_num_in_mb > 0)
1842                             gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 1);
1843
1844                         if (ctb_width_in_mb == 2)
1845                             split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1846                         else if (ctb_width_in_mb == 1)
1847                             split_coding_unit_flag |= 0x1 << 20;
1848                     } else if (tmp_mb_mode == AVC_INTER_8X8) {
1849                         gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 0);
1850                         gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 1);
1851                         gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 2);
1852                         gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 3);
1853                         if (ctb_width_in_mb == 2)
1854                             split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1855                         else if (ctb_width_in_mb == 1)
1856                             split_coding_unit_flag |= 0x1 << 20;
1857
1858                     } else if (tmp_mb_mode == AVC_INTER_16X16 ||
1859                                tmp_mb_mode == AVC_INTER_8X16 ||
1860                                tmp_mb_mode == AVC_INTER_16X8) {
1861                         gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 0);
1862                     }
1863                 }
1864                 mb_addr++;
1865             }
1866         }
1867
1868         cu_count = cu_index;
1869         // PAK object fill accordingly.
1870         gen9_hcpe_hevc_pak_object(ctx, ctb_x, ctb_y, last_ctb, encoder_context, cu_count, split_coding_unit_flag, slice_batch);
1871     }
1872
1873     dri_bo_unmap(mfc_context->hcp_indirect_cu_object.bo);
1874     dri_bo_unmap(vme_context->vme_output.bo);
1875
1876     if (last_slice) {
1877         mfc_context->insert_object(ctx, encoder_context,
1878                                    tail_data, 2, 8,
1879                                    2, 1, 1, 0, slice_batch);
1880     } else {
1881         mfc_context->insert_object(ctx, encoder_context,
1882                                    tail_data, 1, 8,
1883                                    1, 1, 1, 0, slice_batch);
1884     }
1885 }
1886
1887 static dri_bo *
1888 gen9_hcpe_hevc_software_batchbuffer(VADriverContextP ctx,
1889                                     struct encode_state *encode_state,
1890                                     struct intel_encoder_context *encoder_context)
1891 {
1892     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1893     struct intel_batchbuffer *batch;
1894     dri_bo *batch_bo;
1895     int i;
1896
1897     batch = mfc_context->aux_batchbuffer;
1898     batch_bo = batch->buffer;
1899
1900     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1901         gen9_hcpe_hevc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1902     }
1903
1904     intel_batchbuffer_align(batch, 8);
1905
1906     BEGIN_BCS_BATCH(batch, 2);
1907     OUT_BCS_BATCH(batch, 0);
1908     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1909     ADVANCE_BCS_BATCH(batch);
1910
1911     dri_bo_reference(batch_bo);
1912     intel_batchbuffer_free(batch);
1913     mfc_context->aux_batchbuffer = NULL;
1914
1915     return batch_bo;
1916 }
1917
1918 #else
1919
1920 #endif
1921
1922 static void
1923 gen9_hcpe_hevc_pipeline_programing(VADriverContextP ctx,
1924                                    struct encode_state *encode_state,
1925                                    struct intel_encoder_context *encoder_context)
1926 {
1927     struct i965_driver_data *i965 = i965_driver_data(ctx);
1928     struct intel_batchbuffer *batch = encoder_context->base.batch;
1929     dri_bo *slice_batch_bo;
1930
1931 #ifdef HCP_SOFTWARE_SKYLAKE
1932     slice_batch_bo = gen9_hcpe_hevc_software_batchbuffer(ctx, encode_state, encoder_context);
1933 #else
1934     slice_batch_bo = gen9_hcpe_hevc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1935 #endif
1936
1937     // begin programing
1938     if (i965->intel.has_bsd2)
1939         intel_batchbuffer_start_atomic_bcs_override(batch, 0x4000, BSD_RING0);
1940     else
1941         intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1942     intel_batchbuffer_emit_mi_flush(batch);
1943
1944     // picture level programing
1945     gen9_hcpe_hevc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1946
1947     BEGIN_BCS_BATCH(batch, 3);
1948     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1949     OUT_BCS_RELOC64(batch,
1950                     slice_batch_bo,
1951                     I915_GEM_DOMAIN_COMMAND, 0,
1952                     0);
1953     ADVANCE_BCS_BATCH(batch);
1954
1955     // end programing
1956     intel_batchbuffer_end_atomic(batch);
1957
1958     dri_bo_unreference(slice_batch_bo);
1959 }
1960
1961 void intel_hcpe_hevc_pipeline_header_programing(VADriverContextP ctx,
1962                                                 struct encode_state *encode_state,
1963                                                 struct intel_encoder_context *encoder_context,
1964                                                 struct intel_batchbuffer *slice_batch)
1965 {
1966     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1967     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_VPS);
1968     unsigned int skip_emul_byte_cnt;
1969
1970     if (encode_state->packed_header_data[idx]) {
1971         VAEncPackedHeaderParameterBuffer *param = NULL;
1972         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1973         unsigned int length_in_bits;
1974
1975         assert(encode_state->packed_header_param[idx]);
1976         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1977         length_in_bits = param->bit_length;
1978
1979         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1980         mfc_context->insert_object(ctx,
1981                                    encoder_context,
1982                                    header_data,
1983                                    ALIGN(length_in_bits, 32) >> 5,
1984                                    length_in_bits & 0x1f,
1985                                    skip_emul_byte_cnt,
1986                                    0,
1987                                    0,
1988                                    !param->has_emulation_bytes,
1989                                    slice_batch);
1990     }
1991
1992     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_VPS) + 1; // index to SPS
1993
1994     if (encode_state->packed_header_data[idx]) {
1995         VAEncPackedHeaderParameterBuffer *param = NULL;
1996         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1997         unsigned int length_in_bits;
1998
1999         assert(encode_state->packed_header_param[idx]);
2000         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2001         length_in_bits = param->bit_length;
2002
2003         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2004         mfc_context->insert_object(ctx,
2005                                    encoder_context,
2006                                    header_data,
2007                                    ALIGN(length_in_bits, 32) >> 5,
2008                                    length_in_bits & 0x1f,
2009                                    skip_emul_byte_cnt,
2010                                    0,
2011                                    0,
2012                                    !param->has_emulation_bytes,
2013                                    slice_batch);
2014     }
2015
2016     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_PPS);
2017
2018     if (encode_state->packed_header_data[idx]) {
2019         VAEncPackedHeaderParameterBuffer *param = NULL;
2020         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2021         unsigned int length_in_bits;
2022
2023         assert(encode_state->packed_header_param[idx]);
2024         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2025         length_in_bits = param->bit_length;
2026
2027         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2028
2029         mfc_context->insert_object(ctx,
2030                                    encoder_context,
2031                                    header_data,
2032                                    ALIGN(length_in_bits, 32) >> 5,
2033                                    length_in_bits & 0x1f,
2034                                    skip_emul_byte_cnt,
2035                                    0,
2036                                    0,
2037                                    !param->has_emulation_bytes,
2038                                    slice_batch);
2039     }
2040
2041     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_SEI);
2042
2043     if (encode_state->packed_header_data[idx]) {
2044         VAEncPackedHeaderParameterBuffer *param = NULL;
2045         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2046         unsigned int length_in_bits;
2047
2048         assert(encode_state->packed_header_param[idx]);
2049         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2050         length_in_bits = param->bit_length;
2051
2052         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2053         mfc_context->insert_object(ctx,
2054                                    encoder_context,
2055                                    header_data,
2056                                    ALIGN(length_in_bits, 32) >> 5,
2057                                    length_in_bits & 0x1f,
2058                                    skip_emul_byte_cnt,
2059                                    0,
2060                                    0,
2061                                    !param->has_emulation_bytes,
2062                                    slice_batch);
2063     }
2064 }
2065
/*
 * Bind all per-frame input/output buffers into the HCP encoder context:
 * the reconstructed surface and its MV temporal buffer, the reference
 * surfaces and their MV buffers, the raw input surface, and the coded
 * (PAK-BSE) output buffer.  Every dri_bo stored here is referenced; the
 * matching unreference happens in gen9_hcpe_context_destroy()/gen9_hcpe_init().
 */
VAStatus intel_hcpe_hevc_prepare(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    GenHevcSurface *hevc_encoder_surface;
    dri_bo *bo;
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    int i;
    struct i965_coded_buffer_segment *coded_buffer_segment;

    /*Setup all the input&output object*/

    /* Setup current frame and current direct mv buffer*/
    obj_surface = encode_state->reconstructed_object;

    hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
    assert(hevc_encoder_surface);

    if (hevc_encoder_surface) {
        /* Reconstructed surface needs a fresh P010->NV12 pass this frame. */
        hevc_encoder_surface->has_p010_to_nv12_done = 0;
        /* Invalidate the cached frame-store slot; it is reassigned later. */
        hevc_encoder_surface->base.frame_store_id = -1;
        /* The current frame's collocated MV buffer always occupies the last slot. */
        mfc_context->current_collocated_mv_temporal_buffer[NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS - 1].bo = hevc_encoder_surface->motion_vector_temporal_bo;
        dri_bo_reference(hevc_encoder_surface->motion_vector_temporal_bo);
    }

    mfc_context->surface_state.width = obj_surface->orig_width;
    mfc_context->surface_state.height = obj_surface->orig_height;
    mfc_context->surface_state.w_pitch = obj_surface->width;
    mfc_context->surface_state.h_pitch = obj_surface->height;

    /* Setup reference frames and direct mv buffers*/
    for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
        obj_surface = encode_state->reference_objects[i];

        if (obj_surface && obj_surface->bo) {
            mfc_context->reference_surfaces[i].bo = obj_surface->bo;
            dri_bo_reference(obj_surface->bo);

            /* Check MV temporal buffer */
            hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
            assert(hevc_encoder_surface);

            if (hevc_encoder_surface) {
                hevc_encoder_surface->base.frame_store_id = -1;
                /* Setup MV temporal buffer */
                mfc_context->current_collocated_mv_temporal_buffer[i].bo = hevc_encoder_surface->motion_vector_temporal_bo;
                dri_bo_reference(hevc_encoder_surface->motion_vector_temporal_bo);
            }
        } else {
            /* Reference list is packed; the first empty slot ends it. */
            break;
        }
    }


    mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);

    /* Coded buffer: the bitstream is written after the status header, and
     * the end offset keeps one page in reserve. */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    mfc_context->hcp_indirect_pak_bse_object.bo = bo;
    mfc_context->hcp_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
    mfc_context->hcp_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
    dri_bo_reference(mfc_context->hcp_indirect_pak_bse_object.bo);

    /* Reset the coded buffer status header for this frame. */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)(bo->virtual);
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    dri_bo_unmap(bo);

    return vaStatus;
}
2141
2142 /* HEVC BRC related */
2143
2144 static void
2145 intel_hcpe_bit_rate_control_context_init(struct encode_state *encode_state,
2146                                          struct intel_encoder_context *encoder_context)
2147 {
2148     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2149     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2150     int ctb_size = 16;
2151     int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
2152     int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
2153
2154     double fps = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
2155     double bitrate = encoder_context->brc.bits_per_second[0];
2156     int inter_mb_size = bitrate * 1.0 / (fps + 4.0) / width_in_mbs / height_in_mbs;
2157     int intra_mb_size = inter_mb_size * 5.0;
2158     int i;
2159
2160     mfc_context->bit_rate_control_context[HEVC_SLICE_I].target_mb_size = intra_mb_size;
2161     mfc_context->bit_rate_control_context[HEVC_SLICE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
2162     mfc_context->bit_rate_control_context[HEVC_SLICE_P].target_mb_size = inter_mb_size;
2163     mfc_context->bit_rate_control_context[HEVC_SLICE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
2164     mfc_context->bit_rate_control_context[HEVC_SLICE_B].target_mb_size = inter_mb_size;
2165     mfc_context->bit_rate_control_context[HEVC_SLICE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
2166
2167     for (i = 0 ; i < 3; i++) {
2168         mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
2169         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
2170         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
2171         mfc_context->bit_rate_control_context[i].GrowInit = 6;
2172         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
2173         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
2174         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
2175
2176         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
2177         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
2178         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
2179         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
2180         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
2181         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
2182     }
2183
2184     mfc_context->bit_rate_control_context[HEVC_SLICE_I].TargetSizeInWord = (intra_mb_size + 16) / 16;
2185     mfc_context->bit_rate_control_context[HEVC_SLICE_P].TargetSizeInWord = (inter_mb_size + 16) / 16;
2186     mfc_context->bit_rate_control_context[HEVC_SLICE_B].TargetSizeInWord = (inter_mb_size + 16) / 16;
2187
2188     mfc_context->bit_rate_control_context[HEVC_SLICE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_I].TargetSizeInWord * 1.5;
2189     mfc_context->bit_rate_control_context[HEVC_SLICE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_P].TargetSizeInWord * 1.5;
2190     mfc_context->bit_rate_control_context[HEVC_SLICE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_B].TargetSizeInWord * 1.5;
2191 }
2192
/*
 * Initialize the HRD-model side of bit-rate control: per-slice-type frame
 * budgets derived from the GOP structure, HRD buffer size/fullness, and the
 * initial QP estimate interpolated between QP=1 and QP=51 frame sizes.
 */
static void intel_hcpe_brc_init(struct encode_state *encode_state,
                                struct intel_encoder_context* encoder_context)
{
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;

    double bitrate = (double)encoder_context->brc.bits_per_second[0];
    double framerate = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
    int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
    int intra_period = pSequenceParameter->intra_period;
    int ip_period = pSequenceParameter->ip_period;
    /* Rough uncompressed-frame-size scaling: a QP=1 frame is assumed to be
     * ~10% of raw 4:2:0 size, a QP=51 frame ~0.1% of it. */
    double qp1_size = 0.1 * 8 * 3 * pSequenceParameter->pic_width_in_luma_samples * pSequenceParameter->pic_height_in_luma_samples / 2;
    double qp51_size = 0.001 * 8 * 3 * pSequenceParameter->pic_width_in_luma_samples * pSequenceParameter->pic_height_in_luma_samples / 2;
    double bpf;
    /* HRD buffer size is clamped to [1, 32] seconds of bitrate; default 8s. */
    int ratio_min = 1;
    int ratio_max = 32;
    int ratio = 8;
    double buffer_size = 0;
    int bpp = 1;

    /* 10-bit content roughly doubles the bytes per pixel. */
    if ((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0) ||
        (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
        bpp = 2;

    qp1_size = qp1_size * bpp;
    qp51_size = qp51_size * bpp;

    if (pSequenceParameter->ip_period) {
        pnum = (intra_period + ip_period - 1) / ip_period - 1;
        bnum = intra_period - inum - pnum;
    }

    mfc_context->brc.mode = encoder_context->rate_control_mode;

    /* Split one GOP's bit budget across I/P/B frames using the BRC weights. */
    mfc_context->brc.target_frame_size[HEVC_SLICE_I] = (int)((double)((bitrate * intra_period) / framerate) /
                                                             (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
    mfc_context->brc.target_frame_size[HEVC_SLICE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[HEVC_SLICE_I];
    mfc_context->brc.target_frame_size[HEVC_SLICE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[HEVC_SLICE_I];

    mfc_context->brc.gop_nums[HEVC_SLICE_I] = inum;
    mfc_context->brc.gop_nums[HEVC_SLICE_P] = pnum;
    mfc_context->brc.gop_nums[HEVC_SLICE_B] = bnum;

    bpf = mfc_context->brc.bits_per_frame = bitrate / framerate;

    if (!encoder_context->brc.hrd_buffer_size) {
        /* No application-provided HRD size: default to 8 seconds of bitrate,
         * starting half full. */
        mfc_context->hrd.buffer_size = bitrate * ratio;
        mfc_context->hrd.current_buffer_fullness =
            (double)(bitrate * ratio / 2 < mfc_context->hrd.buffer_size) ?
            bitrate * ratio / 2 : mfc_context->hrd.buffer_size / 2.;
    } else {
        /* Clamp the application-provided HRD size to a sane range. */
        buffer_size = (double)encoder_context->brc.hrd_buffer_size;
        if (buffer_size < bitrate * ratio_min) {
            buffer_size = bitrate * ratio_min;
        } else if (buffer_size > bitrate * ratio_max) {
            buffer_size = bitrate * ratio_max ;
        }
        mfc_context->hrd.buffer_size = buffer_size;
        if (encoder_context->brc.hrd_initial_buffer_fullness) {
            /* Use the requested initial fullness unless it exceeds the buffer. */
            mfc_context->hrd.current_buffer_fullness =
                (double)(encoder_context->brc.hrd_initial_buffer_fullness < mfc_context->hrd.buffer_size) ?
                encoder_context->brc.hrd_initial_buffer_fullness : mfc_context->hrd.buffer_size / 2.;
        } else {
            mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size / 2.;

        }
    }

    mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size / 2.;
    mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size / qp1_size;
    mfc_context->hrd.violation_noted = 0;

    /* Initial QP: linear interpolation on the [qp51_size, qp1_size] range. */
    if ((bpf > qp51_size) && (bpf < qp1_size)) {
        mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 51 - 50 * (bpf - qp51_size) / (qp1_size - qp51_size);
    } else if (bpf >= qp1_size)
        mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 1;
    else if (bpf <= qp51_size)
        mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 51;

    mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
    mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY = mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY;

    /* Keep the first-frame QP away from the extremes per slice type. */
    BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY, 1, 36);
    BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY, 1, 40);
    BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY, 1, 45);
}
2279
2280 int intel_hcpe_update_hrd(struct encode_state *encode_state,
2281                           struct gen9_hcpe_context *mfc_context,
2282                           int frame_bits)
2283 {
2284     double prev_bf = mfc_context->hrd.current_buffer_fullness;
2285
2286     mfc_context->hrd.current_buffer_fullness -= frame_bits;
2287
2288     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
2289         mfc_context->hrd.current_buffer_fullness = prev_bf;
2290         return BRC_UNDERFLOW;
2291     }
2292
2293     mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
2294     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
2295         if (mfc_context->brc.mode == VA_RC_VBR)
2296             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
2297         else {
2298             mfc_context->hrd.current_buffer_fullness = prev_bf;
2299             return BRC_OVERFLOW;
2300         }
2301     }
2302     return BRC_NO_HRD_VIOLATION;
2303 }
2304
/*
 * Post-PAK bit-rate control step: given the actual coded size of the frame
 * (frame_bits), predict the QP for the next frame of the same slice type,
 * check HRD compliance, and propagate QP corrections to the other slice
 * types.  Returns a gen6_brc_status code; anything other than
 * BRC_NO_HRD_VIOLATION means the caller may need to re-encode.
 */
int intel_hcpe_brc_postpack(struct encode_state *encode_state,
                            struct gen9_hcpe_context *mfc_context,
                            int frame_bits)
{
    gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
    int slicetype = pSliceParameter->slice_type;
    int qpi = mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY;
    int qpp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
    int qpb = mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY;
    int qp; // quantizer of previously encoded slice of current type
    int qpn; // predicted quantizer for next frame of current type in integer format
    double qpf; // predicted quantizer for next frame of current type in float format
    double delta_qp; // QP correction
    int target_frame_size, frame_size_next;
    /* Notes:
     *  x - how far we are from HRD buffer borders
     *  y - how far we are from target HRD buffer fullness
     */
    double x, y;
    double frame_size_alpha;

    /* B frames that act as forward-only references are budgeted as P. */
    if (slicetype == HEVC_SLICE_B) {
        if (pSequenceParameter->ip_period == 1) {
            slicetype = HEVC_SLICE_P;
        } else if (mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1) {
            slicetype = HEVC_SLICE_P;
        }
    }

    qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;

    /* Smooth the next-frame size prediction; more frames of this type in
     * the GOP means heavier damping (capped at 30). */
    target_frame_size = mfc_context->brc.target_frame_size[slicetype];
    if (mfc_context->hrd.buffer_capacity < 5)
        frame_size_alpha = 0;
    else
        frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
    if (frame_size_alpha > 30) frame_size_alpha = 30;
    frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
                      (double)(frame_size_alpha + 1.);

    /* frame_size_next: avoiding negative number and too small value */
    if ((double)frame_size_next < (double)(target_frame_size * 0.25))
        frame_size_next = (int)((double)target_frame_size * 0.25);

    /* Scale QP by the ratio of target to predicted size (size ~ 1/QP model). */
    qpf = (double)qp * target_frame_size / frame_size_next;
    qpn = (int)(qpf + 0.5);

    if (qpn == qp) {
        /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
        mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
        if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
            qpn++;
            mfc_context->brc.qpf_rounding_accumulator = 0.;
        } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
            qpn--;
            mfc_context->brc.qpf_rounding_accumulator = 0.;
        }
    }
    /* making sure that QP is not changing too fast */
    if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
    else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, 1, 51);

    /* checking wthether HRD compliance is still met */
    sts = intel_hcpe_update_hrd(encode_state, mfc_context, frame_bits);

    /* calculating QP delta as some function*/
    x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
    if (x > 0) {
        x /= mfc_context->hrd.target_buffer_fullness;
        y = mfc_context->hrd.current_buffer_fullness;
    } else {
        x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
        y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
    }
    if (y < 0.01) y = 0.01;
    if (x > 1) x = 1;
    else if (x < -1) x = -1;

    /* Smooth sinusoidal correction: larger when close to a buffer border
     * (small y), signed by which side of the target fullness we are on. */
    delta_qp = BRC_QP_MAX_CHANGE * exp(-1 / y) * sin(BRC_PI_0_5 * x);
    qpn = (int)(qpn + delta_qp + 0.5);

    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, 1, 51);

    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* correcting QPs of slices of other types */
        if (slicetype == HEVC_SLICE_P) {
            if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
            if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
        } else if (slicetype == HEVC_SLICE_I) {
            if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
                mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
            if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
        } else { // HEVC_SLICE_B
            if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
            if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
                mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
        }
        BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY, 1, 51);
        BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY, 1, 51);
        BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY, 1, 51);
    } else if (sts == BRC_UNDERFLOW) { // underflow
        if (qpn <= qp) qpn = qp + 1;
        if (qpn > 51) {
            qpn = 51;
            sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
        }
    } else if (sts == BRC_OVERFLOW) {
        if (qpn >= qp) qpn = qp - 1;
        if (qpn < 1) { // < 0 (?) overflow with minQP
            qpn = 1;
            sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
        }
    }

    mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;

    return sts;
}
2432
2433 static void intel_hcpe_hrd_context_init(struct encode_state *encode_state,
2434                                         struct intel_encoder_context *encoder_context)
2435 {
2436     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2437     unsigned int rate_control_mode = encoder_context->rate_control_mode;
2438     unsigned int target_bit_rate = encoder_context->brc.bits_per_second[0];
2439
2440     // current we only support CBR mode.
2441     if (rate_control_mode == VA_RC_CBR) {
2442         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
2443         mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
2444         mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
2445         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
2446         mfc_context->vui_hrd.i_frame_number = 0;
2447
2448         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
2449         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
2450         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
2451     }
2452
2453 }
2454
2455 void
2456 intel_hcpe_hrd_context_update(struct encode_state *encode_state,
2457                               struct gen9_hcpe_context *mfc_context)
2458 {
2459     mfc_context->vui_hrd.i_frame_number++;
2460 }
2461
2462 int intel_hcpe_interlace_check(VADriverContextP ctx,
2463                                struct encode_state *encode_state,
2464                                struct intel_encoder_context *encoder_context)
2465 {
2466     VAEncSliceParameterBufferHEVC *pSliceParameter;
2467     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2468     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
2469     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
2470     int ctb_size = 1 << log2_ctb_size;
2471     int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
2472     int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
2473     int i;
2474     int ctbCount = 0;
2475
2476     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2477         pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[i]->buffer;
2478         ctbCount += pSliceParameter->num_ctu_in_slice;
2479     }
2480
2481     if (ctbCount == (width_in_ctb * height_in_ctb))
2482         return 0;
2483
2484     return 1;
2485 }
2486
2487 void intel_hcpe_brc_prepare(struct encode_state *encode_state,
2488                             struct intel_encoder_context *encoder_context)
2489 {
2490     unsigned int rate_control_mode = encoder_context->rate_control_mode;
2491     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2492
2493     if (rate_control_mode == VA_RC_CBR) {
2494         bool brc_updated;
2495         assert(encoder_context->codec != CODEC_MPEG2);
2496
2497         brc_updated = encoder_context->brc.need_reset;
2498
2499         /*Programing bit rate control */
2500         if ((mfc_context->bit_rate_control_context[HEVC_SLICE_I].MaxSizeInWord == 0) ||
2501             brc_updated) {
2502             intel_hcpe_bit_rate_control_context_init(encode_state, encoder_context);
2503             intel_hcpe_brc_init(encode_state, encoder_context);
2504         }
2505
2506         /*Programing HRD control */
2507         if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated)
2508             intel_hcpe_hrd_context_init(encode_state, encoder_context);
2509     }
2510 }
2511
2512 /* HEVC interface API for encoder */
2513
2514 static VAStatus
2515 gen9_hcpe_hevc_encode_picture(VADriverContextP ctx,
2516                               struct encode_state *encode_state,
2517                               struct intel_encoder_context *encoder_context)
2518 {
2519     struct gen9_hcpe_context *hcpe_context = encoder_context->mfc_context;
2520     unsigned int rate_control_mode = encoder_context->rate_control_mode;
2521     int current_frame_bits_size;
2522     int sts;
2523
2524     for (;;) {
2525         gen9_hcpe_init(ctx, encode_state, encoder_context);
2526         intel_hcpe_hevc_prepare(ctx, encode_state, encoder_context);
2527         /*Programing bcs pipeline*/
2528         gen9_hcpe_hevc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
2529         gen9_hcpe_run(ctx, encode_state, encoder_context);
2530         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
2531             gen9_hcpe_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
2532             sts = intel_hcpe_brc_postpack(encode_state, hcpe_context, current_frame_bits_size);
2533             if (sts == BRC_NO_HRD_VIOLATION) {
2534                 intel_hcpe_hrd_context_update(encode_state, hcpe_context);
2535                 break;
2536             } else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
2537                 if (!hcpe_context->hrd.violation_noted) {
2538                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP) ? "overflow" : "underflow");
2539                     hcpe_context->hrd.violation_noted = 1;
2540                 }
2541                 return VA_STATUS_SUCCESS;
2542             }
2543         } else {
2544             break;
2545         }
2546     }
2547
2548     return VA_STATUS_SUCCESS;
2549 }
2550
2551 void
2552 gen9_hcpe_context_destroy(void *context)
2553 {
2554     struct gen9_hcpe_context *hcpe_context = context;
2555     int i;
2556
2557     dri_bo_unreference(hcpe_context->deblocking_filter_line_buffer.bo);
2558     hcpe_context->deblocking_filter_line_buffer.bo = NULL;
2559
2560     dri_bo_unreference(hcpe_context->deblocking_filter_tile_line_buffer.bo);
2561     hcpe_context->deblocking_filter_tile_line_buffer.bo = NULL;
2562
2563     dri_bo_unreference(hcpe_context->deblocking_filter_tile_column_buffer.bo);
2564     hcpe_context->deblocking_filter_tile_column_buffer.bo = NULL;
2565
2566     dri_bo_unreference(hcpe_context->uncompressed_picture_source.bo);
2567     hcpe_context->uncompressed_picture_source.bo = NULL;
2568
2569     dri_bo_unreference(hcpe_context->metadata_line_buffer.bo);
2570     hcpe_context->metadata_line_buffer.bo = NULL;
2571
2572     dri_bo_unreference(hcpe_context->metadata_tile_line_buffer.bo);
2573     hcpe_context->metadata_tile_line_buffer.bo = NULL;
2574
2575     dri_bo_unreference(hcpe_context->metadata_tile_column_buffer.bo);
2576     hcpe_context->metadata_tile_column_buffer.bo = NULL;
2577
2578     dri_bo_unreference(hcpe_context->sao_line_buffer.bo);
2579     hcpe_context->sao_line_buffer.bo = NULL;
2580
2581     dri_bo_unreference(hcpe_context->sao_tile_line_buffer.bo);
2582     hcpe_context->sao_tile_line_buffer.bo = NULL;
2583
2584     dri_bo_unreference(hcpe_context->sao_tile_column_buffer.bo);
2585     hcpe_context->sao_tile_column_buffer.bo = NULL;
2586
2587     /* mv temporal buffer */
2588     for (i = 0; i < NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS; i++) {
2589         if (hcpe_context->current_collocated_mv_temporal_buffer[i].bo != NULL)
2590             dri_bo_unreference(hcpe_context->current_collocated_mv_temporal_buffer[i].bo);
2591         hcpe_context->current_collocated_mv_temporal_buffer[i].bo = NULL;
2592     }
2593
2594     for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
2595         dri_bo_unreference(hcpe_context->reference_surfaces[i].bo);
2596         hcpe_context->reference_surfaces[i].bo = NULL;
2597     }
2598
2599     dri_bo_unreference(hcpe_context->hcp_indirect_cu_object.bo);
2600     hcpe_context->hcp_indirect_cu_object.bo = NULL;
2601
2602     dri_bo_unreference(hcpe_context->hcp_indirect_pak_bse_object.bo);
2603     hcpe_context->hcp_indirect_pak_bse_object.bo = NULL;
2604
2605     dri_bo_unreference(hcpe_context->hcp_batchbuffer_surface.bo);
2606     hcpe_context->hcp_batchbuffer_surface.bo = NULL;
2607
2608     dri_bo_unreference(hcpe_context->aux_batchbuffer_surface.bo);
2609     hcpe_context->aux_batchbuffer_surface.bo = NULL;
2610
2611     if (hcpe_context->aux_batchbuffer)
2612         intel_batchbuffer_free(hcpe_context->aux_batchbuffer);
2613
2614     hcpe_context->aux_batchbuffer = NULL;
2615
2616     free(hcpe_context);
2617 }
2618
2619 VAStatus gen9_hcpe_pipeline(VADriverContextP ctx,
2620                             VAProfile profile,
2621                             struct encode_state *encode_state,
2622                             struct intel_encoder_context *encoder_context)
2623 {
2624     VAStatus vaStatus;
2625
2626     switch (profile) {
2627     case VAProfileHEVCMain:
2628     case VAProfileHEVCMain10:
2629         vaStatus = gen9_hcpe_hevc_encode_picture(ctx, encode_state, encoder_context);
2630         break;
2631
2632     default:
2633         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2634         break;
2635     }
2636
2637     return vaStatus;
2638 }
2639
2640 Bool gen9_hcpe_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2641 {
2642     struct gen9_hcpe_context *hcpe_context = calloc(1, sizeof(struct gen9_hcpe_context));
2643
2644     assert(hcpe_context);
2645     hcpe_context->pipe_mode_select = gen9_hcpe_pipe_mode_select;
2646     hcpe_context->set_surface_state = gen9_hcpe_surface_state;
2647     hcpe_context->ind_obj_base_addr_state = gen9_hcpe_ind_obj_base_addr_state;
2648     hcpe_context->pic_state = gen9_hcpe_hevc_pic_state;
2649     hcpe_context->qm_state = gen9_hcpe_hevc_qm_state;
2650     hcpe_context->fqm_state = gen9_hcpe_hevc_fqm_state;
2651     hcpe_context->insert_object = gen9_hcpe_hevc_insert_object;
2652     hcpe_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
2653
2654     encoder_context->mfc_context = hcpe_context;
2655     encoder_context->mfc_context_destroy = gen9_hcpe_context_destroy;
2656     encoder_context->mfc_pipeline = gen9_hcpe_pipeline;
2657     encoder_context->mfc_brc_prepare = intel_hcpe_brc_prepare;
2658
2659     hevc_gen_default_iq_matrix_encoder(&hcpe_context->iq_matrix_hevc);
2660
2661     return True;
2662 }