[android-x86/hardware-intel-common-vaapi.git] src/gen9_mfc_hevc.c

1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Qu Pengfei <Pengfei.Qu@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41 #include "gen9_mfc.h"
42 #include "gen6_vme.h"
43 #include "intel_media.h"
44
45 typedef enum _gen6_brc_status {
46     BRC_NO_HRD_VIOLATION = 0,
47     BRC_UNDERFLOW = 1,
48     BRC_OVERFLOW = 2,
49     BRC_UNDERFLOW_WITH_MAX_QP = 3,
50     BRC_OVERFLOW_WITH_MIN_QP = 4,
51 } gen6_brc_status;
52
53 /* BRC define */
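/* BRC_CLIP clamps x into [min, max] in place; x and the bounds are
 * evaluated more than once, so the arguments must be side-effect free. */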
54 #define BRC_CLIP(x, min, max)                                   \
55     {                                                           \
56         x = ((x > (max)) ? (max) : ((x < (min)) ? (min) : x));  \
57     }
58
59 #define BRC_P_B_QP_DIFF 4
60 #define BRC_I_P_QP_DIFF 2
61 #define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF)
62
63 #define BRC_PWEIGHT 0.6  /* weight of a P slice relative to an I slice */
64 #define BRC_BWEIGHT 0.25 /* weight of a B slice relative to an I slice */
65
66 #define BRC_QP_MAX_CHANGE 5 /* maximum qp modification */
67 #define BRC_CY 0.1 /* weight for */
68 #define BRC_CX_UNDERFLOW 5.
69 #define BRC_CX_OVERFLOW -4.
70
71 #define BRC_PI_0_5 1.5707963267948966192313216916398
72
73 /* intel buffer write */
74 #define ALLOC_ENCODER_BUFFER(gen_buffer, string, size) do {     \
75         dri_bo_unreference(gen_buffer->bo);                     \
76         gen_buffer->bo = dri_bo_alloc(i965->intel.bufmgr,       \
77                                       string,                   \
78                                       size,                     \
79                                       0x1000);                  \
80         assert(gen_buffer->bo);                                 \
81     } while (0)
82
83
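/* Helper used by the HCP state commands below: emits the buffer address as a
 * relocation (or 0 when buf_bo is NULL), a second zero DW, and, when 'ma' is
 * set, one more zero DW, presumably the memory address attributes. The
 * *_MA_* wrappers include that extra DW, the *_NMA_* wrappers omit it. */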
84 #define OUT_BUFFER_X(buf_bo, is_target, ma)  do {                         \
85         if (buf_bo) {                                                   \
86             OUT_BCS_RELOC(batch,                                        \
87                           buf_bo,                                       \
88                           I915_GEM_DOMAIN_INSTRUCTION,                       \
89                           is_target ? I915_GEM_DOMAIN_INSTRUCTION : 0,       \
90                           0);                                           \
91         } else {                                                        \
92             OUT_BCS_BATCH(batch, 0);                                    \
93         }                                                               \
94         OUT_BCS_BATCH(batch, 0);                                        \
95         if (ma)                                                         \
96             OUT_BCS_BATCH(batch, 0);                                    \
97     } while (0)
98
99 #define OUT_BUFFER_MA_TARGET(buf_bo)       OUT_BUFFER_X(buf_bo, 1, 1)
100 #define OUT_BUFFER_MA_REFERENCE(buf_bo)    OUT_BUFFER_X(buf_bo, 0, 1)
101 #define OUT_BUFFER_NMA_TARGET(buf_bo)      OUT_BUFFER_X(buf_bo, 1, 0)
102 #define OUT_BUFFER_NMA_REFERENCE(buf_bo)   OUT_BUFFER_X(buf_bo, 0, 0)
103
104
105 #define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
106 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
107 #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
108
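/* When defined, the driver builds the HEVC CU records and PAK objects in
 * software from the AVC-style VME output (see the
 * gen9_hcpe_hevc_fill_indirect_cu_* helpers below); this appears to be the
 * path used on Skylake. */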
109 #define HCP_SOFTWARE_SKYLAKE    1
110
111 #define NUM_HCPE_KERNEL 2
112
113 #define     INTER_MODE_MASK     0x03
114 #define     INTER_8X8       0x03
115 #define     INTER_16X8      0x01
116 #define     INTER_8X16      0x02
117 #define     SUBMB_SHAPE_MASK    0x00FF00
118
119 #define     INTER_MV8       (4 << 20)
120 #define     INTER_MV32      (6 << 20)
121
122
123 /* HEVC */
124
125 /* utils */
126 static void
127 hevc_gen_default_iq_matrix_encoder(VAQMatrixBufferHEVC *iq_matrix)
128 {
129     /* Flat_4x4_16 */
130     memset(&iq_matrix->scaling_lists_4x4, 16, sizeof(iq_matrix->scaling_lists_4x4));
131
132     /* Flat_8x8_16 */
133     memset(&iq_matrix->scaling_lists_8x8, 16, sizeof(iq_matrix->scaling_lists_8x8));
134
135     /* Flat_16x16_16 */
136     memset(&iq_matrix->scaling_lists_16x16, 16, sizeof(iq_matrix->scaling_lists_16x16));
137
138     /* Flat_32x32_16 */
139     memset(&iq_matrix->scaling_lists_32x32, 16, sizeof(iq_matrix->scaling_lists_32x32));
140
141     /* Flat_16x16_dc_16 */
142     memset(&iq_matrix->scaling_list_dc_16x16, 16, sizeof(iq_matrix->scaling_list_dc_16x16));
143
144     /* Flat_32x32_dc_16 */
145     memset(&iq_matrix->scaling_list_dc_32x32, 16, sizeof(iq_matrix->scaling_list_dc_32x32));
146 }
147
148 /* HEVC picture and slice state related */
149
150 static void
151 gen9_hcpe_pipe_mode_select(VADriverContextP ctx,
152                            int standard_select,
153                            struct intel_encoder_context *encoder_context)
154 {
155     struct intel_batchbuffer *batch = encoder_context->base.batch;
156
157     assert(standard_select == HCP_CODEC_HEVC);
158
159     BEGIN_BCS_BATCH(batch, 4);
160
161     OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (4 - 2));
162     OUT_BCS_BATCH(batch,
163                   (standard_select << 5) |
164                   (0 << 3) | /* disable Pic Status / Error Report */
165                   HCP_CODEC_SELECT_ENCODE);
166     OUT_BCS_BATCH(batch, 0);
167     OUT_BCS_BATCH(batch, 0);
168
169     ADVANCE_BCS_BATCH(batch);
170 }
171
172 static void
173 gen9_hcpe_surface_state(VADriverContextP ctx, struct encode_state *encode_state,
174                         struct intel_encoder_context *encoder_context)
175 {
176     struct intel_batchbuffer *batch = encoder_context->base.batch;
177     struct object_surface *obj_surface = encode_state->reconstructed_object;
178     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
179
180     /* to do */
181     unsigned int y_cb_offset;
182
183     assert(obj_surface);
184
185     y_cb_offset = obj_surface->y_cb_offset;
186
187     BEGIN_BCS_BATCH(batch, 3);
188     OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
189     OUT_BCS_BATCH(batch,
190                   (1 << 28) |                   /* surface id */
191                   (mfc_context->surface_state.w_pitch - 1));    /* pitch - 1 */
192     OUT_BCS_BATCH(batch,
193                   (SURFACE_FORMAT_PLANAR_420_8 << 28) |
194                   y_cb_offset);
195     ADVANCE_BCS_BATCH(batch);
196
197     BEGIN_BCS_BATCH(batch, 3);
198     OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
199     OUT_BCS_BATCH(batch,
200                   (0 << 28) |                   /* surface id */
201                   (mfc_context->surface_state.w_pitch - 1));    /* pitch - 1 */
202     OUT_BCS_BATCH(batch,
203                   (SURFACE_FORMAT_PLANAR_420_8 << 28) |
204                   y_cb_offset);
205     ADVANCE_BCS_BATCH(batch);
206 }
207
208 static void
209 gen9_hcpe_pipe_buf_addr_state(VADriverContextP ctx, struct encode_state *encode_state,
210                               struct intel_encoder_context *encoder_context)
211 {
212     struct intel_batchbuffer *batch = encoder_context->base.batch;
213     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
214     struct object_surface *obj_surface;
215     GenHevcSurface *hcpe_hevc_surface;
216     dri_bo *bo;
217     unsigned int i;
218
219     BEGIN_BCS_BATCH(batch, 95);
220
221     OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (95 - 2));
222
223     obj_surface = encode_state->reconstructed_object;
224     assert(obj_surface && obj_surface->bo);
225     hcpe_hevc_surface = obj_surface->private_data;
226     assert(hcpe_hevc_surface && hcpe_hevc_surface->motion_vector_temporal_bo);
227
228     OUT_BUFFER_MA_TARGET(obj_surface->bo); /* DW 1..3 */
229     OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_line_buffer.bo);/* DW 4..6 */
230     OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_tile_line_buffer.bo); /* DW 7..9 */
231     OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_tile_column_buffer.bo); /* DW 10..12 */
232     OUT_BUFFER_MA_TARGET(mfc_context->metadata_line_buffer.bo);         /* DW 13..15 */
233     OUT_BUFFER_MA_TARGET(mfc_context->metadata_tile_line_buffer.bo);    /* DW 16..18 */
234     OUT_BUFFER_MA_TARGET(mfc_context->metadata_tile_column_buffer.bo);  /* DW 19..21 */
235     OUT_BUFFER_MA_TARGET(mfc_context->sao_line_buffer.bo);              /* DW 22..24 */
236     OUT_BUFFER_MA_TARGET(mfc_context->sao_tile_line_buffer.bo);         /* DW 25..27 */
237     OUT_BUFFER_MA_TARGET(mfc_context->sao_tile_column_buffer.bo);       /* DW 28..30 */
238     OUT_BUFFER_MA_TARGET(hcpe_hevc_surface->motion_vector_temporal_bo); /* DW 31..33 */
239     OUT_BUFFER_MA_TARGET(NULL); /* DW 34..36, reserved */
240
241     /* at most 8 reference surfaces are allowed here */
242     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
243         bo = mfc_context->reference_surfaces[i].bo;
244
245         if (bo) {
246             OUT_BUFFER_NMA_REFERENCE(bo);
247         } else
248             OUT_BUFFER_NMA_REFERENCE(NULL);
249     }
250     OUT_BCS_BATCH(batch, 0);    /* DW 53, memory address attributes */
251
252     OUT_BUFFER_MA_TARGET(mfc_context->uncompressed_picture_source.bo); /* DW 54..56, uncompressed picture source */
253     OUT_BUFFER_MA_TARGET(NULL); /* DW 57..59, ignore  */
254     OUT_BUFFER_MA_TARGET(NULL); /* DW 60..62, ignore  */
255     OUT_BUFFER_MA_TARGET(NULL); /* DW 63..65, ignore  */
256
257     for (i = 0; i < ARRAY_ELEMS(mfc_context->current_collocated_mv_temporal_buffer) - 1; i++) {
258         bo = mfc_context->current_collocated_mv_temporal_buffer[i].bo;
259
260         if (bo) {
261             OUT_BUFFER_NMA_REFERENCE(bo);
262         } else
263             OUT_BUFFER_NMA_REFERENCE(NULL);
264     }
265     OUT_BCS_BATCH(batch, 0);    /* DW 82, memory address attributes */
266
267     OUT_BUFFER_MA_TARGET(NULL);    /* DW 83..85, ignore for HEVC */
268     OUT_BUFFER_MA_TARGET(NULL);    /* DW 86..88, ignore for HEVC */
269     OUT_BUFFER_MA_TARGET(NULL);    /* DW 89..91, ignore for HEVC */
270     OUT_BUFFER_MA_TARGET(NULL);    /* DW 92..94, ignore for HEVC */
271
272     ADVANCE_BCS_BATCH(batch);
273 }
274
275 static void
276 gen9_hcpe_ind_obj_base_addr_state(VADriverContextP ctx,
277                                   struct intel_encoder_context *encoder_context)
278 {
279     struct intel_batchbuffer *batch = encoder_context->base.batch;
280     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
281
282     /* to do */
283     BEGIN_BCS_BATCH(batch, 14);
284
285     OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (14 - 2));
286     OUT_BUFFER_MA_REFERENCE(NULL);                 /* DW 1..3, ignored for encoder */
287     OUT_BUFFER_NMA_REFERENCE(NULL);                /* DW 4..5, Upper Bound */
288     OUT_BUFFER_MA_TARGET(mfc_context->hcp_indirect_cu_object.bo);                 /* DW 6..8, CU */
289     /* DW 9..11, PAK-BSE */
290     OUT_BCS_RELOC(batch,
291                   mfc_context->hcp_indirect_pak_bse_object.bo,
292                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
293                   mfc_context->hcp_indirect_pak_bse_object.offset);
294     OUT_BCS_BATCH(batch, 0);
295     OUT_BCS_BATCH(batch, 0);
296     OUT_BCS_RELOC(batch,
297                   mfc_context->hcp_indirect_pak_bse_object.bo,
298                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
299                   mfc_context->hcp_indirect_pak_bse_object.end_offset);
300     OUT_BCS_BATCH(batch, 0);
301
302     ADVANCE_BCS_BATCH(batch);
303 }
304
305 static void
306 gen9_hcpe_fqm_state(VADriverContextP ctx,
307                     int size_id,
308                     int color_component,
309                     int pred_type,
310                     int dc,
311                     unsigned int *fqm,
312                     int fqm_length,
313                     struct intel_encoder_context *encoder_context)
314 {
315     struct intel_batchbuffer *batch = encoder_context->base.batch;
316     unsigned int fqm_buffer[32];
317
318     assert(fqm_length <= 32);
319     assert(sizeof(*fqm) == 4);
320     memset(fqm_buffer, 0, sizeof(fqm_buffer));
321     memcpy(fqm_buffer, fqm, fqm_length * 4);
322
323     BEGIN_BCS_BATCH(batch, 34);
324
325     OUT_BCS_BATCH(batch, HCP_FQM_STATE | (34 - 2));
326     OUT_BCS_BATCH(batch,
327                   dc << 16 |
328                   color_component << 3 |
329                   size_id << 1 |
330                   pred_type);
331     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
332
333     ADVANCE_BCS_BATCH(batch);
334 }
335
336
337 static void
338 gen9_hcpe_hevc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
339 {
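    /* Flat forward-quantizer matrix: each packed 16-bit entry is 0x1000
     * (4096), presumably 65536 / 16, i.e. the FQM value matching a flat
     * scaling-list entry of 16. */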
340     unsigned int qm[32] = {
341         0x10001000, 0x10001000, 0x10001000, 0x10001000,
342         0x10001000, 0x10001000, 0x10001000, 0x10001000,
343         0x10001000, 0x10001000, 0x10001000, 0x10001000,
344         0x10001000, 0x10001000, 0x10001000, 0x10001000,
345         0x10001000, 0x10001000, 0x10001000, 0x10001000,
346         0x10001000, 0x10001000, 0x10001000, 0x10001000,
347         0x10001000, 0x10001000, 0x10001000, 0x10001000,
348         0x10001000, 0x10001000, 0x10001000, 0x10001000
349     };
350
351     gen9_hcpe_fqm_state(ctx,
352                         0, 0, 0, 0,
353                         qm, 8,
354                         encoder_context);
355     gen9_hcpe_fqm_state(ctx,
356                         0, 0, 1, 0,
357                         qm, 8,
358                         encoder_context);
359     gen9_hcpe_fqm_state(ctx,
360                         1, 0, 0, 0,
361                         qm, 32,
362                         encoder_context);
363     gen9_hcpe_fqm_state(ctx,
364                         1, 0, 1, 0,
365                         qm, 32,
366                         encoder_context);
367     gen9_hcpe_fqm_state(ctx,
368                         2, 0, 0, 0x1000,
369                         qm, 0,
370                         encoder_context);
371     gen9_hcpe_fqm_state(ctx,
372                         2, 0, 1, 0x1000,
373                         qm, 0,
374                         encoder_context);
375     gen9_hcpe_fqm_state(ctx,
376                         3, 0, 0, 0x1000,
377                         qm, 0,
378                         encoder_context);
379     gen9_hcpe_fqm_state(ctx,
380                         3, 0, 1, 0x1000,
381                         qm, 0,
382                         encoder_context);
383 }
384
385 static void
386 gen9_hcpe_qm_state(VADriverContextP ctx,
387                    int size_id,
388                    int color_component,
389                    int pred_type,
390                    int dc,
391                    unsigned int *qm,
392                    int qm_length,
393                    struct intel_encoder_context *encoder_context)
394 {
395     struct intel_batchbuffer *batch = encoder_context->base.batch;
396     unsigned int qm_buffer[16];
397
398     assert(qm_length <= 16);
399     assert(sizeof(*qm) == 4);
400     memset(qm_buffer, 0, sizeof(qm_buffer));
401     memcpy(qm_buffer, qm, qm_length * 4);
402
403     BEGIN_BCS_BATCH(batch, 18);
404
405     OUT_BCS_BATCH(batch, HCP_QM_STATE | (18 - 2));
406     OUT_BCS_BATCH(batch,
407                   dc << 5 |
408                   color_component << 3 |
409                   size_id << 1 |
410                   pred_type);
411     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
412
413     ADVANCE_BCS_BATCH(batch);
414 }
415
416 static void
417 gen9_hcpe_hevc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
418 {
419
420     int i;
421
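    /* Flat quantization matrix: every byte is 16 (0x10), the default flat
     * scaling-list value. */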
422     unsigned int qm[16] = {
423         0x10101010, 0x10101010, 0x10101010, 0x10101010,
424         0x10101010, 0x10101010, 0x10101010, 0x10101010,
425         0x10101010, 0x10101010, 0x10101010, 0x10101010,
426         0x10101010, 0x10101010, 0x10101010, 0x10101010
427     };
428
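    /* The HCP_QM_STATE programming below follows the HEVC scaling-list
     * layout: size_id 0..3 selects 4x4/8x8/16x16/32x32, (i % 3) walks the
     * color component and (i / 3) the intra/inter prediction type; 32x32 has
     * only the two luma (intra/inter) lists, and the 16x16/32x32 lists also
     * carry a DC value (16 here). */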
429     for (i = 0; i < 6; i++) {
430         gen9_hcpe_qm_state(ctx,
431                            0, i % 3, i / 3, 0,
432                            qm, 4,
433                            encoder_context);
434     }
435
436     for (i = 0; i < 6; i++) {
437         gen9_hcpe_qm_state(ctx,
438                            1, i % 3, i / 3, 0,
439                            qm, 16,
440                            encoder_context);
441     }
442
443     for (i = 0; i < 6; i++) {
444         gen9_hcpe_qm_state(ctx,
445                            2, i % 3, i / 3, 16,
446                            qm, 16,
447                            encoder_context);
448     }
449
450     for (i = 0; i < 2; i++) {
451         gen9_hcpe_qm_state(ctx,
452                            3, 0, i % 2, 16,
453                            qm, 16,
454                            encoder_context);
455     }
456 }
457
458 static void
459 gen9_hcpe_hevc_pic_state(VADriverContextP ctx, struct encode_state *encode_state,
460                          struct intel_encoder_context *encoder_context)
461 {
462     struct intel_batchbuffer *batch = encoder_context->base.batch;
463     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
464     VAEncPictureParameterBufferHEVC *pic_param;
465     VAEncSequenceParameterBufferHEVC *seq_param;
466
467     int max_pcm_size_minus3 = 0, min_pcm_size_minus3 = 0;
468     int pcm_sample_bit_depth_luma_minus1 = 7, pcm_sample_bit_depth_chroma_minus1 = 7;
469     /*
470      * 7.4.3.1
471      *
472      * When not present, the value of loop_filter_across_tiles_enabled_flag
473      * is inferred to be equal to 1.
474      */
475     int loop_filter_across_tiles_enabled_flag = 0;
476     pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
477     seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
478
479     int log2_cu_size = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
480     int log2_ctb_size =  seq_param->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
481     int ctb_size = 1 << log2_ctb_size;
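    /* Worst-case CTB bit budget: a raw 4:2:0 8-bit CTB carries
     * 8 * 3 / 2 bits per pixel, and the cap programmed into DW 6 below
     * allows roughly 5/3 of that raw size. */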
482     double rawctubits = 8 * 3 * ctb_size * ctb_size / 2.0;
483     int maxctubits = (int)(5 * rawctubits / 3);
484     double bitrate = seq_param->bits_per_second * 1000.0;
485     double framebitrate = bitrate / 32 / 8; //32 byte unit
486     int minframebitrate = 0;//(int) (framebitrate * 3 / 10);
487     int maxframebitrate = (int)(framebitrate * 10 / 10);
488     int maxdeltaframebitrate = 0x1c5c; //(int) (framebitrate * 1/ 10);
489     int mindeltaframebitrate = 0; //(int) (framebitrate * 1/ 10);
490     int minframesize = 0;//(int)(rawframebits * 1/50);
491
492     if (seq_param->seq_fields.bits.pcm_enabled_flag) {
493         max_pcm_size_minus3 = seq_param->log2_max_pcm_luma_coding_block_size_minus3;
494         min_pcm_size_minus3 = seq_param->log2_min_pcm_luma_coding_block_size_minus3;
495         pcm_sample_bit_depth_luma_minus1 = (seq_param->pcm_sample_bit_depth_luma_minus1 & 0x0f);
496         pcm_sample_bit_depth_chroma_minus1 = (seq_param->pcm_sample_bit_depth_chroma_minus1 & 0x0f);
497     } else {
498         max_pcm_size_minus3 = MIN(seq_param->log2_min_luma_coding_block_size_minus3 + seq_param->log2_diff_max_min_luma_coding_block_size, 2);
499     }
500
501     if (pic_param->pic_fields.bits.tiles_enabled_flag)
502         loop_filter_across_tiles_enabled_flag = pic_param->pic_fields.bits.loop_filter_across_tiles_enabled_flag;
503
504     /* set zero for encoder */
505     loop_filter_across_tiles_enabled_flag = 0;
506
507     BEGIN_BCS_BATCH(batch, 19);
508
509     OUT_BCS_BATCH(batch, HCP_PIC_STATE | (19 - 2));
510
511     OUT_BCS_BATCH(batch,
512                   mfc_context->pic_size.picture_height_in_min_cb_minus1 << 16 |
513                   0 << 14 |
514                   mfc_context->pic_size.picture_width_in_min_cb_minus1);
515     OUT_BCS_BATCH(batch,
516                   max_pcm_size_minus3 << 10 |
517                   min_pcm_size_minus3 << 8 |
518                   (seq_param->log2_min_transform_block_size_minus2 +
519                    seq_param->log2_diff_max_min_transform_block_size) << 6 |
520                   seq_param->log2_min_transform_block_size_minus2 << 4 |
521                   (seq_param->log2_min_luma_coding_block_size_minus3 +
522                    seq_param->log2_diff_max_min_luma_coding_block_size) << 2 |
523                   seq_param->log2_min_luma_coding_block_size_minus3);
524     OUT_BCS_BATCH(batch, 0); /* DW 3, ignored */
525     OUT_BCS_BATCH(batch,
526                   0 << 27 | /* CU packet structure is 0 for SKL */
527                   seq_param->seq_fields.bits.strong_intra_smoothing_enabled_flag << 26 |
528                   pic_param->pic_fields.bits.transquant_bypass_enabled_flag << 25 |
529                   seq_param->seq_fields.bits.amp_enabled_flag << 23 |
530                   pic_param->pic_fields.bits.transform_skip_enabled_flag << 22 |
531                   0 << 21 | /* 0 for encoder !(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_BOTTOM_FIELD)*/
532                   0 << 20 |     /* 0 for encoder !!(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_FIELD_PIC)*/
533                   pic_param->pic_fields.bits.weighted_pred_flag << 19 |
534                   pic_param->pic_fields.bits.weighted_bipred_flag << 18 |
535                   pic_param->pic_fields.bits.tiles_enabled_flag << 17 |                 /* 0 for encoder */
536                   pic_param->pic_fields.bits.entropy_coding_sync_enabled_flag << 16 |
537                   loop_filter_across_tiles_enabled_flag << 15 |
538                   pic_param->pic_fields.bits.sign_data_hiding_enabled_flag << 13 |  /* 0 for encoder */
539                   pic_param->log2_parallel_merge_level_minus2 << 10 |               /* 0 for encoder */
540                   pic_param->pic_fields.bits.constrained_intra_pred_flag << 9 |     /* 0 for encoder */
541                   seq_param->seq_fields.bits.pcm_loop_filter_disabled_flag << 8 |
542                   (pic_param->diff_cu_qp_delta_depth & 0x03) << 6 |                 /* 0 for encoder */
543                   pic_param->pic_fields.bits.cu_qp_delta_enabled_flag << 5 |        /* 0 for encoder */
544                   seq_param->seq_fields.bits.pcm_enabled_flag << 4 |
545                   seq_param->seq_fields.bits.sample_adaptive_offset_enabled_flag << 3 | /* 0 for encoder */
546                   0);
547     OUT_BCS_BATCH(batch,
548                   0 << 27 |                 /* 8 bit only for encoder */
549                   0 << 24 |                 /* 8 bit only for encoder */
550                   pcm_sample_bit_depth_luma_minus1 << 20 |
551                   pcm_sample_bit_depth_chroma_minus1 << 16 |
552                   seq_param->max_transform_hierarchy_depth_inter << 13 |    /*  for encoder */
553                   seq_param->max_transform_hierarchy_depth_intra << 10 |    /*  for encoder */
554                   (pic_param->pps_cr_qp_offset & 0x1f) << 5 |
555                   (pic_param->pps_cb_qp_offset & 0x1f));
556     OUT_BCS_BATCH(batch,
557                   0 << 29 | /* must be 0 for encoder */
558                   maxctubits); /* DW 6, max LCU bit size allowed for encoder  */
559     OUT_BCS_BATCH(batch,
560                   0 << 31 | /* frame bitrate max unit */
561                   maxframebitrate); /* DW 7, frame bitrate max 0:13   */
562     OUT_BCS_BATCH(batch,
563                   0 << 31 | /* frame bitrate min unit */
564                   minframebitrate); /* DW 8, frame bitrate min 0:13   */
565     OUT_BCS_BATCH(batch,
566                   maxdeltaframebitrate << 16 | /* frame bitrate max delta ,help to select deltaQP of slice*/
567                   mindeltaframebitrate); /* DW 9,(0,14) frame bitrate min delta ,help to select deltaQP of slice*/
568     OUT_BCS_BATCH(batch, 0x07050402);   /* DW 10, frame delta qp max */
569     OUT_BCS_BATCH(batch, 0x0d0b0908);
570     OUT_BCS_BATCH(batch, 0);    /* DW 12, frame delta qp min */
571     OUT_BCS_BATCH(batch, 0);
572     OUT_BCS_BATCH(batch, 0x04030200);   /* DW 14, frame delta qp max range  */
573     OUT_BCS_BATCH(batch, 0x100c0806);   /* DW 15 */
574     OUT_BCS_BATCH(batch, 0x04030200);   /* DW 16, frame delta qp min range  */
575     OUT_BCS_BATCH(batch, 0x100c0806);
576     OUT_BCS_BATCH(batch,
577                   0 << 30 |
578                   minframesize);    /* DW 18, min frame size units */
579
580     ADVANCE_BCS_BATCH(batch);
581 }
582
583
584 static void
585 gen9_hcpe_hevc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
586                              unsigned int *insert_data, int length_in_dws, int data_bits_in_last_dw,
587                              int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
588                              struct intel_batchbuffer *batch)
589 {
590     if (batch == NULL)
591         batch = encoder_context->base.batch;
592
593     if (data_bits_in_last_dw == 0)
594         data_bits_in_last_dw = 32;
595
596     BEGIN_BCS_BATCH(batch, length_in_dws + 2);
597
598     OUT_BCS_BATCH(batch, HCP_INSERT_PAK_OBJECT | (length_in_dws + 2 - 2));
599     OUT_BCS_BATCH(batch,
600                   (0 << 31) |   /* inline payload */
601                   (0 << 16) |   /* always start at offset 0 */
602                   (0 << 15) |   /* HeaderLengthExcludeFrmSize */
603                   (data_bits_in_last_dw << 8) |
604                   (skip_emul_byte_count << 4) |
605                   (!!emulation_flag << 3) |
606                   ((!!is_last_header) << 2) |
607                   ((!!is_end_of_slice) << 1) |
608                   (0 << 0));    /* Reserved */
609     intel_batchbuffer_data(batch, insert_data, length_in_dws * 4);
610
611     ADVANCE_BCS_BATCH(batch);
612 }
613 /*
614 // To do: for future use
615 static uint8_t
616 intel_get_ref_idx_state_1(VAPictureHEVC *va_pic, unsigned int frame_store_id)
617 {
618     unsigned int is_long_term =
619         !!(va_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE);
620     unsigned int is_top_field =
621         !!!(va_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD);
622     unsigned int is_bottom_field =
623         !!(va_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD);
624
625     return ((is_long_term                         << 6) |
626             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
627             (frame_store_id                       << 1) |
628             ((is_top_field ^ 1) & is_bottom_field));
629 }
630 */
631 static void
632 gen9_hcpe_ref_idx_state_1(struct intel_batchbuffer *batch,
633                           int list,
634                           struct intel_encoder_context *encoder_context,
635                           struct encode_state *encode_state)
636 {
637     int i;
638     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
639     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
640     uint8_t num_ref_minus1 = (list ? slice_param->num_ref_idx_l1_active_minus1 : slice_param->num_ref_idx_l0_active_minus1);
641     VAPictureHEVC *ref_list = (list ? slice_param->ref_pic_list1 : slice_param->ref_pic_list0);
642     struct gen6_vme_context *vme_context = encoder_context->vme_context;
643     struct object_surface *obj_surface;
644     int frame_index;
645
646     int ref_idx_l0 = (vme_context->ref_index_in_mb[list] & 0xff);
647
648     if (ref_idx_l0 > 3) {
649         WARN_ONCE("ref_idx_l0 is out of range\n");
650         ref_idx_l0 = 0;
651     }
652
653     obj_surface = vme_context->used_reference_objects[list];
654     frame_index = -1;
655     for (i = 0; i < 16; i++) {
656         if (obj_surface &&
657             obj_surface == encode_state->reference_objects[i]) {
658             frame_index = i;
659             break;
660         }
661     }
662     if (frame_index == -1) {
663         WARN_ONCE("RefPicList 0 or 1 is not found in DPB!\n");
664     }
665
666     BEGIN_BCS_BATCH(batch, 18);
667
668     OUT_BCS_BATCH(batch, HCP_REF_IDX_STATE | (18 - 2));
669     OUT_BCS_BATCH(batch,
670                   num_ref_minus1 << 1 |
671                   list);
672
673     for (i = 0; i < 16; i++) {
674         if (i < MIN((num_ref_minus1 + 1), 15)) {
675             VAPictureHEVC *ref_pic = &ref_list[i];
676             VAPictureHEVC *curr_pic = &pic_param->decoded_curr_pic;
677
678             OUT_BCS_BATCH(batch,
679                           1 << 15 |         /* bottom_field_flag 0 */
680                           0 << 14 |         /* field_pic_flag 0 */
681                           !!(ref_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE) << 13 |  /* short term is 1 */
682                           0 << 12 | /* disable WP */
683                           0 << 11 | /* disable WP */
684                           frame_index << 8 |
685                           (CLAMP(-128, 127, curr_pic->pic_order_cnt - ref_pic->pic_order_cnt) & 0xff));
686         } else {
687             OUT_BCS_BATCH(batch, 0);
688         }
689     }
690
691     ADVANCE_BCS_BATCH(batch);
692 }
693
694 void
695 intel_hcpe_hevc_ref_idx_state(VADriverContextP ctx,
696                               struct encode_state *encode_state,
697                               struct intel_encoder_context *encoder_context
698                              )
699 {
700     struct intel_batchbuffer *batch = encoder_context->base.batch;
701     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
702
703     if (slice_param->slice_type == HEVC_SLICE_I)
704         return;
705
706     gen9_hcpe_ref_idx_state_1(batch, 0, encoder_context, encode_state);
707
708     if (slice_param->slice_type == HEVC_SLICE_P)
709         return;
710
711     gen9_hcpe_ref_idx_state_1(batch, 1, encoder_context, encode_state);
712 }
713
714 static void
715 gen9_hcpe_hevc_slice_state(VADriverContextP ctx,
716                            VAEncPictureParameterBufferHEVC *pic_param,
717                            VAEncSliceParameterBufferHEVC *slice_param,
718                            struct encode_state *encode_state,
719                            struct intel_encoder_context *encoder_context,
720                            struct intel_batchbuffer *batch)
721 {
722     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
723     int slice_type = slice_param->slice_type;
724
725     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
726     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
727     int ctb_size = 1 << log2_ctb_size;
728     int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
729     int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
730     int last_slice = (((slice_param->slice_segment_address + slice_param->num_ctu_in_slice) == (width_in_ctb * height_in_ctb)) ? 1 : 0);
731
732     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
733
734     slice_hor_pos = slice_param->slice_segment_address % width_in_ctb;
735     slice_ver_pos = slice_param->slice_segment_address / width_in_ctb;
736
737     next_slice_hor_pos = (slice_param->slice_segment_address + slice_param->num_ctu_in_slice) % width_in_ctb;
738     next_slice_ver_pos = (slice_param->slice_segment_address + slice_param->num_ctu_in_slice) / width_in_ctb;
739
740     /* only multi-slice layouts that start at a CTB row boundary are supported */
741     assert((slice_param->slice_segment_address % width_in_ctb) == 0);
742
743     if (last_slice == 1) {
744         if (slice_param->slice_segment_address == 0) {
745             next_slice_hor_pos = 0;
746             next_slice_ver_pos = height_in_ctb;
747         } else {
748             next_slice_hor_pos = 0;
749             next_slice_ver_pos = 0;
750         }
751     }
752
753     BEGIN_BCS_BATCH(batch, 9);
754
755     OUT_BCS_BATCH(batch, HCP_SLICE_STATE | (9 - 2));
756
757     OUT_BCS_BATCH(batch,
758                   slice_ver_pos << 16 |
759                   slice_hor_pos);
760     OUT_BCS_BATCH(batch,
761                   next_slice_ver_pos << 16 |
762                   next_slice_hor_pos);
763     OUT_BCS_BATCH(batch,
764                   (slice_param->slice_cr_qp_offset & 0x1f) << 17 |
765                   (slice_param->slice_cb_qp_offset & 0x1f) << 12 |
766                   (pic_param->pic_init_qp + slice_param->slice_qp_delta) << 6 |
767                   slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag << 5 |
768                   slice_param->slice_fields.bits.dependent_slice_segment_flag << 4 |
769                   last_slice << 2 |
770                   slice_type);
771     OUT_BCS_BATCH(batch,
772                   0 << 26 |
773                   (slice_param->max_num_merge_cand - 1)  << 23 |
774                   slice_param->slice_fields.bits.cabac_init_flag << 22 |
775                   slice_param->luma_log2_weight_denom << 19 |
776                   (slice_param->luma_log2_weight_denom + slice_param->delta_chroma_log2_weight_denom) << 16 |
777                   slice_param->slice_fields.bits.collocated_from_l0_flag << 15 |
778                   (slice_type != HEVC_SLICE_B) << 14 |
779                   slice_param->slice_fields.bits.mvd_l1_zero_flag << 13 |
780                   slice_param->slice_fields.bits.slice_sao_luma_flag << 12 |
781                   slice_param->slice_fields.bits.slice_sao_chroma_flag << 11 |
782                   slice_param->slice_fields.bits.slice_loop_filter_across_slices_enabled_flag << 10 |
783                   (slice_param->slice_beta_offset_div2 & 0xf) << 5 |
784                   (slice_param->slice_tc_offset_div2 & 0xf) << 1 |
785                   slice_param->slice_fields.bits.slice_deblocking_filter_disabled_flag);
786     OUT_BCS_BATCH(batch, 0); /* DW 5, ignored for encoder */
787     OUT_BCS_BATCH(batch,
788                   4 << 26 |
789                   4 << 20 |
790                   0);
791     OUT_BCS_BATCH(batch,
792                   1 << 10 |  /* header insertion enable */
793                   1 << 9  |  /* slice data enable */
794                   1 << 8  |  /* tail insertion enable, must be at the end of the frame, not of each slice */
795                   1 << 2  |  /* RBSP or EBSP, EmulationByteSliceInsertEnable */
796                   1 << 1  |  /* cabacZeroWordInsertionEnable */
797                   0);        /* Ignored for decoding */
798     OUT_BCS_BATCH(batch, 0); /* PAK-BSE data start offset */
799
800     ADVANCE_BCS_BATCH(batch);
801 }
802
803 /* HEVC pipe line related */
804 static void gen9_hcpe_hevc_pipeline_picture_programing(VADriverContextP ctx,
805         struct encode_state *encode_state,
806         struct intel_encoder_context *encoder_context)
807 {
808     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
809
810     mfc_context->pipe_mode_select(ctx, HCP_CODEC_HEVC, encoder_context);
811     mfc_context->set_surface_state(ctx, encode_state, encoder_context);
812     gen9_hcpe_pipe_buf_addr_state(ctx, encode_state, encoder_context);
813     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
814
815     mfc_context->qm_state(ctx, encoder_context);
816     mfc_context->fqm_state(ctx, encoder_context);
817     mfc_context->pic_state(ctx, encode_state, encoder_context);
818     intel_hcpe_hevc_ref_idx_state(ctx, encode_state, encoder_context);
819 }
820
821 static void gen9_hcpe_init(VADriverContextP ctx,
822                            struct encode_state *encode_state,
823                            struct intel_encoder_context *encoder_context)
824 {
825     /* to do */
826     struct i965_driver_data *i965 = i965_driver_data(ctx);
827     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
828     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
829     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
830     dri_bo *bo;
831     int i, size = 0;
832     int slice_batchbuffer_size;
833     int slice_type = slice_param->slice_type;
834     int is_inter = (slice_type != HEVC_SLICE_I);
835
836     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
837     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
838     int ctb_size = 1 << log2_ctb_size;
839     int cu_size  = 1 << log2_cu_size;
840
841     int width_in_ctb  = ALIGN(pSequenceParameter->pic_width_in_luma_samples , ctb_size) / ctb_size;
842     int height_in_ctb = ALIGN(pSequenceParameter->pic_height_in_luma_samples, ctb_size) / ctb_size;
843     int width_in_cu  = ALIGN(pSequenceParameter->pic_width_in_luma_samples , cu_size) / cu_size;
844     int height_in_cu = ALIGN(pSequenceParameter->pic_height_in_luma_samples, cu_size) / cu_size;
845     int width_in_mb  = ALIGN(pSequenceParameter->pic_width_in_luma_samples , 16) / 16;
846     int height_in_mb = ALIGN(pSequenceParameter->pic_height_in_luma_samples, 16) / 16;
847
848     int num_cu_record = 64;
849
850     if (log2_ctb_size == 5) num_cu_record = 16;
851     else if (log2_ctb_size == 4) num_cu_record = 4;
852     else if (log2_ctb_size == 6) num_cu_record = 64;
853
854     /* frame size in samples, CUs, CTBs and MBs */
855     mfc_context->pic_size.picture_width_in_samples = pSequenceParameter->pic_width_in_luma_samples;
856     mfc_context->pic_size.picture_height_in_samples = pSequenceParameter->pic_height_in_luma_samples;
857     mfc_context->pic_size.ctb_size = ctb_size;
858     mfc_context->pic_size.picture_width_in_ctbs = width_in_ctb;
859     mfc_context->pic_size.picture_height_in_ctbs = height_in_ctb;
860     mfc_context->pic_size.min_cb_size = cu_size;
861     mfc_context->pic_size.picture_width_in_min_cb_minus1 = width_in_cu - 1;
862     mfc_context->pic_size.picture_height_in_min_cb_minus1 = height_in_cu - 1;
863     mfc_context->pic_size.picture_width_in_mbs = width_in_mb;
864     mfc_context->pic_size.picture_height_in_mbs = height_in_mb;
865
866     slice_batchbuffer_size = 64 * width_in_ctb * width_in_ctb + 4096 +
867                              (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
868
869     /*Encode common setup for HCP*/
870     /*deblocking */
871     dri_bo_unreference(mfc_context->deblocking_filter_line_buffer.bo);
872     mfc_context->deblocking_filter_line_buffer.bo = NULL;
873
874     dri_bo_unreference(mfc_context->deblocking_filter_tile_line_buffer.bo);
875     mfc_context->deblocking_filter_tile_line_buffer.bo = NULL;
876
877     dri_bo_unreference(mfc_context->deblocking_filter_tile_column_buffer.bo);
878     mfc_context->deblocking_filter_tile_column_buffer.bo = NULL;
879
880     /* input source */
881     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
882     mfc_context->uncompressed_picture_source.bo = NULL;
883
884     /* metadata */
885     dri_bo_unreference(mfc_context->metadata_line_buffer.bo);
886     mfc_context->metadata_line_buffer.bo = NULL;
887
888     dri_bo_unreference(mfc_context->metadata_tile_line_buffer.bo);
889     mfc_context->metadata_tile_line_buffer.bo = NULL;
890
891     dri_bo_unreference(mfc_context->metadata_tile_column_buffer.bo);
892     mfc_context->metadata_tile_column_buffer.bo = NULL;
893
894     /* sao */
895     dri_bo_unreference(mfc_context->sao_line_buffer.bo);
896     mfc_context->sao_line_buffer.bo = NULL;
897
898     dri_bo_unreference(mfc_context->sao_tile_line_buffer.bo);
899     mfc_context->sao_tile_line_buffer.bo = NULL;
900
901     dri_bo_unreference(mfc_context->sao_tile_column_buffer.bo);
902     mfc_context->sao_tile_column_buffer.bo = NULL;
903
904     /* mv temporal buffer */
905     for (i = 0; i < NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS; i++) {
906         if (mfc_context->current_collocated_mv_temporal_buffer[i].bo != NULL)
907             dri_bo_unreference(mfc_context->current_collocated_mv_temporal_buffer[i].bo);
908         mfc_context->current_collocated_mv_temporal_buffer[i].bo = NULL;
909     }
910
911     /* reference */
912     for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
913         if (mfc_context->reference_surfaces[i].bo != NULL)
914             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
915         mfc_context->reference_surfaces[i].bo = NULL;
916     }
917
918     /* indirect data CU recording */
919     dri_bo_unreference(mfc_context->hcp_indirect_cu_object.bo);
920     mfc_context->hcp_indirect_cu_object.bo = NULL;
921
922     dri_bo_unreference(mfc_context->hcp_indirect_pak_bse_object.bo);
923     mfc_context->hcp_indirect_pak_bse_object.bo = NULL;
924
925     /* Current internal buffer for HCP */
926
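    /* The sizes below are computed in 64-byte units: '>> 3' converts samples
     * to 8-sample granules and '<< 6' allocates 64 bytes per granule, which
     * presumably matches the HCP row/tile store requirements. */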
927     size = ALIGN(pSequenceParameter->pic_width_in_luma_samples, 32) >> 3;
928     size <<= 6;
929     ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_line_buffer), "line buffer", size);
930     ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_tile_line_buffer), "tile line buffer", size);
931
932     size = ALIGN(pSequenceParameter->pic_height_in_luma_samples + 6 * width_in_ctb, 32) >> 3;
933     size <<= 6;
934     ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_tile_column_buffer), "tile column buffer", size);
935
936     if (is_inter) {
937         size = (((pSequenceParameter->pic_width_in_luma_samples + 15) >> 4) * 188 + 9 * width_in_ctb + 1023) >> 9;
938         size <<= 6;
939         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_line_buffer), "metadata line buffer", size);
940
941         size = (((pSequenceParameter->pic_width_in_luma_samples + 15) >> 4) * 172 + 9 * width_in_ctb + 1023) >> 9;
942         size <<= 6;
943         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_line_buffer), "metadata tile line buffer", size);
944
945         size = (((pSequenceParameter->pic_height_in_luma_samples + 15) >> 4) * 176 + 89 * width_in_ctb + 1023) >> 9;
946         size <<= 6;
947         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_column_buffer), "metadata tile column buffer", size);
948     } else {
949         size = (pSequenceParameter->pic_width_in_luma_samples + 8 * width_in_ctb + 1023) >> 9;
950         size <<= 6;
951         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_line_buffer), "metadata line buffer", size);
952
953         size = (pSequenceParameter->pic_width_in_luma_samples + 16 * width_in_ctb + 1023) >> 9;
954         size <<= 6;
955         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_line_buffer), "metadata tile line buffer", size);
956
957         size = (pSequenceParameter->pic_height_in_luma_samples + 8 * height_in_ctb + 1023) >> 9;
958         size <<= 6;
959         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_column_buffer), "metadata tile column buffer", size);
960     }
961
962     size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 3 * width_in_ctb), 16) >> 3;
963     size <<= 6;
964     ALLOC_ENCODER_BUFFER((&mfc_context->sao_line_buffer), "sao line buffer", size);
965
966     size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 6 * width_in_ctb), 16) >> 3;
967     size <<= 6;
968     ALLOC_ENCODER_BUFFER((&mfc_context->sao_tile_line_buffer), "sao tile line buffer", size);
969
970     size = ALIGN(((pSequenceParameter->pic_height_in_luma_samples >> 1) + 6 * height_in_ctb), 16) >> 3;
971     size <<= 6;
972     ALLOC_ENCODER_BUFFER((&mfc_context->sao_tile_column_buffer), "sao tile column buffer", size);
973
974     /////////////////////
975     dri_bo_unreference(mfc_context->hcp_indirect_cu_object.bo);
976     bo = dri_bo_alloc(i965->intel.bufmgr,
977                       "Indirect data CU Buffer",
978                       width_in_ctb * height_in_ctb * num_cu_record * 16 * 4,
979                       0x1000);
980     assert(bo);
981     mfc_context->hcp_indirect_cu_object.bo = bo;
982
983     /* to do pak bse object buffer */
984     /* to do current collocated mv temporal buffer */
985
986     dri_bo_unreference(mfc_context->hcp_batchbuffer_surface.bo);
987     mfc_context->hcp_batchbuffer_surface.bo = NULL;
988
989     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
990     mfc_context->aux_batchbuffer_surface.bo = NULL;
991
992     if (mfc_context->aux_batchbuffer)
993         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
994
995     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
996     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
997     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
998     mfc_context->aux_batchbuffer_surface.pitch = 16;
999     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
1000     mfc_context->aux_batchbuffer_surface.size_block = 16;
1001 }
1002
1003 static VAStatus gen9_hcpe_run(VADriverContextP ctx,
1004                               struct encode_state *encode_state,
1005                               struct intel_encoder_context *encoder_context)
1006 {
1007     struct intel_batchbuffer *batch = encoder_context->base.batch;
1008
1009     intel_batchbuffer_flush(batch);     //run the pipeline
1010
1011     return VA_STATUS_SUCCESS;
1012 }
1013
1014
1015 static VAStatus
1016 gen9_hcpe_stop(VADriverContextP ctx,
1017                struct encode_state *encode_state,
1018                struct intel_encoder_context *encoder_context,
1019                int *encoded_bits_size)
1020 {
1021     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
1022     VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1023     VACodedBufferSegment *coded_buffer_segment;
1024
1025     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
1026     assert(vaStatus == VA_STATUS_SUCCESS);
1027     *encoded_bits_size = coded_buffer_segment->size * 8;
1028     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
1029
1030     return VA_STATUS_SUCCESS;
1031 }
1032
1033
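/* Scan a packed header for the Annex-B start code (000001 or 00000001) and
 * return the number of leading bytes, including the two-byte HEVC NAL unit
 * header, that the hardware should skip before applying emulation prevention. */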
1034 int intel_hevc_find_skipemulcnt(unsigned char *buf, int bits_length)
1035 {
1036     /* to do */
1037     int i, found;
1038     int leading_zero_cnt, byte_length, zero_byte;
1039     int nal_unit_type;
1040     int skip_cnt = 0;
1041
1042 #define NAL_UNIT_TYPE_MASK 0x7e
1043 #define HW_MAX_SKIP_LENGTH 15
1044
1045     byte_length = ALIGN(bits_length, 32) >> 3;
1046
1047
1048     leading_zero_cnt = 0;
1049     found = 0;
1050     for (i = 0; i < byte_length - 4; i++) {
1051         if (((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1)) ||
1052             ((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 0) && (buf[i + 3] == 1))) {
1053             found = 1;
1054             break;
1055         }
1056         leading_zero_cnt++;
1057     }
1058     if (!found) {
1059         /* a warning is emitted, but the header data is inserted anyway */
1060         WARN_ONCE("Invalid packed header data. "
1061                   "Can't find the 000001 start_prefix code\n");
1062         return 0;
1063     }
1064     i = leading_zero_cnt;
1065
1066     zero_byte = 0;
1067     if (!((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1)))
1068         zero_byte = 1;
1069
1070     skip_cnt = leading_zero_cnt + zero_byte + 3;
1071
1072     /* account for the NAL unit header byte */
1073     nal_unit_type = (buf[skip_cnt]) & NAL_UNIT_TYPE_MASK;
1074     skip_cnt += 1;
1075     skip_cnt += 1;  /* the NAL unit header is two bytes long in HEVC */
1076
1077     if (nal_unit_type == 14 || nal_unit_type == 20 || nal_unit_type == 21) {
1078         /* more unit header bytes are accounted for MVC/SVC */
1079         //skip_cnt += 3;
1080     }
1081     if (skip_cnt > HW_MAX_SKIP_LENGTH) {
1082         WARN_ONCE("Too many leading zeros are padded for packed data. "
1083                   "It is beyond the HW range!\n");
1084     }
1085     return skip_cnt;
1086 }
1087
1088 #ifdef HCP_SOFTWARE_SKYLAKE
1089
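/* Emit one HCP_PAK_OBJECT command for a single LCU: the last-CTB flag, the
 * number of CU records minus one, the split_coding_unit_flag bits and the
 * LCU x/y position. Returns the command length in DWs. */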
1090 static int
1091 gen9_hcpe_hevc_pak_object(VADriverContextP ctx, int lcu_x, int lcu_y, int isLast_ctb,
1092                           struct intel_encoder_context *encoder_context,
1093                           int cu_count_in_lcu, unsigned int split_coding_unit_flag,
1094                           struct intel_batchbuffer *batch)
1095 {
1096     int len_in_dwords = 3;
1097
1098     if (batch == NULL)
1099         batch = encoder_context->base.batch;
1100
1101     BEGIN_BCS_BATCH(batch, len_in_dwords);
1102
1103     OUT_BCS_BATCH(batch, HCP_PAK_OBJECT | (len_in_dwords - 2));
1104     OUT_BCS_BATCH(batch,
1105                   (((isLast_ctb > 0) ? 1 : 0) << 31) |  /* last ctb?*/
1106                   ((cu_count_in_lcu - 1) << 24) |           /* No motion vector */
1107                   split_coding_unit_flag);
1108
1109     OUT_BCS_BATCH(batch, (lcu_y << 16) | lcu_x);        /* LCU  for Y*/
1110
1111     ADVANCE_BCS_BATCH(batch);
1112
1113     return len_in_dwords;
1114 }
1115
1116 #define     AVC_INTRA_RDO_OFFSET    4
1117 #define     AVC_INTER_RDO_OFFSET    10
1118 #define     AVC_INTER_MSG_OFFSET    8
1119 #define     AVC_INTER_MV_OFFSET     48
1120 #define     AVC_RDO_MASK            0xFFFF
1121
1122 #define     AVC_INTRA_MODE_MASK     0x30
1123 #define     AVC_INTRA_16X16         0x00
1124 #define     AVC_INTRA_8X8           0x01
1125 #define     AVC_INTRA_4X4           0x02
1126
1127 #define     AVC_INTER_MODE_MASK     0x03
1128 #define     AVC_INTER_8X8           0x03
1129 #define     AVC_INTER_8X16          0x02
1130 #define     AVC_INTER_16X8          0x01
1131 #define     AVC_INTER_16X16         0x00
1132 #define     AVC_SUBMB_SHAPE_MASK    0x00FF00
1133
1134 /* VME output message, write back message */
1135 #define     AVC_INTER_SUBMB_PRE_MODE_MASK       0x00ff0000
1136 #define     AVC_SUBMB_SHAPE_MASK    0x00FF00
1137
1138 /* here 1 MB = 1CU = 16x16 */
1139 static void
1140 gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP ctx,
1141                                       struct encode_state *encode_state,
1142                                       struct intel_encoder_context *encoder_context,
1143                                       int qp, unsigned int *msg,
1144                                       int ctb_x, int ctb_y,
1145                                       int mb_x, int mb_y,
1146                                       int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type, int cu_index, int index)
1147 {
1148     /* here cu == mb, so we use mb address as the cu address */
1149     /* to fill the indirect cu by the vme out */
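    /* Map the AVC intra prediction modes reported by VME to the closest HEVC
     * modes: 26 = vertical, 10 = horizontal, 1 = DC, with 34 standing in for
     * the AVC plane/diagonal predictions. */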
1150     static int intra_mode_8x8_avc2hevc[9] = {26, 10, 1, 34, 18, 24, 13, 28, 8};
1151     static int intra_mode_16x16_avc2hevc[4] = {26, 10, 1, 34};
1152     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1153     unsigned char * cu_record_ptr = NULL;
1154     unsigned int * cu_msg = NULL;
1155     int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
1156     int mb_address_in_ctb = 0;
1157     int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
1158     int zero = 0;
1159     int is_inter = 0;
1160     int intraMbMode = 0;
1161     int cu_part_mode = 0;
1162     int intraMode[4];
1163     int interpred_idc = 0;
1164     int intra_chroma_mode = 5;
1165     int cu_size = 1;
1166     int tu_size = 0x55;
1167     int tu_count = 4;
1168
1169     if (!is_inter) interpred_idc = 0xff;
1170
1171     intraMbMode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4;
1172
1173
1174     if (intraMbMode == AVC_INTRA_16X16) {
1175         cu_part_mode = 0; //2Nx2N
1176         intra_chroma_mode = 5;
1177         cu_size = 1;
1178         tu_size = 0x55;
1179         tu_count = 4;
1180         intraMode[0] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
1181         intraMode[1] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
1182         intraMode[2] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
1183         intraMode[3] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
1184     } else if (intraMbMode == AVC_INTRA_8X8) {
1185         cu_part_mode = 0; //2Nx2N
1186         intra_chroma_mode = 5;
1187         cu_size = 0;
1188         tu_size = 0;
1189         tu_count = 4;
1190         intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1191         intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1192         intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1193         intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1194
1195     } else { // for 4x4 to use 8x8 replace
1196         cu_part_mode = 3; //NxN
1197         intra_chroma_mode = 0;
1198         cu_size = 0;
1199         tu_size = 0;
1200         tu_count = 4;
1201         intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 0) & 0xf];
1202         intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 4) & 0xf];
1203         intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 8) & 0xf];
1204         intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 12) & 0xf];
1205
1206     }
1207
1208     cu_record_ptr = (unsigned char *)mfc_context->hcp_indirect_cu_object.bo->virtual;
1209     /* get the mb info from the vme out */
1210     cu_msg = (unsigned int *)(cu_record_ptr + cu_address);
1211
1212     cu_msg[0] = (interpred_idc << 24 |   /* interpred_idc[3:0][1:0] */
1213                  zero << 23 |   /* reserved */
1214                  qp << 16 | /* CU_qp */
1215                  zero << 11 |   /* reserved */
1216                  intra_chroma_mode << 8 |   /* intra_chroma_mode */
1217                  zero << 7 |    /* IPCM_enable , reserved for SKL*/
1218                  cu_part_mode << 4 |    /* cu_part_mode */
1219                  zero << 3 |    /* cu_transquant_bypass_flag */
1220                  is_inter << 2 |    /* cu_pred_mode :intra 1,inter 1*/
1221                  cu_size          /* cu_size */
1222                 );
1223     cu_msg[1] = (zero << 30 |   /* reserved  */
1224                  intraMode[3] << 24 |   /* intra_mode */
1225                  zero << 22 |   /* reserved  */
1226                  intraMode[2] << 16 |   /* intra_mode */
1227                  zero << 14 |   /* reserved  */
1228                  intraMode[1] << 8 |    /* intra_mode */
1229                  zero << 6 |    /* reserved  */
1230                  intraMode[0]           /* intra_mode */
1231                 );
1232     /* l0: 4 MV (x,y); l1; 4 MV (x,y) */
1233     cu_msg[2] = (zero << 16 |   /* mvx_l0[1]  */
1234                  zero           /* mvx_l0[0] */
1235                 );
1236     cu_msg[3] = (zero << 16 |   /* mvx_l0[3]  */
1237                  zero           /* mvx_l0[2] */
1238                 );
1239     cu_msg[4] = (zero << 16 |   /* mvy_l0[1]  */
1240                  zero           /* mvy_l0[0] */
1241                 );
1242     cu_msg[5] = (zero << 16 |   /* mvy_l0[3]  */
1243                  zero           /* mvy_l0[2] */
1244                 );
1245
1246     cu_msg[6] = (zero << 16 |   /* mvx_l1[1]  */
1247                  zero           /* mvx_l1[0] */
1248                 );
1249     cu_msg[7] = (zero << 16 |   /* mvx_l1[3]  */
1250                  zero           /* mvx_l1[2] */
1251                 );
1252     cu_msg[8] = (zero << 16 |   /* mvy_l1[1]  */
1253                  zero           /* mvy_l1[0] */
1254                 );
1255     cu_msg[9] = (zero << 16 |   /* mvy_l1[3]  */
1256                  zero           /* mvy_l1[2] */
1257                 );
1258
1259     cu_msg[10] = (zero << 28 |  /* ref_idx_l1[3]  */
1260                   zero << 24 |  /* ref_idx_l1[2] */
1261                   zero << 20 |  /* ref_idx_l1[1]  */
1262                   zero << 16 |  /* ref_idx_l1[0] */
1263                   zero << 12 |  /* ref_idx_l0[3]  */
1264                   zero << 8 |   /* ref_idx_l0[2] */
1265                   zero << 4 |   /* ref_idx_l0[1]  */
1266                   zero          /* ref_idx_l0[0] */
1267                  );
1268
1269     cu_msg[11] = tu_size; /* tu_size 00000000 00000000 00000000 01010101 (0x55) or 0x0 */
1270     cu_msg[12] = ((tu_count - 1) << 28 | /* tu count - 1 */
1271                   zero << 16 |  /* reserved  */
1272                   zero          /* tu_xform_Yskip[15:0] */
1273                  );
1274     cu_msg[13] = (zero << 16 |  /* tu_xform_Vskip[15:0]  */
1275                   zero          /* tu_xform_Uskip[15:0] */
1276                  );
1277     cu_msg[14] = zero;
1278     cu_msg[15] = zero;
1279 }
1280
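/*
 * Layout of one 16-DWord CU record in the HCP indirect CU object, as filled
 * in by the intra helper above and the inter helper below (derived from the
 * field comments in this file):
 *   DW0      : interpred_idc, CU QP, intra_chroma_mode, cu_part_mode,
 *              cu_transquant_bypass_flag, cu_pred_mode and cu_size
 *   DW1      : four luma intra prediction modes
 *   DW2-DW5  : L0 motion vectors (four x components, then four y components)
 *   DW6-DW9  : L1 motion vectors (four x components, then four y components)
 *   DW10     : reference indices for L0 and L1
 *   DW11     : tu_size bitmap
 *   DW12-DW13: TU count and transform-skip bitmaps (Y, then U/V)
 *   DW14-DW15: reserved (zero)
 */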
1281 /* here 1 MB = 1 CU = 16x16 */
1282 static void
1283 gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP ctx,
1284                                       struct encode_state *encode_state,
1285                                       struct intel_encoder_context *encoder_context,
1286                                       int qp, unsigned int *msg,
1287                                       int ctb_x, int ctb_y,
1288                                       int mb_x, int mb_y,
1289                                       int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type, int cu_index, int index)
1290 {
1291     /* here cu == mb, so we use mb address as the cu address */
1292     /* to fill the indirect cu by the vme out */
1293     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1294     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1295     unsigned char * cu_record_ptr = NULL;
1296     unsigned int * cu_msg = NULL;
1297     int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
1298     int mb_address_in_ctb = 0;
1299     int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
1300     int zero = 0;
1301     int cu_part_mode = 0;
1302     int submb_pre_mode = 0;
1303     int is_inter = 1;
1304     int cu_size = 1;
1305     int tu_size = 0x55;
1306     int tu_count = 4;
1307     int inter_mode = 0;
1308
1309     unsigned int *mv_ptr;
1310     {
1311         inter_mode = (msg[0] & AVC_INTER_MODE_MASK);
1312         submb_pre_mode = (msg[1] & AVC_INTER_SUBMB_PRE_MODE_MASK) >> 16;
1313 #define MSG_MV_OFFSET   4
1314         mv_ptr = msg + MSG_MV_OFFSET;
1315         /* The MV output of the VME is based on 16 4x4 sub-blocks. So it is
1316          * necessary to repack them into the layout used by the HCP
1317          * indirect CU record below.
1318          */
1319         /* 0/2/4/6/8...: l0, 1/3/5/7...: l1; only 16x16, 16x8, 8x16 and 8x8 are supported now */
1320
1321         if (inter_mode == AVC_INTER_16X16) {
1322             mv_ptr[4] = mv_ptr[0];
1323             mv_ptr[5] = mv_ptr[1];
1324             mv_ptr[2] = mv_ptr[0];
1325             mv_ptr[3] = mv_ptr[1];
1326             mv_ptr[6] = mv_ptr[0];
1327             mv_ptr[7] = mv_ptr[1];
1328             cu_part_mode = 0;
1329             cu_size = 1;
1330             tu_size = 0x55;
1331             tu_count = 4;
1332         } else if (inter_mode == AVC_INTER_8X16) {
1333             mv_ptr[4] = mv_ptr[0];
1334             mv_ptr[5] = mv_ptr[1];
1335             mv_ptr[2] = mv_ptr[8];
1336             mv_ptr[3] = mv_ptr[9];
1337             mv_ptr[6] = mv_ptr[8];
1338             mv_ptr[7] = mv_ptr[9];
1339             cu_part_mode = 1;
1340             cu_size = 1;
1341             tu_size = 0x55;
1342             tu_count = 4;
1343         } else if (inter_mode == AVC_INTER_16X8) {
1344             mv_ptr[2] = mv_ptr[0];
1345             mv_ptr[3] = mv_ptr[1];
1346             mv_ptr[4] = mv_ptr[16];
1347             mv_ptr[5] = mv_ptr[17];
1348             mv_ptr[6] = mv_ptr[24];
1349             mv_ptr[7] = mv_ptr[25];
1350             cu_part_mode = 2;
1351             cu_size = 1;
1352             tu_size = 0x55;
1353             tu_count = 4;
1354         } else if (inter_mode == AVC_INTER_8X8) {
1355             mv_ptr[0] = mv_ptr[index * 8 + 0 ];
1356             mv_ptr[1] = mv_ptr[index * 8 + 1 ];
1357             mv_ptr[2] = mv_ptr[index * 8 + 0 ];
1358             mv_ptr[3] = mv_ptr[index * 8 + 1 ];
1359             mv_ptr[4] = mv_ptr[index * 8 + 0 ];
1360             mv_ptr[5] = mv_ptr[index * 8 + 1 ];
1361             mv_ptr[6] = mv_ptr[index * 8 + 0 ];
1362             mv_ptr[7] = mv_ptr[index * 8 + 1 ];
1363             cu_part_mode = 0;
1364             cu_size = 0;
1365             tu_size = 0x0;
1366             tu_count = 4;
1367
1368         } else {
1369
1370             mv_ptr[4] = mv_ptr[0];
1371             mv_ptr[5] = mv_ptr[1];
1372             mv_ptr[2] = mv_ptr[0];
1373             mv_ptr[3] = mv_ptr[1];
1374             mv_ptr[6] = mv_ptr[0];
1375             mv_ptr[7] = mv_ptr[1];
1376             cu_part_mode = 0;
1377             cu_size = 1;
1378             tu_size = 0x55;
1379             tu_count = 4;
1380
1381         }
1382     }
1383
1384     cu_record_ptr = (unsigned char *)mfc_context->hcp_indirect_cu_object.bo->virtual;
1385     /* point to the CU record to be filled in the indirect CU buffer */
1386     cu_msg = (unsigned int *)(cu_record_ptr + cu_address);
1387
1388     cu_msg[0] = (submb_pre_mode << 24 | /* interpred_idc[3:0][1:0] */
1389                  zero << 23 |   /* reserved */
1390                  qp << 16 | /* CU_qp */
1391                  zero << 11 |   /* reserved */
1392                  5 << 8 |   /* intra_chroma_mode */
1393                  zero << 7 |    /* IPCM_enable, reserved for SKL */
1394                  cu_part_mode << 4 |    /* cu_part_mode */
1395                  zero << 3 |    /* cu_transquant_bypass_flag */
1396                  is_inter << 2 |    /* cu_pred_mode: intra 0, inter 1 */
1397                  cu_size          /* cu_size */
1398                 );
1399     cu_msg[1] = (zero << 30 |   /* reserved  */
1400                  zero << 24 |   /* intra_mode */
1401                  zero << 22 |   /* reserved  */
1402                  zero << 16 |   /* intra_mode */
1403                  zero << 14 |   /* reserved  */
1404                  zero << 8 |    /* intra_mode */
1405                  zero << 6 |    /* reserved  */
1406                  zero           /* intra_mode */
1407                 );
1408     /* l0: 4 MV (x,y); l1: 4 MV (x,y) */
1409     cu_msg[2] = ((mv_ptr[2] & 0xffff) << 16 |   /* mvx_l0[1]  */
1410                  (mv_ptr[0] & 0xffff)           /* mvx_l0[0] */
1411                 );
1412     cu_msg[3] = ((mv_ptr[6] & 0xffff) << 16 |   /* mvx_l0[3]  */
1413                  (mv_ptr[4] & 0xffff)           /* mvx_l0[2] */
1414                 );
1415     cu_msg[4] = ((mv_ptr[2] & 0xffff0000) |         /* mvy_l0[1]  */
1416                  (mv_ptr[0] & 0xffff0000) >> 16     /* mvy_l0[0] */
1417                 );
1418     cu_msg[5] = ((mv_ptr[6] & 0xffff0000) |         /* mvy_l0[3]  */
1419                  (mv_ptr[4] & 0xffff0000) >> 16     /* mvy_l0[2] */
1420                 );
1421
1422     cu_msg[6] = ((mv_ptr[3] & 0xffff) << 16 |   /* mvx_l1[1]  */
1423                  (mv_ptr[1] & 0xffff)           /* mvx_l1[0] */
1424                 );
1425     cu_msg[7] = ((mv_ptr[7] & 0xffff) << 16 |   /* mvx_l1[3]  */
1426                  (mv_ptr[5] & 0xffff)           /* mvx_l1[2] */
1427                 );
1428     cu_msg[8] = ((mv_ptr[3] & 0xffff0000) |         /* mvy_l1[1]  */
1429                  (mv_ptr[1] & 0xffff0000) >> 16     /* mvy_l1[0] */
1430                 );
1431     cu_msg[9] = ((mv_ptr[7] & 0xffff0000) |         /* mvy_l1[3]  */
1432                  (mv_ptr[5] & 0xffff0000) >> 16     /* mvy_l1[2] */
1433                 );
1434
1435     cu_msg[10] = (((vme_context->ref_index_in_mb[1] >> 24) & 0xf) << 28 |   /* ref_idx_l1[3]  */
1436                   ((vme_context->ref_index_in_mb[1] >> 16) & 0xf) << 24 |   /* ref_idx_l1[2] */
1437                   ((vme_context->ref_index_in_mb[1] >> 8) & 0xf) << 20 |    /* ref_idx_l1[1]  */
1438                   ((vme_context->ref_index_in_mb[1] >> 0) & 0xf) << 16 |    /* ref_idx_l1[0] */
1439                   ((vme_context->ref_index_in_mb[0] >> 24) & 0xf) << 12 |   /* ref_idx_l0[3]  */
1440                   ((vme_context->ref_index_in_mb[0] >> 16) & 0xf) << 8  |   /* ref_idx_l0[2] */
1441                   ((vme_context->ref_index_in_mb[0] >> 8) & 0xf) << 4 |     /* ref_idx_l0[1]  */
1442                   ((vme_context->ref_index_in_mb[0] >> 0) & 0xf)            /* ref_idx_l0[0] */
1443                  );
1444
1445     cu_msg[11] = tu_size; /* tu_size 00000000 00000000 00000000 01010101 (0x55) or 0x0 */
1446     cu_msg[12] = ((tu_count - 1) << 28 | /* tu count - 1 */
1447                   zero << 16 |  /* reserved  */
1448                   zero          /* tu_xform_Yskip[15:0] */
1449                  );
1450     cu_msg[13] = (zero << 16 |  /* tu_xform_Vskip[15:0]  */
1451                   zero          /* tu_xform_Uskip[15:0] */
1452                  );
1453     cu_msg[14] = zero;
1454     cu_msg[15] = zero;
1455 }
1456
1457 #define HEVC_SPLIT_CU_FLAG_64_64 ((0x1<<20)|(0xf<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1458 #define HEVC_SPLIT_CU_FLAG_32_32 ((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1459 #define HEVC_SPLIT_CU_FLAG_16_16 ((0x0<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1460 #define HEVC_SPLIT_CU_FLAG_8_8   ((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
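/*
 * split_coding_unit_flag layout (as used below): bit 20 appears to signal the
 * split of the CTB itself, and bits 16..19 the split of its four child CUs
 * (indexed by mb_x + mb_y * ctb_width_in_mb). For example, the 64x64 value
 * splits the CTB and all four 32x32 children down to 16x16.
 */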
1461
1462
1463 void
1464 intel_hevc_slice_insert_packed_data(VADriverContextP ctx,
1465                                     struct encode_state *encode_state,
1466                                     struct intel_encoder_context *encoder_context,
1467                                     int slice_index,
1468                                     struct intel_batchbuffer *slice_batch)
1469 {
1470     int count, i, start_index;
1471     unsigned int length_in_bits;
1472     VAEncPackedHeaderParameterBuffer *param = NULL;
1473     unsigned int *header_data = NULL;
1474     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1475     int slice_header_index;
1476
1477     if (encode_state->slice_header_index[slice_index] == 0)
1478         slice_header_index = -1;
1479     else
1480         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1481
1482     count = encode_state->slice_rawdata_count[slice_index];
1483     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1484
1485     for (i = 0; i < count; i++) {
1486         unsigned int skip_emul_byte_cnt;
1487
1488         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1489
1490         param = (VAEncPackedHeaderParameterBuffer *)
1491                 (encode_state->packed_header_params_ext[start_index + i]->buffer);
1492
1493         /* skip the slice header packed data type as it is inserted last */
1494         if (param->type == VAEncPackedHeaderSlice)
1495             continue;
1496
1497         length_in_bits = param->bit_length;
1498
1499         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1500
1501         /* as the slice header is still required, the last header flag is set to
1502          * zero.
1503          */
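        /*
         * Note on the insert_object() call below: the packed header is passed
         * as its length in DWords plus the number of valid bits in the last
         * DWord, together with the emulation-prevention skip count. The two
         * flags that follow appear to be an "is last header" flag (zero here,
         * one for the slice header) and an "end of slice" flag, and the final
         * flag requests emulation prevention bytes only when the packed data
         * does not already contain them.
         */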
1504         mfc_context->insert_object(ctx,
1505                                    encoder_context,
1506                                    header_data,
1507                                    ALIGN(length_in_bits, 32) >> 5,
1508                                    length_in_bits & 0x1f,
1509                                    skip_emul_byte_cnt,
1510                                    0,
1511                                    0,
1512                                    !param->has_emulation_bytes,
1513                                    slice_batch);
1514     }
1515
1516     if (slice_header_index == -1) {
1517         unsigned char *slice_header = NULL;
1518         int slice_header_length_in_bits = 0;
1519         VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1520         VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1521         VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
1522
1523         /* For the Normal HEVC */
1524         slice_header_length_in_bits = build_hevc_slice_header(pSequenceParameter,
1525                                       pPicParameter,
1526                                       pSliceParameter,
1527                                       &slice_header,
1528                                       0);
1529         mfc_context->insert_object(ctx, encoder_context,
1530                                    (unsigned int *)slice_header,
1531                                    ALIGN(slice_header_length_in_bits, 32) >> 5,
1532                                    slice_header_length_in_bits & 0x1f,
1533                                    5,  /* first 6 bytes are start code + nal unit type */
1534                                    1, 0, 1, slice_batch);
1535         free(slice_header);
1536     } else {
1537         unsigned int skip_emul_byte_cnt;
1538
1539         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1540
1541         param = (VAEncPackedHeaderParameterBuffer *)
1542                 (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1543         length_in_bits = param->bit_length;
1544
1545         /* as the slice header is the last header data for one slice,
1546          * the last header flag is set to one.
1547          */
1548         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1549
1550         mfc_context->insert_object(ctx,
1551                                    encoder_context,
1552                                    header_data,
1553                                    ALIGN(length_in_bits, 32) >> 5,
1554                                    length_in_bits & 0x1f,
1555                                    skip_emul_byte_cnt,
1556                                    1,
1557                                    0,
1558                                    !param->has_emulation_bytes,
1559                                    slice_batch);
1560     }
1561
1562     return;
1563 }
1564
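/*
 * Per-slice PAK programming: emit the HCP slice state, (for the first slice)
 * the stream headers, then the packed header data for this slice. After that
 * the VME output is walked CTB by CTB; for every 16x16 MB the cheaper of the
 * intra/inter VME results is converted into CU records in the indirect CU
 * buffer, and one HCP PAK object is emitted per CTB. Tail data closes the
 * slice (two DWords for the last slice of the picture).
 */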
1565 static void
1566 gen9_hcpe_hevc_pipeline_slice_programing(VADriverContextP ctx,
1567         struct encode_state *encode_state,
1568         struct intel_encoder_context *encoder_context,
1569         int slice_index,
1570         struct intel_batchbuffer *slice_batch)
1571 {
1572     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1573     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1574     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1575     VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1576     VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
1577     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1578     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1579     //unsigned char *slice_header = NULL;         // for future use
1580     //int slice_header_length_in_bits = 0;
1581     unsigned int tail_data[] = { 0x0, 0x0 };
1582     int slice_type = pSliceParameter->slice_type;
1583
1584     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
1585     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
1586     int ctb_size = 1 << log2_ctb_size;
1587     int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
1588     int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
1589     int last_slice = (pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice) == (width_in_ctb * height_in_ctb);
1590     int ctb_width_in_mb = (ctb_size + 15) / 16;
1591     int i_ctb, ctb_x, ctb_y;
1592     unsigned int split_coding_unit_flag = 0;
1593     int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15) / 16;
1594     int row_pad_flag = (pSequenceParameter->pic_height_in_luma_samples % ctb_size) > 0 ? 1 : 0;
1595     int col_pad_flag = (pSequenceParameter->pic_width_in_luma_samples % ctb_size) > 0 ? 1 : 0;
1596
1597     int is_intra = (slice_type == HEVC_SLICE_I);
1598     unsigned int *msg = NULL;
1599     unsigned char *msg_ptr = NULL;
1600     int macroblock_address = 0;
1601     int num_cu_record = 64;
1602     int cu_count = 1;
1603     int tmp_mb_mode = 0;
1604     int mb_x = 0, mb_y = 0;
1605     int mb_addr = 0;
1606     int cu_index = 0;
1607     int inter_rdo, intra_rdo;
1608
1609     if (log2_ctb_size == 5) num_cu_record = 16;
1610     else if (log2_ctb_size == 4) num_cu_record = 4;
1611     else if (log2_ctb_size == 6) num_cu_record = 64;
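    /* Each CTB appears to reserve one CU record slot per possible 8x8 block,
     * i.e. num_cu_record = (ctb_size / 8)^2: 64 for 64x64, 16 for 32x32 and
     * 4 for 16x16 CTBs. */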
1612     if (rate_control_mode == VA_RC_CBR) {
1613         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1614         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1615     }
1616
1617     /* only 8-bit pixel bit-depth is supported */
1618     assert(pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 == 0);
1619     assert(pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 == 0);
1620     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1621     assert(qp >= 0 && qp < 52);
1622
1623     {
1624         gen9_hcpe_hevc_slice_state(ctx,
1625                                    pPicParameter,
1626                                    pSliceParameter,
1627                                    encode_state, encoder_context,
1628                                    slice_batch);
1629
1630         if (slice_index == 0)
1631             intel_hcpe_hevc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1632
1633         intel_hevc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1634
1635         /*
1636         slice_header_length_in_bits = build_hevc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header, slice_index);
1637         int skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)slice_header, slice_header_length_in_bits);
1638
1639         mfc_context->insert_object(ctx, encoder_context,
1640                                    (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1641                                     skip_emul_byte_cnt,
1642                                     1, 0, 1, slice_batch);
1643         free(slice_header);
1644         */
1645     }
1646
1647
1648
1649     split_coding_unit_flag = (ctb_width_in_mb == 4) ? HEVC_SPLIT_CU_FLAG_64_64 : ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
1650
1651     dri_bo_map(vme_context->vme_output.bo , 1);
1652     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1653     dri_bo_map(mfc_context->hcp_indirect_cu_object.bo , 1);
1654
1655     for (i_ctb = pSliceParameter->slice_segment_address; i_ctb < pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice; i_ctb++) {
1656         int last_ctb = (i_ctb == (pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice - 1));
1657         int ctb_height_in_mb_internal = ctb_width_in_mb;
1658         int ctb_width_in_mb_internal = ctb_width_in_mb;
1659         ctb_x = i_ctb % width_in_ctb;
1660         ctb_y = i_ctb / width_in_ctb;
1661         if (ctb_y == (height_in_ctb - 1) && row_pad_flag) ctb_height_in_mb_internal = 1;
1662         if (ctb_x == (width_in_ctb - 1) && col_pad_flag) ctb_width_in_mb_internal = 1;
1663
1664         mb_x = 0;
1665         mb_y = 0;
1666         macroblock_address = ctb_y * width_in_mbs * ctb_width_in_mb + ctb_x * ctb_width_in_mb;
1667         split_coding_unit_flag = ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
1668         cu_count = 1;
1669         cu_index = 0;
1670         mb_addr = 0;
1671         msg = NULL;
1672         for (mb_y = 0; mb_y < ctb_height_in_mb_internal; mb_y++)
1673         {
1674             mb_addr = macroblock_address + mb_y * width_in_mbs ;
1675             for (mb_x = 0; mb_x < ctb_width_in_mb_internal; mb_x++)
1676             {
1677                 /* get the mb info from the vme out */
1678                 msg = (unsigned int *)(msg_ptr + mb_addr * vme_context->vme_output.size_block);
1679
1680                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1681                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1682                 /*fill to indirect cu */
1683                 /*to do */
1684                 if (is_intra || intra_rdo < inter_rdo) {
1685                     /* fill intra cu */
1686                     tmp_mb_mode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4;
1687                     if (tmp_mb_mode == AVC_INTRA_16X16) {
1688                         gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1689                     } else { // for 4x4 to use 8x8 replace
1690                         gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1691                         gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,1);
1692                         gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,2);
1693                         gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,3);
1694                         if (ctb_width_in_mb == 2)
1695                             split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1696                         else if (ctb_width_in_mb == 1)
1697                             split_coding_unit_flag |= 0x1 << 20;
1698                     }
1699                 } else {
1700                     msg += AVC_INTER_MSG_OFFSET;
1701                     /* fill inter cu */
1702                     tmp_mb_mode = msg[0] & AVC_INTER_MODE_MASK;
1703                     if (tmp_mb_mode == AVC_INTER_8X8) {
1704                         gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1705                         gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,1);
1706                         gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,2);
1707                         gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,3);
1708                         if (ctb_width_in_mb == 2)
1709                             split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1710                         else if (ctb_width_in_mb == 1)
1711                             split_coding_unit_flag |= 0x1 << 20;
1712
1713                     } else if (tmp_mb_mode == AVC_INTER_16X16 ||
1714                                tmp_mb_mode == AVC_INTER_8X16 ||
1715                                tmp_mb_mode == AVC_INTER_16X8) {
1716                         gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1717                     }
1718                 }
1719                 mb_addr++;
1720             }
1721         }
1722
1723         cu_count = cu_index;
1724         // PAK object fill accordingly.
1725         gen9_hcpe_hevc_pak_object(ctx, ctb_x, ctb_y, last_ctb, encoder_context, cu_count, split_coding_unit_flag, slice_batch);
1726     }
1727
1728     dri_bo_unmap(mfc_context->hcp_indirect_cu_object.bo);
1729     dri_bo_unmap(vme_context->vme_output.bo);
1730
1731     if (last_slice) {
1732         mfc_context->insert_object(ctx, encoder_context,
1733                                    tail_data, 2, 8,
1734                                    2, 1, 1, 0, slice_batch);
1735     } else {
1736         mfc_context->insert_object(ctx, encoder_context,
1737                                    tail_data, 1, 8,
1738                                    1, 1, 1, 0, slice_batch);
1739     }
1740 }
1741
1742 static dri_bo *
1743 gen9_hcpe_hevc_software_batchbuffer(VADriverContextP ctx,
1744                                     struct encode_state *encode_state,
1745                                     struct intel_encoder_context *encoder_context)
1746 {
1747     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1748     struct intel_batchbuffer *batch;
1749     dri_bo *batch_bo;
1750     int i;
1751
1752     batch = mfc_context->aux_batchbuffer;
1753     batch_bo = batch->buffer;
1754
1755     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1756         gen9_hcpe_hevc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1757     }
1758
1759     intel_batchbuffer_align(batch, 8);
1760
1761     BEGIN_BCS_BATCH(batch, 2);
1762     OUT_BCS_BATCH(batch, 0);
1763     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1764     ADVANCE_BCS_BATCH(batch);
1765
1766     dri_bo_reference(batch_bo);
1767     intel_batchbuffer_free(batch);
1768     mfc_context->aux_batchbuffer = NULL;
1769
1770     return batch_bo;
1771 }
1772
1773 #else
1774
1775 #endif
1776
1777 static void
1778 gen9_hcpe_hevc_pipeline_programing(VADriverContextP ctx,
1779                                    struct encode_state *encode_state,
1780                                    struct intel_encoder_context *encoder_context)
1781 {
1782     struct i965_driver_data *i965 = i965_driver_data(ctx);
1783     struct intel_batchbuffer *batch = encoder_context->base.batch;
1784     dri_bo *slice_batch_bo;
1785
1786 #ifdef HCP_SOFTWARE_SKYLAKE
1787     slice_batch_bo = gen9_hcpe_hevc_software_batchbuffer(ctx, encode_state, encoder_context);
1788 #else
1789     slice_batch_bo = gen9_hcpe_hevc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1790 #endif
1791
1792     // begin programming
1793     if (i965->intel.has_bsd2)
1794         intel_batchbuffer_start_atomic_bcs_override(batch, 0x4000, BSD_RING0);
1795     else
1796         intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1797     intel_batchbuffer_emit_mi_flush(batch);
1798
1799     // picture level programming
1800     gen9_hcpe_hevc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1801
1802     BEGIN_BCS_BATCH(batch, 3);
1803     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1804     OUT_BCS_RELOC(batch,
1805                   slice_batch_bo,
1806                   I915_GEM_DOMAIN_COMMAND, 0,
1807                   0);
1808     OUT_BCS_BATCH(batch, 0);
1809     ADVANCE_BCS_BATCH(batch);
1810
1811     // end programming
1812     intel_batchbuffer_end_atomic(batch);
1813
1814     dri_bo_unreference(slice_batch_bo);
1815 }
1816
1817 void intel_hcpe_hevc_pipeline_header_programing(VADriverContextP ctx,
1818         struct encode_state *encode_state,
1819         struct intel_encoder_context *encoder_context,
1820         struct intel_batchbuffer *slice_batch)
1821 {
1822     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1823     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_VPS);
1824     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1825     unsigned int skip_emul_byte_cnt;
1826
1827     if (encode_state->packed_header_data[idx]) {
1828         VAEncPackedHeaderParameterBuffer *param = NULL;
1829         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1830         unsigned int length_in_bits;
1831
1832         assert(encode_state->packed_header_param[idx]);
1833         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1834         length_in_bits = param->bit_length;
1835
1836         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1837         mfc_context->insert_object(ctx,
1838                                    encoder_context,
1839                                    header_data,
1840                                    ALIGN(length_in_bits, 32) >> 5,
1841                                    length_in_bits & 0x1f,
1842                                    skip_emul_byte_cnt,
1843                                    0,
1844                                    0,
1845                                    !param->has_emulation_bytes,
1846                                    slice_batch);
1847     }
1848
1849     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_VPS) + 1; // index to SPS
1850
1851     if (encode_state->packed_header_data[idx]) {
1852         VAEncPackedHeaderParameterBuffer *param = NULL;
1853         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1854         unsigned int length_in_bits;
1855
1856         assert(encode_state->packed_header_param[idx]);
1857         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1858         length_in_bits = param->bit_length;
1859
1860         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1861         mfc_context->insert_object(ctx,
1862                                    encoder_context,
1863                                    header_data,
1864                                    ALIGN(length_in_bits, 32) >> 5,
1865                                    length_in_bits & 0x1f,
1866                                    skip_emul_byte_cnt,
1867                                    0,
1868                                    0,
1869                                    !param->has_emulation_bytes,
1870                                    slice_batch);
1871     }
1872
1873     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_PPS);
1874
1875     if (encode_state->packed_header_data[idx]) {
1876         VAEncPackedHeaderParameterBuffer *param = NULL;
1877         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1878         unsigned int length_in_bits;
1879
1880         assert(encode_state->packed_header_param[idx]);
1881         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1882         length_in_bits = param->bit_length;
1883
1884         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1885
1886         mfc_context->insert_object(ctx,
1887                                    encoder_context,
1888                                    header_data,
1889                                    ALIGN(length_in_bits, 32) >> 5,
1890                                    length_in_bits & 0x1f,
1891                                    skip_emul_byte_cnt,
1892                                    0,
1893                                    0,
1894                                    !param->has_emulation_bytes,
1895                                    slice_batch);
1896     }
1897
1898     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_SEI);
1899
1900     if (encode_state->packed_header_data[idx]) {
1901         VAEncPackedHeaderParameterBuffer *param = NULL;
1902         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1903         unsigned int length_in_bits;
1904
1905         assert(encode_state->packed_header_param[idx]);
1906         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1907         length_in_bits = param->bit_length;
1908
1909         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1910         mfc_context->insert_object(ctx,
1911                                    encoder_context,
1912                                    header_data,
1913                                    ALIGN(length_in_bits, 32) >> 5,
1914                                    length_in_bits & 0x1f,
1915                                    skip_emul_byte_cnt,
1916                                    0,
1917                                    0,
1918                                    !param->has_emulation_bytes,
1919                                    slice_batch);
1920     } else if (rate_control_mode == VA_RC_CBR) {
1921         // this is the first AU
1922         struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1923
1924         unsigned char *sei_data = NULL;
1925
1926         int length_in_bits = build_hevc_idr_sei_buffer_timing(
1927                                  mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
1928                                  mfc_context->vui_hrd.i_initial_cpb_removal_delay,
1929                                  0,
1930                                  mfc_context->vui_hrd.i_cpb_removal_delay_length,
1931                                  mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
1932                                  mfc_context->vui_hrd.i_dpb_output_delay_length,
1933                                  0,
1934                                  &sei_data);
1935         mfc_context->insert_object(ctx,
1936                                    encoder_context,
1937                                    (unsigned int *)sei_data,
1938                                    ALIGN(length_in_bits, 32) >> 5,
1939                                    length_in_bits & 0x1f,
1940                                    4, /* TODO: as the NALU header is 2 bytes, this seems to skip only the start code and keep the NALU header */
1941                                    0,
1942                                    0,
1943                                    1,
1944                                    slice_batch);
1945         free(sei_data);
1946     }
1947 }
1948
1949 VAStatus intel_hcpe_hevc_prepare(VADriverContextP ctx,
1950                                  struct encode_state *encode_state,
1951                                  struct intel_encoder_context *encoder_context)
1952 {
1953     struct i965_driver_data *i965 = i965_driver_data(ctx);
1954     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1955     struct object_surface *obj_surface;
1956     struct object_buffer *obj_buffer;
1957     GenHevcSurface *hevc_encoder_surface;
1958     dri_bo *bo;
1959     VAStatus vaStatus = VA_STATUS_SUCCESS;
1960     int i;
1961     struct i965_coded_buffer_segment *coded_buffer_segment;
1962     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1963
1964     /* Set up all the input & output objects */
1965
1966     /* Set up the current frame and current direct MV buffer */
1967     obj_surface = encode_state->reconstructed_object;
1968     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1969
1970     if (obj_surface->private_data == NULL) {
1971         uint32_t size;
1972
1973         if (mfc_context->pic_size.ctb_size == 16)
1974             size = ((pSequenceParameter->pic_width_in_luma_samples + 63) >> 6) *
1975                    ((pSequenceParameter->pic_height_in_luma_samples + 15) >> 4);
1976         else
1977             size = ((pSequenceParameter->pic_width_in_luma_samples + 31) >> 5) *
1978                    ((pSequenceParameter->pic_height_in_luma_samples + 31) >> 5);
1979         size <<= 6; /* in unit of 64bytes */
1980
1981         hevc_encoder_surface = calloc(1, sizeof(GenHevcSurface));
1982
1983         assert(hevc_encoder_surface);
1984         hevc_encoder_surface->motion_vector_temporal_bo =
1985             dri_bo_alloc(i965->intel.bufmgr,
1986                          "motion vector temporal buffer",
1987                          size,
1988                          0x1000);
1989         assert(hevc_encoder_surface->motion_vector_temporal_bo);
1990
1991         obj_surface->private_data = (void *)hevc_encoder_surface;
1992         obj_surface->free_private_data = (void *)gen_free_hevc_surface;
1993     }
1994     hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
1995     hevc_encoder_surface->base.frame_store_id = -1;
1996     mfc_context->current_collocated_mv_temporal_buffer[NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS - 1].bo = hevc_encoder_surface->motion_vector_temporal_bo;
1997
1998     dri_bo_reference(hevc_encoder_surface->motion_vector_temporal_bo);
1999
2000     mfc_context->surface_state.width = obj_surface->orig_width;
2001     mfc_context->surface_state.height = obj_surface->orig_height;
2002     mfc_context->surface_state.w_pitch = obj_surface->width;
2003     mfc_context->surface_state.h_pitch = obj_surface->height;
2004
2005     /* Setup reference frames and direct mv buffers*/
2006     for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
2007         obj_surface = encode_state->reference_objects[i];
2008
2009         if (obj_surface && obj_surface->bo) {
2010             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
2011             dri_bo_reference(obj_surface->bo);
2012
2013             /* Check MV temporal buffer */
2014             if (obj_surface->private_data == NULL) {
2015                 uint32_t size;
2016
2017                 if (mfc_context->pic_size.ctb_size == 16)
2018                     size = ((pSequenceParameter->pic_width_in_luma_samples + 63) >> 6) *
2019                            ((pSequenceParameter->pic_height_in_luma_samples + 15) >> 4);
2020                 else
2021                     size = ((pSequenceParameter->pic_width_in_luma_samples + 31) >> 5) *
2022                            ((pSequenceParameter->pic_height_in_luma_samples + 31) >> 5);
2023                 size <<= 6; /* in unit of 64bytes */
2024
2025                 hevc_encoder_surface = calloc(1, sizeof(GenHevcSurface));
2026
2027                 if (hevc_encoder_surface) {
2028                     hevc_encoder_surface->motion_vector_temporal_bo =
2029                         dri_bo_alloc(i965->intel.bufmgr,
2030                                      "motion vector temporal buffer",
2031                                      size,
2032                                      0x1000);
2033                     assert(hevc_encoder_surface->motion_vector_temporal_bo);
2034                 }
2035
2036                 obj_surface->private_data = (void *)hevc_encoder_surface;
2037                 obj_surface->free_private_data = (void *)gen_free_hevc_surface;
2038             }
2039
2040             hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2041             hevc_encoder_surface->base.frame_store_id = -1;
2042             /* Setup MV temporal buffer */
2043             mfc_context->current_collocated_mv_temporal_buffer[i].bo = hevc_encoder_surface->motion_vector_temporal_bo;
2044             dri_bo_reference(hevc_encoder_surface->motion_vector_temporal_bo);
2045         } else {
2046             break;
2047         }
2048     }
2049
2050     mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
2051     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2052
2053     obj_buffer = encode_state->coded_buf_object;
2054     bo = obj_buffer->buffer_store->bo;
2055     mfc_context->hcp_indirect_pak_bse_object.bo = bo;
2056     mfc_context->hcp_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2057     mfc_context->hcp_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2058     dri_bo_reference(mfc_context->hcp_indirect_pak_bse_object.bo);
2059
2060     dri_bo_map(bo, 1);
2061     coded_buffer_segment = (struct i965_coded_buffer_segment *)(bo->virtual);
2062     coded_buffer_segment->mapped = 0;
2063     coded_buffer_segment->codec = encoder_context->codec;
2064     dri_bo_unmap(bo);
2065
2066     return vaStatus;
2067 }
2068
2069 /* HEVC BRC related */
2070
2071 static void
2072 intel_hcpe_bit_rate_control_context_init(struct encode_state *encode_state,
2073         struct gen9_hcpe_context *mfc_context)
2074 {
2075     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2076     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
2077     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
2078     int ctb_size = 1 << log2_ctb_size;
2079     int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
2080     int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
2081
2082     float fps = pSequenceParameter->vui_time_scale * 0.5 / pSequenceParameter->vui_num_units_in_tick;
2083     int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps + 4.0) / width_in_mbs / height_in_mbs;
2084     int intra_mb_size = inter_mb_size * 5.0;
2085     int i;
2086
2087     mfc_context->bit_rate_control_context[HEVC_SLICE_I].target_mb_size = intra_mb_size;
2088     mfc_context->bit_rate_control_context[HEVC_SLICE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
2089     mfc_context->bit_rate_control_context[HEVC_SLICE_P].target_mb_size = inter_mb_size;
2090     mfc_context->bit_rate_control_context[HEVC_SLICE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
2091     mfc_context->bit_rate_control_context[HEVC_SLICE_B].target_mb_size = inter_mb_size;
2092     mfc_context->bit_rate_control_context[HEVC_SLICE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
2093
2094     for (i = 0; i < 3; i++) {
2095         mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
2096         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
2097         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
2098         mfc_context->bit_rate_control_context[i].GrowInit = 6;
2099         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
2100         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
2101         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
2102
2103         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
2104         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
2105         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
2106         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
2107         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
2108         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
2109     }
2110
2111     mfc_context->bit_rate_control_context[HEVC_SLICE_I].TargetSizeInWord = (intra_mb_size + 16) / 16;
2112     mfc_context->bit_rate_control_context[HEVC_SLICE_P].TargetSizeInWord = (inter_mb_size + 16) / 16;
2113     mfc_context->bit_rate_control_context[HEVC_SLICE_B].TargetSizeInWord = (inter_mb_size + 16) / 16;
2114
2115     mfc_context->bit_rate_control_context[HEVC_SLICE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_I].TargetSizeInWord * 1.5;
2116     mfc_context->bit_rate_control_context[HEVC_SLICE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_P].TargetSizeInWord * 1.5;
2117     mfc_context->bit_rate_control_context[HEVC_SLICE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_B].TargetSizeInWord * 1.5;
2118 }
2119
2120 static void intel_hcpe_brc_init(struct encode_state *encode_state,
2121                                 struct intel_encoder_context* encoder_context)
2122 {
2123     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2124     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2125     VAEncMiscParameterBuffer* pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
2126     VAEncMiscParameterHRD* pParameterHRD = (VAEncMiscParameterHRD*)pMiscParamHRD->data;
2127
2128     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
2129     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
2130     int ctb_size = 1 << log2_ctb_size;
2131     int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
2132     int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
2133
2134
2135     double bitrate = pSequenceParameter->bits_per_second;
2136     double framerate = (double)pSequenceParameter->vui_time_scale / (2 * (double)pSequenceParameter->vui_num_units_in_tick);
2137     int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
2138     int intra_period = pSequenceParameter->intra_period;
2139     int ip_period = pSequenceParameter->ip_period;
2140     double qp1_size = 0.1 * 8 * 3 * (width_in_ctb << 4) * (height_in_ctb << 4) / 2;
2141     double qp51_size = 0.001 * 8 * 3 * (width_in_ctb << 4) * (height_in_ctb << 4) / 2;
2142     double bpf;
2143
2144     if (pSequenceParameter->ip_period) {
2145         pnum = (intra_period + ip_period - 1) / ip_period - 1;
2146         bnum = intra_period - inum - pnum;
2147     }
2148
2149     mfc_context->brc.mode = encoder_context->rate_control_mode;
2150
2151     mfc_context->brc.target_frame_size[HEVC_SLICE_I] = (int)((double)((bitrate * intra_period) / framerate) /
2152             (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
2153     mfc_context->brc.target_frame_size[HEVC_SLICE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[HEVC_SLICE_I];
2154     mfc_context->brc.target_frame_size[HEVC_SLICE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[HEVC_SLICE_I];
2155
2156     mfc_context->brc.gop_nums[HEVC_SLICE_I] = inum;
2157     mfc_context->brc.gop_nums[HEVC_SLICE_P] = pnum;
2158     mfc_context->brc.gop_nums[HEVC_SLICE_B] = bnum;
2159
2160     bpf = mfc_context->brc.bits_per_frame = bitrate / framerate;
2161
2162     mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size;
2163     mfc_context->hrd.current_buffer_fullness =
2164         (pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size) ?
2165         (double)pParameterHRD->initial_buffer_fullness : mfc_context->hrd.buffer_size / 2.;
2166     mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size / 2.;
2167     mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size / qp1_size;
2168     mfc_context->hrd.violation_noted = 0;
2169
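    /*
     * Initial QP guess: linearly interpolate between QP 51 (expected frame
     * size qp51_size) and QP 1 (expected frame size qp1_size) based on the
     * budgeted bits per frame. Rough illustrative numbers (not taken from any
     * real stream): with width_in_ctb = 60 and height_in_ctb = 34 (1920x1080,
     * 32x32 CTBs) at 4 Mbps / 30 fps, qp1_size ~= 626688, qp51_size ~= 6267
     * and bpf ~= 133333 bits, giving
     * QpPrimeY = 51 - 50 * (133333 - 6267) / (626688 - 6267) ~= 40.
     */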
2170     if ((bpf > qp51_size) && (bpf < qp1_size)) {
2171         mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 51 - 50 * (bpf - qp51_size) / (qp1_size - qp51_size);
2172     } else if (bpf >= qp1_size)
2173         mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 1;
2174     else if (bpf <= qp51_size)
2175         mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 51;
2176
2177     mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2178     mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY = mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY;
2179
2180     BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY, 1, 51);
2181     BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY, 1, 51);
2182     BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY, 1, 51);
2183 }
2184
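/*
 * Leaky-bucket style HRD tracking: every encoded frame drains frame_bits from
 * the buffer model and the nominal bits_per_frame are added back. Running the
 * buffer empty reports an underflow, exceeding its size an overflow (except
 * in VBR mode, where the fullness is simply clamped to the buffer size); in
 * both error cases the previous fullness is restored.
 */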
2185 int intel_hcpe_update_hrd(struct encode_state *encode_state,
2186                           struct gen9_hcpe_context *mfc_context,
2187                           int frame_bits)
2188 {
2189     double prev_bf = mfc_context->hrd.current_buffer_fullness;
2190
2191     mfc_context->hrd.current_buffer_fullness -= frame_bits;
2192
2193     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
2194         mfc_context->hrd.current_buffer_fullness = prev_bf;
2195         return BRC_UNDERFLOW;
2196     }
2197
2198     mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
2199     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
2200         if (mfc_context->brc.mode == VA_RC_VBR)
2201             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
2202         else {
2203             mfc_context->hrd.current_buffer_fullness = prev_bf;
2204             return BRC_OVERFLOW;
2205         }
2206     }
2207     return BRC_NO_HRD_VIOLATION;
2208 }
2209
2210 int intel_hcpe_brc_postpack(struct encode_state *encode_state,
2211                             struct gen9_hcpe_context *mfc_context,
2212                             int frame_bits)
2213 {
2214     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
2215     VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2216     int slicetype = pSliceParameter->slice_type;
2217     int qpi = mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY;
2218     int qpp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2219     int qpb = mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY;
2220     int qp; // quantizer of previously encoded slice of current type
2221     int qpn; // predicted quantizer for next frame of current type in integer format
2222     double qpf; // predicted quantizer for next frame of current type in float format
2223     double delta_qp; // QP correction
2224     int target_frame_size, frame_size_next;
2225     /* Notes:
2226      *  x - how far we are from the target HRD buffer fullness (normalized)
2227      *  y - how far we are from the nearer HRD buffer border, in bits
2228      */
2229     double x, y;
2230     double frame_size_alpha;
2231
2232     qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;
2233
2234     target_frame_size = mfc_context->brc.target_frame_size[slicetype];
2235     if (mfc_context->hrd.buffer_capacity < 5)
2236         frame_size_alpha = 0;
2237     else
2238         frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
2239     if (frame_size_alpha > 30) frame_size_alpha = 30;
2240     frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
2241                       (double)(frame_size_alpha + 1.);
2242
2243     /* frame_size_next: avoid a negative or too small value */
2244     if ((double)frame_size_next < (double)(target_frame_size * 0.25))
2245         frame_size_next = (int)((double)target_frame_size * 0.25);
2246
2247     qpf = (double)qp * target_frame_size / frame_size_next;
2248     qpn = (int)(qpf + 0.5);
2249
2250     if (qpn == qp) {
2251         /* qpn is qpf rounded to an integer; accumulate the rounding error so it can be compensated for over time */
2252         mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
2253         if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
2254             qpn++;
2255             mfc_context->brc.qpf_rounding_accumulator = 0.;
2256         } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
2257             qpn--;
2258             mfc_context->brc.qpf_rounding_accumulator = 0.;
2259         }
2260     }
2261     /* making sure that QP is not changing too fast */
2262     if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
2263     else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
2264     /* make sure that the predicted QP does not leave the valid QP range */
2265     BRC_CLIP(qpn, 1, 51);
2266
2267     /* check whether HRD compliance is still met */
2268     sts = intel_hcpe_update_hrd(encode_state, mfc_context, frame_bits);
2269
2270     /* calculate the QP delta as a function of the HRD buffer state */
2271     x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
2272     if (x > 0) {
2273         x /= mfc_context->hrd.target_buffer_fullness;
2274         y = mfc_context->hrd.current_buffer_fullness;
2275     } else {
2276         x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
2277         y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
2278     }
2279     if (y < 0.01) y = 0.01;
2280     if (x > 1) x = 1;
2281     else if (x < -1) x = -1;
2282
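    /*
     * QP correction term: x in [-1, 1] is the normalized deviation from the
     * target buffer fullness (positive when the buffer is draining), y the
     * remaining distance, in bits, to the nearer buffer border. sin(pi/2 * x)
     * scales the correction smoothly up to at most BRC_QP_MAX_CHANGE as the
     * deviation grows, while exp(-1 / y) only damps it when the buffer level
     * gets extremely close to a border.
     */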
2283     delta_qp = BRC_QP_MAX_CHANGE * exp(-1 / y) * sin(BRC_PI_0_5 * x);
2284     qpn = (int)(qpn + delta_qp + 0.5);
2285
2286     /* make sure that the predicted QP does not leave the valid QP range */
2287     BRC_CLIP(qpn, 1, 51);
2288
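    /*
     * When the HRD state is healthy, nudge the QPs of the other slice types so
     * they stay roughly at their nominal offsets from the just-updated type
     * (BRC_I_P_QP_DIFF between I and P, BRC_P_B_QP_DIFF between P and B),
     * moving them only half or a quarter of the way to limit oscillation.
     */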
2289     if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
2290         /* correcting QPs of slices of other types */
2291         if (slicetype == HEVC_SLICE_P) {
2292             if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
2293                 mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
2294             if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
2295                 mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
2296         } else if (slicetype == HEVC_SLICE_I) {
2297             if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
2298                 mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
2299             if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
2300                 mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
2301         } else { // HEVC_SLICE_B
2302             if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
2303                 mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
2304             if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
2305                 mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
2306         }
2307         BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY, 1, 51);
2308         BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY, 1, 51);
2309         BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY, 1, 51);
2310     } else if (sts == BRC_UNDERFLOW) { // underflow
2311         if (qpn <= qp) qpn = qp + 1;
2312         if (qpn > 51) {
2313             qpn = 51;
2314             sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
2315         }
2316     } else if (sts == BRC_OVERFLOW) {
2317         if (qpn >= qp) qpn = qp - 1;
2318         if (qpn < 1) { // overflow even at the minimum QP
2319             qpn = 1;
2320             sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
2321         }
2322     }
2323
2324     mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
2325
2326     return sts;
2327 }
2328
2329 static void intel_hcpe_hrd_context_init(struct encode_state *encode_state,
2330                                         struct intel_encoder_context *encoder_context)
2331 {
2332     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2333     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2334     unsigned int rate_control_mode = encoder_context->rate_control_mode;
2335     int target_bit_rate = pSequenceParameter->bits_per_second;
2336
2337     // currently only CBR mode is supported.
2338     if (rate_control_mode == VA_RC_CBR) {
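             /*
              * Rough interpretation of the values below: i_bit_rate_value is the
              * bit rate in units of 1024 bits per second, i_cpb_size_value is a
              * CPB of roughly 8 seconds' worth of bits in units of 1024 bits, and
              * i_initial_cpb_removal_delay is the time needed to fill half of the
              * CPB expressed in 90 kHz clock ticks.  With an illustrative
              * target_bit_rate of 4000000 this gives 3906, 31250 and 360000
              * (i.e. 4 seconds) respectively.
              */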
2339         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
2340         mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
2341         mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
2342         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
2343         mfc_context->vui_hrd.i_frame_number = 0;
2344
2345         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
2346         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
2347         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
2348     }
2349
2350 }
2351
2352 void
2353 intel_hcpe_hrd_context_update(struct encode_state *encode_state,
2354                               struct gen9_hcpe_context *mfc_context)
2355 {
2356     mfc_context->vui_hrd.i_frame_number++;
2357 }
2358
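     /*
      * This check sums num_ctu_in_slice over all slice parameter buffers and
      * returns 0 only when the slices cover exactly one full picture worth of
      * CTBs; otherwise it returns 1.
      */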
2359 int intel_hcpe_interlace_check(VADriverContextP ctx,
2360                                struct encode_state *encode_state,
2361                                struct intel_encoder_context *encoder_context)
2362 {
2363     VAEncSliceParameterBufferHEVC *pSliceParameter;
2364     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2365     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
2366     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
2367     int ctb_size = 1 << log2_ctb_size;
2368     int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
2369     int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
2370     int i;
2371     int ctbCount = 0;
2372
2373     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2374         pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[i]->buffer;
2375         ctbCount += pSliceParameter->num_ctu_in_slice;
2376     }
2377
2378     if (ctbCount == (width_in_ctb * height_in_ctb))
2379         return 0;
2380
2381     return 1;
2382 }
2383
2384 /*
2385  * Check whether the CBR-related parameters have been updated and decide
2386  * whether the CBR configuration needs to be reinitialized.
2387  * Currently the following parameters are checked:
2388  *      bits_per_second
2389  *      frame_rate
2390  *      gop_configuration(intra_period, ip_period, intra_idr_period)
2391  */
2392 static bool intel_hcpe_brc_updated_check(struct encode_state *encode_state,
2393         struct intel_encoder_context *encoder_context)
2394 {
2395     /* to do */
2396     unsigned int rate_control_mode = encoder_context->rate_control_mode;
2397     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2398     double cur_fps, cur_bitrate;
2399     VAEncSequenceParameterBufferHEVC *pSequenceParameter;
2400
2401
2402     if (rate_control_mode != VA_RC_CBR) {
2403         return false;
2404     }
2405
2406     pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2407
2408     cur_bitrate = pSequenceParameter->bits_per_second;
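         /*
          * Frame rate derived from the VUI timing fields; the factor of 2 in the
          * denominator appears to follow the two-ticks-per-frame convention used
          * for H.264 timing info.
          */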
2409     cur_fps = (double)pSequenceParameter->vui_time_scale /
2410               (2 * (double)pSequenceParameter->vui_num_units_in_tick);
2411
2412     if ((cur_bitrate == mfc_context->brc.saved_bps) &&
2413         (cur_fps == mfc_context->brc.saved_fps) &&
2414         (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period) &&
2415         (pSequenceParameter->intra_idr_period == mfc_context->brc.saved_idr_period) &&
2416         (pSequenceParameter->ip_period == mfc_context->brc.saved_ip_period)) {
2417         /* the CBR-related parameters have not been updated */
2418         return false;
2419     }
2420
2421     mfc_context->brc.saved_ip_period = pSequenceParameter->ip_period;
2422     mfc_context->brc.saved_intra_period = pSequenceParameter->intra_period;
2423     mfc_context->brc.saved_idr_period = pSequenceParameter->intra_idr_period;
2424     mfc_context->brc.saved_fps = cur_fps;
2425     mfc_context->brc.saved_bps = cur_bitrate;
2426     return true;
2427 }
2428
2429 void intel_hcpe_brc_prepare(struct encode_state *encode_state,
2430                             struct intel_encoder_context *encoder_context)
2431 {
2432     unsigned int rate_control_mode = encoder_context->rate_control_mode;
2433     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2434
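         /*
          * The BRC and HRD contexts are (re)initialized on the first frame (when
          * MaxSizeInWord and i_cpb_size_value are still zero) and whenever the
          * CBR-related sequence parameters change.
          */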
2435     if (rate_control_mode == VA_RC_CBR) {
2436         bool brc_updated;
2437         assert(encoder_context->codec != CODEC_MPEG2);
2438
2439         brc_updated = intel_hcpe_brc_updated_check(encode_state, encoder_context);
2440
2441         /* Programming bit rate control */
2442         if ((mfc_context->bit_rate_control_context[HEVC_SLICE_I].MaxSizeInWord == 0) ||
2443             brc_updated) {
2444             intel_hcpe_bit_rate_control_context_init(encode_state, mfc_context);
2445             intel_hcpe_brc_init(encode_state, encoder_context);
2446         }
2447
2448         /* Programming HRD control */
2449         if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated)
2450             intel_hcpe_hrd_context_init(encode_state, encoder_context);
2451     }
2452 }
2453
2454 /* HEVC interface API for encoder */
2455
2456 static VAStatus
2457 gen9_hcpe_hevc_encode_picture(VADriverContextP ctx,
2458                               struct encode_state *encode_state,
2459                               struct intel_encoder_context *encoder_context)
2460 {
2461     struct gen9_hcpe_context *hcpe_context = encoder_context->mfc_context;
2462     unsigned int rate_control_mode = encoder_context->rate_control_mode;
2463     int current_frame_bits_size;
2464     int sts;
2465
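         /*
          * In CBR mode the frame is re-encoded until BRC reports no HRD
          * violation; an unrecoverable violation (overflow at the minimum QP or
          * underflow at the maximum QP) is reported once and the frame is
          * accepted as-is.  Other rate control modes take a single pass.
          */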
2466     for (;;) {
2467         gen9_hcpe_init(ctx, encode_state, encoder_context);
2468         intel_hcpe_hevc_prepare(ctx, encode_state, encoder_context);
2469         /* Programming the BCS pipeline */
2470         gen9_hcpe_hevc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
2471         gen9_hcpe_run(ctx, encode_state, encoder_context);
2472         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
2473             gen9_hcpe_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
2474             sts = intel_hcpe_brc_postpack(encode_state, hcpe_context, current_frame_bits_size);
2475             if (sts == BRC_NO_HRD_VIOLATION) {
2476                 intel_hcpe_hrd_context_update(encode_state, hcpe_context);
2477                 break;
2478             } else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
2479                 if (!hcpe_context->hrd.violation_noted) {
2480                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP) ? "overflow" : "underflow");
2481                     hcpe_context->hrd.violation_noted = 1;
2482                 }
2483                 return VA_STATUS_SUCCESS;
2484             }
2485         } else {
2486             break;
2487         }
2488     }
2489
2490     return VA_STATUS_SUCCESS;
2491 }
2492
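     /*
      * Release every GPU buffer object owned by the HCP encoder context
      * (deblocking, metadata and SAO line/tile buffers, reference surfaces,
      * indirect objects and batchbuffer surfaces) and finally the context
      * itself.
      */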
2493 void
2494 gen9_hcpe_context_destroy(void *context)
2495 {
2496     struct gen9_hcpe_context *hcpe_context = context;
2497     int i;
2498
2499     dri_bo_unreference(hcpe_context->deblocking_filter_line_buffer.bo);
2500     hcpe_context->deblocking_filter_line_buffer.bo = NULL;
2501
2502     dri_bo_unreference(hcpe_context->deblocking_filter_tile_line_buffer.bo);
2503     hcpe_context->deblocking_filter_tile_line_buffer.bo = NULL;
2504
2505     dri_bo_unreference(hcpe_context->deblocking_filter_tile_column_buffer.bo);
2506     hcpe_context->deblocking_filter_tile_column_buffer.bo = NULL;
2507
2508     dri_bo_unreference(hcpe_context->uncompressed_picture_source.bo);
2509     hcpe_context->uncompressed_picture_source.bo = NULL;
2510
2511     dri_bo_unreference(hcpe_context->metadata_line_buffer.bo);
2512     hcpe_context->metadata_line_buffer.bo = NULL;
2513
2514     dri_bo_unreference(hcpe_context->metadata_tile_line_buffer.bo);
2515     hcpe_context->metadata_tile_line_buffer.bo = NULL;
2516
2517     dri_bo_unreference(hcpe_context->metadata_tile_column_buffer.bo);
2518     hcpe_context->metadata_tile_column_buffer.bo = NULL;
2519
2520     dri_bo_unreference(hcpe_context->sao_line_buffer.bo);
2521     hcpe_context->sao_line_buffer.bo = NULL;
2522
2523     dri_bo_unreference(hcpe_context->sao_tile_line_buffer.bo);
2524     hcpe_context->sao_tile_line_buffer.bo = NULL;
2525
2526     dri_bo_unreference(hcpe_context->sao_tile_column_buffer.bo);
2527     hcpe_context->sao_tile_column_buffer.bo = NULL;
2528
2529     /* mv temporal buffer */
2530     for (i = 0; i < NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS; i++) {
2531         if (hcpe_context->current_collocated_mv_temporal_buffer[i].bo != NULL)
2532             dri_bo_unreference(hcpe_context->current_collocated_mv_temporal_buffer[i].bo);
2533         hcpe_context->current_collocated_mv_temporal_buffer[i].bo = NULL;
2534     }
2535
2536     for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
2537         dri_bo_unreference(hcpe_context->reference_surfaces[i].bo);
2538         hcpe_context->reference_surfaces[i].bo = NULL;
2539     }
2540
2541     dri_bo_unreference(hcpe_context->hcp_indirect_cu_object.bo);
2542     hcpe_context->hcp_indirect_cu_object.bo = NULL;
2543
2544     dri_bo_unreference(hcpe_context->hcp_indirect_pak_bse_object.bo);
2545     hcpe_context->hcp_indirect_pak_bse_object.bo = NULL;
2546
2547     dri_bo_unreference(hcpe_context->hcp_batchbuffer_surface.bo);
2548     hcpe_context->hcp_batchbuffer_surface.bo = NULL;
2549
2550     dri_bo_unreference(hcpe_context->aux_batchbuffer_surface.bo);
2551     hcpe_context->aux_batchbuffer_surface.bo = NULL;
2552
2553     if (hcpe_context->aux_batchbuffer)
2554         intel_batchbuffer_free(hcpe_context->aux_batchbuffer);
2555
2556     hcpe_context->aux_batchbuffer = NULL;
2557
2558     free(hcpe_context);
2559 }
2560
2561 VAStatus gen9_hcpe_pipeline(VADriverContextP ctx,
2562                             VAProfile profile,
2563                             struct encode_state *encode_state,
2564                             struct intel_encoder_context *encoder_context)
2565 {
2566     VAStatus vaStatus;
2567
2568     switch (profile) {
2569     case VAProfileHEVCMain:
2570         vaStatus = gen9_hcpe_hevc_encode_picture(ctx, encode_state, encoder_context);
2571         break;
2572
2573     default:
2574         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2575         break;
2576     }
2577
2578     return vaStatus;
2579 }
2580
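     /*
      * Allocate the HCP encoder context, wire up the Gen9 state-emission
      * callbacks and register the pipeline, destroy and BRC-prepare hooks with
      * the encoder context.
      */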
2581 Bool gen9_hcpe_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2582 {
2583     struct gen9_hcpe_context *hcpe_context = calloc(1, sizeof(struct gen9_hcpe_context));
2584
2585     assert(hcpe_context);
2586     hcpe_context->pipe_mode_select = gen9_hcpe_pipe_mode_select;
2587     hcpe_context->set_surface_state = gen9_hcpe_surface_state;
2588     hcpe_context->ind_obj_base_addr_state = gen9_hcpe_ind_obj_base_addr_state;
2589     hcpe_context->pic_state = gen9_hcpe_hevc_pic_state;
2590     hcpe_context->qm_state = gen9_hcpe_hevc_qm_state;
2591     hcpe_context->fqm_state = gen9_hcpe_hevc_fqm_state;
2592     hcpe_context->insert_object = gen9_hcpe_hevc_insert_object;
2593     hcpe_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
2594
2595     encoder_context->mfc_context = hcpe_context;
2596     encoder_context->mfc_context_destroy = gen9_hcpe_context_destroy;
2597     encoder_context->mfc_pipeline = gen9_hcpe_pipeline;
2598     encoder_context->mfc_brc_prepare = intel_hcpe_brc_prepare;
2599
2600     hevc_gen_default_iq_matrix_encoder(&hcpe_context->iq_matrix_hevc);
2601
2602     return True;
2603 }