OSDN Git Service

HEVC10bit ENC: work around gpu hang when p010->nv12
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_vme.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "intel_driver.h"
37
38 #include "i965_defines.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "gen6_vme.h"
42 #include "gen6_mfc.h"
43 #include "gen9_mfc.h"
44 #include "intel_media.h"
45
46 #ifdef SURFACE_STATE_PADDED_SIZE
47 #undef SURFACE_STATE_PADDED_SIZE
48 #endif
49
50 #define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
51 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
52 #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
53
54 #define VME_INTRA_SHADER        0
55 #define VME_INTER_SHADER        1
56 #define VME_BINTER_SHADER       2
57
58 #define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
59 #define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
60 #define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
61
62 #define VME_MSG_LENGTH          32
63
/* Gen9 VME kernel binaries for AVC encoding.  The .g9b files are
 * build-time generated arrays of raw EU instructions (4 dwords per
 * row), compiled from the sources under shaders/vme/. */
static const uint32_t gen9_vme_intra_frame[][4] = {
#include "shaders/vme/intra_frame_gen9.g9b"
};

static const uint32_t gen9_vme_inter_frame[][4] = {
#include "shaders/vme/inter_frame_gen9.g9b"
};

static const uint32_t gen9_vme_inter_bframe[][4] = {
#include "shaders/vme/inter_bframe_gen9.g9b"
};

/* AVC kernel table, ordered so that each entry's index matches its
 * VME_*_SHADER id.  The trailing NULL is the kernel bo pointer,
 * presumably filled in when the kernels are uploaded — confirm in
 * the gpe context load path. */
static struct i965_kernel gen9_vme_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen9_vme_intra_frame,
        sizeof(gen9_vme_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen9_vme_inter_frame,
        sizeof(gen9_vme_inter_frame),
        NULL
    },
    {
        "VME inter BFrame",
        VME_BINTER_SHADER,
        gen9_vme_inter_bframe,
        sizeof(gen9_vme_inter_bframe),
        NULL
    }
};
99
/* MPEG-2 VME kernels.  Note the intra kernel reuses the AVC intra
 * shader binary; only the inter kernel is MPEG-2 specific. */
static const uint32_t gen9_vme_mpeg2_intra_frame[][4] = {
#include "shaders/vme/intra_frame_gen9.g9b"
};

static const uint32_t gen9_vme_mpeg2_inter_frame[][4] = {
#include "shaders/vme/mpeg2_inter_gen9.g9b"
};

/* MPEG-2 kernel table (no B-frame kernel). */
static struct i965_kernel gen9_vme_mpeg2_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen9_vme_mpeg2_intra_frame,
        sizeof(gen9_vme_mpeg2_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen9_vme_mpeg2_inter_frame,
        sizeof(gen9_vme_mpeg2_inter_frame),
        NULL
    },
};
124
/* VP8 VME kernels: dedicated intra and inter shader binaries. */
static const uint32_t gen9_vme_vp8_intra_frame[][4] = {
#include "shaders/vme/vp8_intra_frame_gen9.g9b"
};

static const uint32_t gen9_vme_vp8_inter_frame[][4] = {
#include "shaders/vme/vp8_inter_frame_gen9.g9b"
};

/* VP8 kernel table (no B-frame kernel). */
static struct i965_kernel gen9_vme_vp8_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen9_vme_vp8_intra_frame,
        sizeof(gen9_vme_vp8_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen9_vme_vp8_inter_frame,
        sizeof(gen9_vme_vp8_inter_frame),
        NULL
    },
};
149
/* HEVC VME kernels.  All three entries reuse the AVC shader binaries:
 * motion estimation is run at AVC MB granularity and the results are
 * mapped to HEVC CUs elsewhere in the encoder. */

static const uint32_t gen9_vme_hevc_intra_frame[][4] = {
#include "shaders/vme/intra_frame_gen9.g9b"
};

static const uint32_t gen9_vme_hevc_inter_frame[][4] = {
#include "shaders/vme/inter_frame_gen9.g9b"
};

static const uint32_t gen9_vme_hevc_inter_bframe[][4] = {
#include "shaders/vme/inter_bframe_gen9.g9b"
};

/* HEVC kernel table, indexed by VME_*_SHADER. */
static struct i965_kernel gen9_vme_hevc_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen9_vme_hevc_intra_frame,
        sizeof(gen9_vme_hevc_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen9_vme_hevc_inter_frame,
        sizeof(gen9_vme_hevc_inter_frame),
        NULL
    },
    {
        "VME inter BFrame",
        VME_BINTER_SHADER,
        gen9_vme_hevc_inter_bframe,
        sizeof(gen9_vme_hevc_inter_bframe),
        NULL
    }
};
187 /* only used for VME source surface state */
188 static void
189 gen9_vme_source_surface_state(VADriverContextP ctx,
190                               int index,
191                               struct object_surface *obj_surface,
192                               struct intel_encoder_context *encoder_context)
193 {
194     struct gen6_vme_context *vme_context = encoder_context->vme_context;
195
196     vme_context->vme_surface2_setup(ctx,
197                                     &vme_context->gpe_context,
198                                     obj_surface,
199                                     BINDING_TABLE_OFFSET(index),
200                                     SURFACE_STATE_OFFSET(index));
201 }
202
203 static void
204 gen9_vme_media_source_surface_state(VADriverContextP ctx,
205                                     int index,
206                                     struct object_surface *obj_surface,
207                                     struct intel_encoder_context *encoder_context)
208 {
209     struct gen6_vme_context *vme_context = encoder_context->vme_context;
210
211     vme_context->vme_media_rw_surface_setup(ctx,
212                                             &vme_context->gpe_context,
213                                             obj_surface,
214                                             BINDING_TABLE_OFFSET(index),
215                                             SURFACE_STATE_OFFSET(index),
216                                             0);
217 }
218
219 static void
220 gen9_vme_media_chroma_source_surface_state(VADriverContextP ctx,
221                                            int index,
222                                            struct object_surface *obj_surface,
223                                            struct intel_encoder_context *encoder_context)
224 {
225     struct gen6_vme_context *vme_context = encoder_context->vme_context;
226
227     vme_context->vme_media_chroma_surface_setup(ctx,
228                                                 &vme_context->gpe_context,
229                                                 obj_surface,
230                                                 BINDING_TABLE_OFFSET(index),
231                                                 SURFACE_STATE_OFFSET(index),
232                                                 0);
233 }
234
235 static void
236 gen9_vme_output_buffer_setup(VADriverContextP ctx,
237                              struct encode_state *encode_state,
238                              int index,
239                              struct intel_encoder_context *encoder_context,
240                              int is_intra,
241                              int width_in_mbs,
242                              int height_in_mbs)
243
244 {
245     struct i965_driver_data *i965 = i965_driver_data(ctx);
246     struct gen6_vme_context *vme_context = encoder_context->vme_context;
247
248     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
249     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
250
251     if (is_intra)
252         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
253     else
254         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
255     /*
256      * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
257      * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
258      * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
259      */
260
261     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
262                                               "VME output buffer",
263                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
264                                               0x1000);
265     assert(vme_context->vme_output.bo);
266     vme_context->vme_buffer_suface_setup(ctx,
267                                          &vme_context->gpe_context,
268                                          &vme_context->vme_output,
269                                          BINDING_TABLE_OFFSET(index),
270                                          SURFACE_STATE_OFFSET(index));
271 }
272
273 static void
274 gen9_vme_avc_output_buffer_setup(VADriverContextP ctx,
275                              struct encode_state *encode_state,
276                              int index,
277                              struct intel_encoder_context *encoder_context)
278 {
279     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
280     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
281     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
282     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
283     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
284
285     gen9_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
286
287 }
288
289 static void
290 gen9_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
291                                       struct encode_state *encode_state,
292                                       int index,
293                                       struct intel_encoder_context *encoder_context,
294                                       int width_in_mbs,
295                                       int height_in_mbs)
296 {
297     struct i965_driver_data *i965 = i965_driver_data(ctx);
298     struct gen6_vme_context *vme_context = encoder_context->vme_context;
299
300     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
301     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
302     vme_context->vme_batchbuffer.pitch = 16;
303     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
304                                                    "VME batchbuffer",
305                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
306                                                    0x1000);
307     vme_context->vme_buffer_suface_setup(ctx,
308                                          &vme_context->gpe_context,
309                                          &vme_context->vme_batchbuffer,
310                                          BINDING_TABLE_OFFSET(index),
311                                          SURFACE_STATE_OFFSET(index));
312 }
313
314 static void
315 gen9_vme_avc_output_vme_batchbuffer_setup(VADriverContextP ctx,
316                                       struct encode_state *encode_state,
317                                       int index,
318                                       struct intel_encoder_context *encoder_context)
319 {
320     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
321     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
322     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
323
324     gen9_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
325 }
326
327                                       
328 static VAStatus
329 gen9_vme_surface_setup(VADriverContextP ctx,
330                        struct encode_state *encode_state,
331                        int is_intra,
332                        struct intel_encoder_context *encoder_context)
333 {
334     struct object_surface *obj_surface;
335
336     /*Setup surfaces state*/
337     /* current picture for encoding */
338     obj_surface = encode_state->input_yuv_object;
339     assert(obj_surface);
340     gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
341     gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
342     gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
343
344     if (!is_intra) {
345         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
346         int slice_type;
347
348         slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
349         assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
350
351         intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);
352
353         if (slice_type == SLICE_TYPE_B)
354             intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
355     }
356
357     /* VME output */
358     gen9_vme_avc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
359     gen9_vme_avc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
360     intel_h264_setup_cost_surface(ctx, encode_state, encoder_context,
361                                   BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET),
362                                   SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET));
363
364     return VA_STATUS_SUCCESS;
365 }
366
367 static VAStatus gen9_vme_interface_setup(VADriverContextP ctx,
368                                          struct encode_state *encode_state,
369                                          struct intel_encoder_context *encoder_context)
370 {
371     struct gen6_vme_context *vme_context = encoder_context->vme_context;
372     struct gen8_interface_descriptor_data *desc;
373     int i;
374     dri_bo *bo;
375     unsigned char *desc_ptr;
376
377     bo = vme_context->gpe_context.dynamic_state.bo;
378     dri_bo_map(bo, 1);
379     assert(bo->virtual);
380     desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt_offset;
381
382     desc = (struct gen8_interface_descriptor_data *)desc_ptr;
383
384     for (i = 0; i < vme_context->vme_kernel_sum; i++) {
385         struct i965_kernel *kernel;
386         kernel = &vme_context->gpe_context.kernels[i];
387         assert(sizeof(*desc) == 32);
388         /*Setup the descritor table*/
389         memset(desc, 0, sizeof(*desc));
390         desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
391         desc->desc3.sampler_count = 0; /* FIXME: */
392         desc->desc3.sampler_state_pointer = 0;
393         desc->desc4.binding_table_entry_count = 1; /* FIXME: */
394         desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
395         desc->desc5.constant_urb_entry_read_offset = 0;
396         desc->desc5.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
397
398         desc++;
399     }
400
401     dri_bo_unmap(bo);
402
403     return VA_STATUS_SUCCESS;
404 }
405
406 static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,
407                                         struct encode_state *encode_state,
408                                         struct intel_encoder_context *encoder_context,
409                                         int denom)
410 {
411     struct gen6_vme_context *vme_context = encoder_context->vme_context;
412     unsigned char *constant_buffer;
413     unsigned int *vme_state_message;
414     int mv_num = 32;
415
416     vme_state_message = (unsigned int *)vme_context->vme_state_message;
417
418     if (encoder_context->codec == CODEC_H264 ||
419         encoder_context->codec == CODEC_H264_MVC) {
420         if (vme_context->h264_level >= 30) {
421             mv_num = 16 / denom;
422
423             if (vme_context->h264_level >= 31)
424                 mv_num = 8 / denom;
425         }
426     } else if (encoder_context->codec == CODEC_MPEG2) {
427         mv_num = 2 / denom;
428     }else if (encoder_context->codec == CODEC_HEVC) {
429         if (vme_context->hevc_level >= 30*3) {
430             mv_num = 16;
431
432             if (vme_context->hevc_level >= 31*3)
433                 mv_num = 8;
434         }/* use the avc level setting */
435     }
436
437     vme_state_message[31] = mv_num;
438
439     dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
440     assert(vme_context->gpe_context.dynamic_state.bo->virtual);
441     constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual +
442                                          vme_context->gpe_context.curbe_offset;
443
444     /* VME MV/Mb cost table is passed by using const buffer */
445     /* Now it uses the fixed search path. So it is constructed directly
446      * in the GPU shader.
447      */
448     memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
449
450     dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
451
452     return VA_STATUS_SUCCESS;
453 }
454
455 #define         MB_SCOREBOARD_A         (1 << 0)
456 #define         MB_SCOREBOARD_B         (1 << 1)
457 #define         MB_SCOREBOARD_C         (1 << 2)
458
/*
 * Return 0 when MB (x_index, y_index) lies inside the picture
 * (mb_width x mb_height) and inside the slice starting at first_mb
 * with num_mb macroblocks; -1 otherwise.
 *
 * NOTE(review): the slice upper bound is inclusive — an index equal to
 * first_mb + num_mb is still treated as in-bounds.  This looks like a
 * possible off-by-one but is preserved as-is; confirm against the
 * scoreboard walker before tightening to >=.
 */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    if (x_index < 0 || x_index >= mb_width ||
        y_index < 0 || y_index >= mb_height)
        return -1;

    const int mb_index = y_index * mb_width + x_index;

    if (mb_index < first_mb || mb_index > first_mb + num_mb)
        return -1;

    return 0;
}
473
/*
 * Fill the second-level batch buffer with one MEDIA_OBJECT per MB,
 * walking each slice along diagonal (x - 2, y + 1) wavefronts so the
 * hardware scoreboard (dependencies A = left, B = top, C = top-right)
 * can run independent MBs concurrently.  Used only when hw scoreboard
 * is allowed, i.e. every slice starts on a MB-row boundary (see
 * gen9_vme_pipeline_programing).
 *
 * Each MEDIA_OBJECT is 8 dwords and is followed by a 2-dword
 * MEDIA_STATE_FLUSH; the buffer is terminated with
 * MI_BATCH_BUFFER_END.
 */
static void
gen9wa_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     int mb_width, int mb_height,
                                     int kernel,
                                     int transform_8x8_mode_flag,
                                     struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_row;
    int s;
    unsigned int *command_ptr;

#define         USE_SCOREBOARD          (1 << 21)

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
        int first_mb = pSliceParameter->macroblock_address;
        int num_mb = pSliceParameter->num_macroblocks;
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;

        /* mb_row is the slice's first row: MBs on that row have no
         * usable top neighbour within the slice. */
        x_outer = first_mb % mb_width;
        y_outer = first_mb / mb_width;
        mb_row = y_outer;

        /* Phase 1: start a wavefront from each column of the slice's
         * first row, up to (but not including) mb_width - 2. */
        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            x_inner = x_outer;
            y_inner = y_outer;
            /* Walk one diagonal wavefront down-left until it leaves
             * the slice/picture. */
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* Intra neighbour availability and the matching
                 * scoreboard dependency mask for this MB. */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;

                /* Advance along the diagonal. */
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* Phase 2: start the remaining wavefronts from column
         * mb_width - 2, wrapping to the next row each time the start
         * column runs off the right edge. */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = first_mb / mb_width;
        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* Same neighbour/dependency computation as phase 1. */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer++;
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
602
/*
 * Linear (non-scoreboard) fill of the second-level batch buffer: one
 * 9-dword MEDIA_OBJECT per MB in raster order, followed by a 2-dword
 * MEDIA_STATE_FLUSH each, terminated by MI_BATCH_BUFFER_END.
 *
 * Inline data carries the MB position, quality/transform flags, the
 * intra neighbour-availability byte, and the per-MB QP (from the ROI
 * map when enabled, otherwise the slice QP).
 */
static void
gen9_vme_fill_vme_batchbuffer(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              int mb_width, int mb_height,
                              int kernel,
                              int transform_8x8_mode_flag,
                              struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;
    int i, s;
    unsigned int *command_ptr;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    int qp;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    int qp_mb, qp_index;

    /* CQP: QP comes straight from the parameters; otherwise from the
     * bitrate-control state for this slice type. */
    if (encoder_context->rate_control_mode == VA_RC_CQP)
        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    else
        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
        int slice_mb_begin = pSliceParameter->macroblock_address;
        int slice_mb_number = pSliceParameter->num_macroblocks;
        unsigned int mb_intra_ub;
        /* Non-zero when the slice starts mid-row. */
        int slice_mb_x = pSliceParameter->macroblock_address % mb_width;
        for (i = 0; i < slice_mb_number;  ) {
            int mb_count = i + slice_mb_begin;
            mb_x = mb_count % mb_width;
            mb_y = mb_count / mb_width;
            /* Intra neighbour availability within the picture:
             * AE = left, B = top, C = top-right, D = top-left. */
            mb_intra_ub = 0;
            if (mb_x != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
            }
            if (mb_y != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                if (mb_x != 0)
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                if (mb_x != (mb_width -1))
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
            }
            /* First MB row of the slice: top neighbours (and the left
             * neighbour of the very first MB) lie outside the slice
             * and must be masked off; C stays available at the row end
             * of a mid-row slice start. */
            if (i < mb_width) {
                if (i == 0)
                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
                if ((i == (mb_width - 1)) && slice_mb_x) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                }
            }

            /* For a mid-row slice start, the top-left neighbour of
             * the first MB of the second row is outside the slice. */
            if ((i == mb_width) && slice_mb_x) {
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
            }
            *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
            *command_ptr++ = kernel;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;

            /*inline data */
            *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
            *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
            /* qp occupies one byte */
            if (vme_context->roi_enabled) {
                qp_index = mb_y * mb_width + mb_x;
                qp_mb = *(vme_context->qp_per_mb + qp_index);
            } else
                qp_mb = qp;
            *command_ptr++ = qp_mb;

            *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
            *command_ptr++ = 0;
            i += 1;
        }
    }

    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
692
/* Reset per-frame VME state: (re)initialize the GPE context and drop
 * any buffer objects left over from a previous frame; they are
 * reallocated on demand by the setup functions above. */
static void gen9_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;

    gen8_gpe_context_init(ctx, &vme_context->gpe_context);

    /* VME output buffer */
    dri_bo_unreference(vme_context->vme_output.bo);
    vme_context->vme_output.bo = NULL;

    /* Second-level VME batch buffer */
    dri_bo_unreference(vme_context->vme_batchbuffer.bo);
    vme_context->vme_batchbuffer.bo = NULL;

    /* VME state */
    dri_bo_unreference(vme_context->vme_state.bo);
    vme_context->vme_state.bo = NULL;
}
710
/*
 * Build and submit the VME pipeline for the current AVC frame:
 * pick the kernel from the slice type, fill the second-level batch
 * buffer (scoreboard walker when possible, linear fill otherwise),
 * then emit the GPE pipeline plus a chained MI_BATCH_BUFFER_START
 * into the main batch.
 */
static void gen9_vme_pipeline_programing(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    int kernel_shader;
    bool allow_hwscore = true;
    int s;
    unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);

    /* The hw-scoreboard walker is only usable when every slice starts
     * on a MB-row boundary, and is skipped entirely in low-quality
     * mode. */
    if (is_low_quality)
        allow_hwscore = false;
    else {
        for (s = 0; s < encode_state->num_slice_params_ext; s++) {
            pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
            if ((pSliceParameter->macroblock_address % width_in_mbs)) {
                allow_hwscore = false;
                break;
            }
        }
    }

    /* NOTE(review): after the loop above, pSliceParameter points at
     * the last slice inspected, not necessarily slice 0 — the kernel
     * choice below assumes all slices share one type; confirm. */
    if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
        (pSliceParameter->slice_type == SLICE_TYPE_SI)) {
        kernel_shader = VME_INTRA_SHADER;
    } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
               (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
        kernel_shader = VME_INTER_SHADER;
    } else {
        /* B slices fall back to the plain inter kernel when the
         * scoreboard walker is unavailable. */
        kernel_shader = VME_BINTER_SHADER;
        if (!allow_hwscore)
            kernel_shader = VME_INTER_SHADER;
    }
    if (allow_hwscore)
        gen9wa_vme_walker_fill_vme_batchbuffer(ctx,
                                               encode_state,
                                               width_in_mbs, height_in_mbs,
                                               kernel_shader,
                                               pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
                                               encoder_context);
    else
        gen9_vme_fill_vme_batchbuffer(ctx,
                                      encode_state,
                                      width_in_mbs, height_in_mbs,
                                      kernel_shader,
                                      pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
                                      encoder_context);

    /* Launch the per-MB command buffer; bits (1 << 8) | (1 << 0)
     * presumably select second-level batch / PPGTT addressing —
     * confirm against the MI_BATCH_BUFFER_START definition. */
    intel_batchbuffer_start_atomic(batch, 0x1000);
    gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(batch,
              vme_context->vme_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);
}
780
781 static VAStatus gen9_vme_prepare(VADriverContextP ctx,
782                                  struct encode_state *encode_state,
783                                  struct intel_encoder_context *encoder_context)
784 {
785     VAStatus vaStatus = VA_STATUS_SUCCESS;
786     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
787     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
788     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
789     struct gen6_vme_context *vme_context = encoder_context->vme_context;
790
791     if (!vme_context->h264_level ||
792         (vme_context->h264_level != pSequenceParameter->level_idc)) {
793             vme_context->h264_level = pSequenceParameter->level_idc;
794     }
795
796     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
797     intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context);
798     intel_h264_enc_roi_config(ctx, encode_state, encoder_context);
799
800     /*Setup all the memory object*/
801     gen9_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
802     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
803     //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
804     gen9_vme_constant_setup(ctx, encode_state, encoder_context, (pSliceParameter->slice_type == SLICE_TYPE_B) ? 2 : 1);
805
806     /*Programing media pipeline*/
807     gen9_vme_pipeline_programing(ctx, encode_state, encoder_context);
808
809     return vaStatus;
810 }
811
812 static VAStatus gen9_vme_run(VADriverContextP ctx,
813                              struct encode_state *encode_state,
814                              struct intel_encoder_context *encoder_context)
815 {
816     struct intel_batchbuffer *batch = encoder_context->base.batch;
817
818     intel_batchbuffer_flush(batch);
819
820     return VA_STATUS_SUCCESS;
821 }
822
/* Per-frame VME teardown hook: nothing to release here, completion is
 * handled when the batch submitted by gen9_vme_run() retires. */
static VAStatus gen9_vme_stop(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    return VA_STATUS_SUCCESS;
}
829
830 static VAStatus
831 gen9_vme_pipeline(VADriverContextP ctx,
832                   VAProfile profile,
833                   struct encode_state *encode_state,
834                   struct intel_encoder_context *encoder_context)
835 {
836     gen9_vme_media_init(ctx, encoder_context);
837     gen9_vme_prepare(ctx, encode_state, encoder_context);
838     gen9_vme_run(ctx, encode_state, encoder_context);
839     gen9_vme_stop(ctx, encode_state, encoder_context);
840
841     return VA_STATUS_SUCCESS;
842 }
843
844 static void
845 gen9_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
846                                    struct encode_state *encode_state,
847                                    int index,
848                                    int is_intra,
849                                    struct intel_encoder_context *encoder_context)
850
851 {
852     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
853     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
854     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
855
856     gen9_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
857 }
858
859 static void
860 gen9_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
861                                             struct encode_state *encode_state,
862                                             int index,
863                                             struct intel_encoder_context *encoder_context)
864
865 {
866     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
867     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
868     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
869
870     gen9_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
871 }
872
873 static VAStatus
874 gen9_vme_mpeg2_surface_setup(VADriverContextP ctx,
875                              struct encode_state *encode_state,
876                              int is_intra,
877                              struct intel_encoder_context *encoder_context)
878 {
879     struct object_surface *obj_surface;
880
881     /*Setup surfaces state*/
882     /* current picture for encoding */
883     obj_surface = encode_state->input_yuv_object;
884     gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
885     gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
886     gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
887
888     if (!is_intra) {
889         /* reference 0 */
890         obj_surface = encode_state->reference_objects[0];
891
892         if (obj_surface->bo != NULL)
893             gen9_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
894
895         /* reference 1 */
896         obj_surface = encode_state->reference_objects[1];
897
898         if (obj_surface && obj_surface->bo != NULL)
899             gen9_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
900     }
901
902     /* VME output */
903     gen9_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
904     gen9_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
905
906     return VA_STATUS_SUCCESS;
907 }
908
/*
 * Fill the second-level VME batchbuffer with one MEDIA_OBJECT per MB,
 * walking the frame in a 45-degree wavefront so the hardware scoreboard
 * can enforce the intra-prediction dependencies between neighbouring MBs
 * (left / top / top-left / top-right).
 *
 * The walk has two phases: the first loop emits the diagonals whose start
 * column lies in [0, mb_width - 2) on the top row; the second emits the
 * remaining diagonals starting at column (mb_width - 2), wrapping to the
 * next row each time the start column runs past the right edge.
 */
static void
gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                                           struct encode_state *encode_state,
                                           int mb_width, int mb_height,
                                           int kernel,
                                           struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    unsigned int *command_ptr;

/* MEDIA_OBJECT DW2 bit enabling the HW scoreboard for this object */
#define         MPEG2_SCOREBOARD                (1 << 21)

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    {
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;
        int first_mb = 0;
        int num_mb = mb_width * mb_height;

        x_outer = 0;
        y_outer = 0;

        /* Phase 1: diagonals starting on the top row, left of (mb_width - 2) */
        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            x_inner = x_outer;
            y_inner = y_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                /* Left neighbour exists unless this MB is on the first column */
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                /* Top-row neighbours exist unless this MB is on the first row */
                if (y_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;

                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = MPEG2_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;

                /* Step along the diagonal: two MBs left, one row down */
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* Phase 2: remaining diagonals starting at column (mb_width - 2),
         * wrapping to the next row once the start column passes the edge */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = 0;
        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;

                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = MPEG2_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));

                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer++;
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    /* Terminate the second-level batch */
    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
    return;
}
1036
/*
 * Fill the second-level VME batchbuffer with one MEDIA_OBJECT per MB in
 * plain raster order (no hardware scoreboard), iterating over every slice
 * parameter buffer and every slice element inside each buffer.
 */
static void
gen9_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    int mb_width, int mb_height,
                                    int kernel,
                                    int transform_8x8_mode_flag,
                                    struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;
    int i, s, j;
    unsigned int *command_ptr;


    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;

        for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
            int slice_mb_begin = slice_param->macroblock_address;
            int slice_mb_number = slice_param->num_macroblocks;
            unsigned int mb_intra_ub;

            for (i = 0; i < slice_mb_number;) {
                int mb_count = i + slice_mb_begin;

                /* Raster position of this MB within the frame */
                mb_x = mb_count % mb_width;
                mb_y = mb_count / mb_width;
                mb_intra_ub = 0;

                /* Intra-prediction neighbour availability flags */
                if (mb_x != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                }

                if (mb_y != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;

                    if (mb_x != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (mb_x != (mb_width -1))
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = 0; /* no scoreboard in raster mode */
                *command_ptr++ = 0;
                *command_ptr++ = 0;
                *command_ptr++ = 0;

                /*inline data */
                *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
                *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;
                i += 1;
            }

            /* advance to the next element of this slice parameter buffer */
            slice_param++;
        }
    }

    /* Terminate the second-level batch */
    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
1108
1109 static void
1110 gen9_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
1111                                    struct encode_state *encode_state,
1112                                    int is_intra,
1113                                    struct intel_encoder_context *encoder_context)
1114 {
1115     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1116     struct intel_batchbuffer *batch = encoder_context->base.batch;
1117     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1118     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1119     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1120     bool allow_hwscore = true;
1121     int s;
1122     int kernel_shader;
1123     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1124
1125     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1126         int j;
1127         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
1128
1129         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
1130             if (slice_param->macroblock_address % width_in_mbs) {
1131                 allow_hwscore = false;
1132                 break;
1133             }
1134         }
1135     }
1136
1137     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1138     if (pic_param->picture_type == VAEncPictureTypeIntra) {
1139         allow_hwscore = false;
1140         kernel_shader = VME_INTRA_SHADER;
1141     } else {
1142         kernel_shader = VME_INTER_SHADER;
1143     }
1144
1145     if (allow_hwscore)
1146         gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
1147                                                    encode_state,
1148                                                    width_in_mbs, height_in_mbs,
1149                                                    kernel_shader,
1150                                                    encoder_context);
1151     else
1152         gen9_vme_mpeg2_fill_vme_batchbuffer(ctx,
1153                                             encode_state,
1154                                             width_in_mbs, height_in_mbs,
1155                                             is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
1156                                             0,
1157                                             encoder_context);
1158
1159     intel_batchbuffer_start_atomic(batch, 0x1000);
1160     gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1161     BEGIN_BATCH(batch, 4);
1162     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1163     OUT_RELOC(batch,
1164               vme_context->vme_batchbuffer.bo,
1165               I915_GEM_DOMAIN_COMMAND, 0,
1166               0);
1167     OUT_BATCH(batch, 0);
1168     OUT_BATCH(batch, 0);
1169     ADVANCE_BATCH(batch);
1170
1171     gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
1172
1173     intel_batchbuffer_end_atomic(batch);
1174 }
1175
1176 static VAStatus
1177 gen9_vme_mpeg2_prepare(VADriverContextP ctx,
1178                        struct encode_state *encode_state,
1179                        struct intel_encoder_context *encoder_context)
1180 {
1181     VAStatus vaStatus = VA_STATUS_SUCCESS;
1182     VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1183     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1184     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1185
1186     if ((!vme_context->mpeg2_level) ||
1187         (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
1188             vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
1189     }
1190
1191     /*Setup all the memory object*/
1192     gen9_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1193     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
1194     //gen9_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1195     intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
1196     gen9_vme_constant_setup(ctx, encode_state, encoder_context, 1);
1197
1198     /*Programing media pipeline*/
1199     gen9_vme_mpeg2_pipeline_programing(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1200
1201     return vaStatus;
1202 }
1203
1204 static VAStatus
1205 gen9_vme_mpeg2_pipeline(VADriverContextP ctx,
1206                         VAProfile profile,
1207                         struct encode_state *encode_state,
1208                         struct intel_encoder_context *encoder_context)
1209 {
1210     gen9_vme_media_init(ctx, encoder_context);
1211     gen9_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
1212     gen9_vme_run(ctx, encode_state, encoder_context);
1213     gen9_vme_stop(ctx, encode_state, encoder_context);
1214
1215     return VA_STATUS_SUCCESS;
1216 }
1217
1218 static void
1219 gen9_vme_vp8_output_buffer_setup(VADriverContextP ctx,
1220                                    struct encode_state *encode_state,
1221                                    int index,
1222                                    int is_intra,
1223                                    struct intel_encoder_context *encoder_context)
1224 {
1225     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1226     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1227     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1228
1229     gen9_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
1230 }
1231
1232 static void
1233 gen9_vme_vp8_output_vme_batchbuffer_setup(VADriverContextP ctx,
1234                                             struct encode_state *encode_state,
1235                                             int index,
1236                                             struct intel_encoder_context *encoder_context)
1237 {
1238     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1239     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1240     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1241
1242     gen9_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
1243 }
1244
1245 static VAStatus
1246 gen9_vme_vp8_surface_setup(VADriverContextP ctx,
1247                              struct encode_state *encode_state,
1248                              int is_intra,
1249                              struct intel_encoder_context *encoder_context)
1250 {
1251     struct object_surface *obj_surface;
1252
1253     /*Setup surfaces state*/
1254     /* current picture for encoding */
1255     obj_surface = encode_state->input_yuv_object;
1256     gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
1257     gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
1258     gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
1259
1260     if (!is_intra) {
1261         /* reference 0 */
1262         obj_surface = encode_state->reference_objects[0];
1263
1264         if (obj_surface->bo != NULL)
1265             gen9_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
1266
1267         /* reference 1 */
1268         obj_surface = encode_state->reference_objects[1];
1269
1270         if (obj_surface && obj_surface->bo != NULL)
1271             gen9_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
1272     }
1273
1274     /* VME output */
1275     gen9_vme_vp8_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
1276     gen9_vme_vp8_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
1277
1278     return VA_STATUS_SUCCESS;
1279 }
1280
/*
 * Program the media pipeline for VP8 VME. The second-level batch is built
 * with the scoreboard-aware MPEG-2 wavefront walker (reused here
 * unconditionally — presumably the VP8 path always satisfies the walker's
 * slice-alignment requirement; confirm against the walker's assumptions),
 * then chained to from the main batch.
 */
static void
gen9_vme_vp8_pipeline_programing(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   int is_intra,
                                   struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
    int kernel_shader = (is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER);

    gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
                                                 encode_state,
                                                 width_in_mbs, height_in_mbs,
                                                 kernel_shader,
                                                 encoder_context);

    /* Chain the main batch to the second-level VME batchbuffer */
    intel_batchbuffer_start_atomic(batch, 0x1000);
    gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(batch,
              vme_context->vme_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);
}
1316
1317 static VAStatus gen9_vme_vp8_prepare(VADriverContextP ctx,
1318                                  struct encode_state *encode_state,
1319                                  struct intel_encoder_context *encoder_context)
1320 {
1321     VAStatus vaStatus = VA_STATUS_SUCCESS;
1322     VAEncPictureParameterBufferVP8 *pPicParameter = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
1323     int is_intra = !pPicParameter->pic_flags.bits.frame_type;
1324
1325     /* update vp8 mbmv cost */
1326     intel_vme_vp8_update_mbmv_cost(ctx, encode_state, encoder_context);
1327
1328     /*Setup all the memory object*/
1329     gen9_vme_vp8_surface_setup(ctx, encode_state, is_intra, encoder_context);
1330     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
1331     gen9_vme_constant_setup(ctx, encode_state, encoder_context, 1);
1332
1333     /*Programing media pipeline*/
1334     gen9_vme_vp8_pipeline_programing(ctx, encode_state, is_intra, encoder_context);
1335
1336     return vaStatus;
1337 }
1338
1339 static VAStatus
1340 gen9_vme_vp8_pipeline(VADriverContextP ctx,
1341                         VAProfile profile,
1342                         struct encode_state *encode_state,
1343                         struct intel_encoder_context *encoder_context)
1344 {
1345     gen9_vme_media_init(ctx, encoder_context);
1346     gen9_vme_vp8_prepare(ctx, encode_state, encoder_context);
1347     gen9_vme_run(ctx, encode_state, encoder_context);
1348     gen9_vme_stop(ctx, encode_state, encoder_context);
1349
1350     return VA_STATUS_SUCCESS;
1351 }
1352
1353 /* HEVC */
1354
1355 static void
1356 gen9_vme_hevc_output_buffer_setup(VADriverContextP ctx,
1357                              struct encode_state *encode_state,
1358                              int index,
1359                              struct intel_encoder_context *encoder_context)
1360
1361 {
1362     struct i965_driver_data *i965 = i965_driver_data(ctx);
1363     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1364     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1365     VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1366     int is_intra = pSliceParameter->slice_type == HEVC_SLICE_I;
1367     int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
1368     int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
1369
1370
1371     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
1372     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
1373
1374     if (is_intra)
1375         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
1376     else
1377         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
1378     /*
1379      * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
1380      * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
1381      * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
1382      */
1383
1384     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
1385                                               "VME output buffer",
1386                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
1387                                               0x1000);
1388     assert(vme_context->vme_output.bo);
1389     vme_context->vme_buffer_suface_setup(ctx,
1390                                          &vme_context->gpe_context,
1391                                          &vme_context->vme_output,
1392                                          BINDING_TABLE_OFFSET(index),
1393                                          SURFACE_STATE_OFFSET(index));
1394 }
1395
1396 static void
1397 gen9_vme_hevc_output_vme_batchbuffer_setup(VADriverContextP ctx,
1398                                       struct encode_state *encode_state,
1399                                       int index,
1400                                       struct intel_encoder_context *encoder_context)
1401
1402 {
1403     struct i965_driver_data *i965 = i965_driver_data(ctx);
1404     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1405     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1406     int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
1407     int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
1408
1409     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
1410     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
1411     vme_context->vme_batchbuffer.pitch = 16;
1412     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
1413                                                    "VME batchbuffer",
1414                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
1415                                                    0x1000);
1416 }
/*
 * Bind all source/reference/output surfaces for HEVC VME.
 *
 * For >8-bit content the VME hardware reads the 8-bit NV12 shadow surface
 * attached to the input surface's private data instead of the high-bit
 * surface itself.
 */
static VAStatus
gen9_vme_hevc_surface_setup(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       int is_intra,
                       struct intel_encoder_context *encoder_context)
{
    struct object_surface *obj_surface;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    GenHevcSurface *hevc_encoder_surface = NULL;

    /*Setup surfaces state*/
    /* current picture for encoding */
    obj_surface = encode_state->input_yuv_object;

    /* >8-bit input: substitute the NV12 proxy surface for motion estimation */
    if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
        || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0)) {
        hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
        assert(hevc_encoder_surface);
        obj_surface = hevc_encoder_surface->nv12_surface_obj;
    }
    gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
    gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
    gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);

    if (!is_intra) {
        VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
        int slice_type;

        slice_type = slice_param->slice_type;
        assert(slice_type != HEVC_SLICE_I);

        /* to do HEVC */
        /* references for list 0 (presumably; confirm list index semantics
         * against intel_hevc_vme_reference_state) */
        intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);

        /* second reference list, only needed for B slices */
        if (slice_type == HEVC_SLICE_B)
            intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
    }

    /* VME output */
    gen9_vme_hevc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
    gen9_vme_hevc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);

    return VA_STATUS_SUCCESS;
}
/*
 * Fill the second-level VME batchbuffer with one MEDIA_OBJECT command per
 * 16x16 macroblock, visiting MBs in a diagonal wavefront order (inner step
 * x -= 2, y += 1) so the HW scoreboard can run neighbouring MBs
 * concurrently while honouring the left/top/top-right intra dependencies.
 * Used only when every slice starts at a picture-row boundary
 * (see allow_hwscore in gen9_vme_hevc_pipeline_programing).
 *
 * Slice geometry is given in CTBs; it is converted to MB units via
 * num_mb_in_ctb = (ctb_size/16)^2.
 */
static void
gen9wa_vme_hevc_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     int mb_width, int mb_height,
                                     int kernel,
                                     int transform_8x8_mode_flag,
                                     struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_row;
    int s;
    unsigned int *command_ptr;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
    int ctb_size = 1 << log2_ctb_size;
    /* number of 16x16 MBs covered by one CTB (squared below) */
    int num_mb_in_ctb = (ctb_size + 15)/16;
    num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;

#define         USE_SCOREBOARD          (1 << 21)

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    /* slice_segment_address must be aligned to picture_width_in_ctb */
    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
        int first_mb = pSliceParameter->slice_segment_address * num_mb_in_ctb;
        int num_mb = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb;
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;

        x_outer = first_mb % mb_width;
        y_outer = first_mb / mb_width;
        mb_row = y_outer;  /* first MB row of this slice: MBs on it have no top neighbour inside the slice */

        /* Phase 1: diagonals whose starting points lie on the slice's first
         * row, up to column mb_width-2. */
        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            x_inner = x_outer;
            y_inner = y_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* Compute intra-neighbour availability (A/E left, B top,
                 * C top-right, D top-left) and the matching scoreboard
                 * dependency mask for this MB. */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                /* MEDIA_OBJECT: 8 DWORDs total (length field = 8 - 2). */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;

                /* walk down the diagonal */
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* Phase 2: diagonals starting from column mb_width-2, sweeping the
         * remaining right/bottom part of the slice. */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = first_mb / mb_width;
        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* Same neighbour-availability / scoreboard computation as
                 * phase 1. */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer++;
            /* wrap to the next row once past the right edge */
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
1596
/*
 * Fill the second-level VME batchbuffer with one MEDIA_OBJECT per 16x16
 * macroblock in plain raster order, without HW scoreboarding.  This is the
 * fallback path when a slice does not start at a picture-row boundary
 * (see allow_hwscore in gen9_vme_hevc_pipeline_programing).
 *
 * Slice geometry is given in CTBs and converted to MB units via
 * num_mb_in_ctb = (ctb_size/16)^2.
 */
static void
gen9_vme_hevc_fill_vme_batchbuffer(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              int mb_width, int mb_height,
                              int kernel,
                              int transform_8x8_mode_flag,
                              struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;
    int i, s;
    unsigned int *command_ptr;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;

    int ctb_size = 1 << log2_ctb_size;
    /* number of 16x16 MBs covered by one CTB (squared below) */
    int num_mb_in_ctb = (ctb_size + 15)/16;
    num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
        int slice_mb_begin = pSliceParameter->slice_segment_address * num_mb_in_ctb;
        int slice_mb_number = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb;

        unsigned int mb_intra_ub;
        /* x offset of the slice's first MB; non-zero means the slice does
         * not start at the left picture edge */
        int slice_mb_x = slice_mb_begin % mb_width;
        for (i = 0; i < slice_mb_number;  ) {
            int mb_count = i + slice_mb_begin;
            mb_x = mb_count % mb_width;
            mb_y = mb_count / mb_width;
            mb_intra_ub = 0;

            /* Intra-neighbour availability within the picture:
             * A/E = left, B = top, C = top-right, D = top-left. */
            if (mb_x != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
            }
            if (mb_y != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                if (mb_x != 0)
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                if (mb_x != (mb_width -1))
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
            }
            /* The first mb_width MBs of the SLICE (not necessarily the
             * picture's top row): neighbours outside the slice are not
             * available for intra prediction. */
            if (i < mb_width) {
                if (i == 0)
                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
                if ((i == (mb_width - 1)) && slice_mb_x) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                }
            }

            /* Start of the slice's second MB row when the slice begins
             * mid-row: the top-left neighbour belongs to another slice. */
            if ((i == mb_width) && slice_mb_x) {
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
            }

            /* MEDIA_OBJECT: 8 DWORDs total (length field = 8 - 2),
             * scoreboard fields left zero in this non-scoreboard path. */
            *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
            *command_ptr++ = kernel;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;

            /*inline data */
            *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
            *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

            *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
            *command_ptr++ = 0;
            i += 1;
        }
    }

    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
1678
/*
 * Build and submit the media pipeline for one HEVC VME pass: pick the VME
 * kernel from the slice type, fill the second-level batchbuffer (scoreboard
 * walker path when possible, raster fallback otherwise), then chain to it
 * from the main batch with MI_BATCH_BUFFER_START.
 */
static void gen9_vme_hevc_pipeline_programing(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
    int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
    int kernel_shader;
    bool allow_hwscore = true;
    int s;

    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;

    int ctb_size = 1 << log2_ctb_size;
    /* number of 16x16 MBs covered by one CTB (squared below) */
    int num_mb_in_ctb = (ctb_size + 15)/16;
    int transform_8x8_mode_flag = 1;
    num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;

    /* HW scoreboarding requires every slice to start at an MB-row boundary. */
    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
        int slice_mb_begin = pSliceParameter->slice_segment_address * num_mb_in_ctb;
        if ((slice_mb_begin % width_in_mbs)) {
            allow_hwscore = false;
            break;
        }
    }

    /* NOTE(review): after the loop above pSliceParameter refers to the LAST
     * slice inspected (not slice 0); the kernel choice assumes all slices of
     * the frame share one type — confirm against callers. */
    if (pSliceParameter->slice_type == HEVC_SLICE_I) {
        kernel_shader = VME_INTRA_SHADER;
    } else if (pSliceParameter->slice_type == HEVC_SLICE_P) {
        kernel_shader = VME_INTER_SHADER;
    } else {
        kernel_shader = VME_BINTER_SHADER;
        if (!allow_hwscore)
            kernel_shader = VME_INTER_SHADER;
    }
    if (allow_hwscore)
        gen9wa_vme_hevc_walker_fill_vme_batchbuffer(ctx,
                                               encode_state,
                                               width_in_mbs, height_in_mbs,
                                               kernel_shader,
                                               transform_8x8_mode_flag,
                                               encoder_context);
    else
        gen9_vme_hevc_fill_vme_batchbuffer(ctx,
                                      encode_state,
                                      width_in_mbs, height_in_mbs,
                                      kernel_shader,
                                      transform_8x8_mode_flag,
                                      encoder_context);

    /* Emit pipeline state into the main batch, then jump into the
     * second-level batchbuffer built above. */
    intel_batchbuffer_start_atomic(batch, 0x1000);
    gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(batch,
              vme_context->vme_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);
}
1749
1750 static VAStatus gen9_intel_init_hevc_surface(VADriverContextP ctx,
1751                             struct intel_encoder_context *encoder_context,
1752                             struct encode_state *encode_state,
1753                             struct object_surface *input_obj_surface)
1754 {
1755     struct i965_driver_data *i965 = i965_driver_data(ctx);
1756     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1757     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1758     GenHevcSurface *hevc_encoder_surface;
1759     struct i965_surface src_surface, dst_surface;
1760     struct object_surface *obj_surface;
1761     VARectangle rect;
1762     VAStatus status;
1763
1764     uint32_t size;
1765
1766     obj_surface = input_obj_surface;
1767     assert(obj_surface && obj_surface->bo);
1768
1769     if (obj_surface->private_data == NULL) {
1770
1771         if (mfc_context->pic_size.ctb_size == 16)
1772             size = ((pSequenceParameter->pic_width_in_luma_samples + 63) >> 6) *
1773             ((pSequenceParameter->pic_height_in_luma_samples + 15) >> 4);
1774         else
1775             size = ((pSequenceParameter->pic_width_in_luma_samples + 31) >> 5) *
1776             ((pSequenceParameter->pic_height_in_luma_samples + 31) >> 5);
1777         size <<= 6; /* in unit of 64bytes */
1778
1779         hevc_encoder_surface = calloc(sizeof(GenHevcSurface), 1);
1780
1781         assert(hevc_encoder_surface);
1782         hevc_encoder_surface->motion_vector_temporal_bo =
1783             dri_bo_alloc(i965->intel.bufmgr,
1784             "motion vector temporal buffer",
1785             size,
1786             0x1000);
1787         assert(hevc_encoder_surface->motion_vector_temporal_bo);
1788
1789         hevc_encoder_surface->ctx = ctx;
1790         hevc_encoder_surface->nv12_surface_obj = NULL;
1791         hevc_encoder_surface->nv12_surface_id = VA_INVALID_SURFACE;
1792         hevc_encoder_surface->has_p010_to_nv12_done = 0;
1793
1794         obj_surface->private_data = (void *)hevc_encoder_surface;
1795         obj_surface->free_private_data = (void *)gen_free_hevc_surface;
1796     }
1797
1798     hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
1799
1800     if(!hevc_encoder_surface->has_p010_to_nv12_done && obj_surface->fourcc == VA_FOURCC_P010)
1801     {
1802         // convert input
1803         rect.x = 0;
1804         rect.y = 0;
1805         rect.width = obj_surface->orig_width;
1806         rect.height = obj_surface->orig_height;
1807
1808         src_surface.base = (struct object_base *)obj_surface;
1809         src_surface.type = I965_SURFACE_TYPE_SURFACE;
1810         src_surface.flags = I965_SURFACE_FLAG_FRAME;
1811
1812         if(SURFACE(hevc_encoder_surface->nv12_surface_id) == NULL)
1813         {
1814             status = i965_CreateSurfaces(ctx,
1815                 obj_surface->orig_width,
1816                 obj_surface->orig_height,
1817                 VA_RT_FORMAT_YUV420,
1818                 1,
1819                 &hevc_encoder_surface->nv12_surface_id);
1820             assert(status == VA_STATUS_SUCCESS);
1821
1822             if (status != VA_STATUS_SUCCESS)
1823                 return status;
1824         }
1825
1826         obj_surface = SURFACE(hevc_encoder_surface->nv12_surface_id);
1827         hevc_encoder_surface->nv12_surface_obj = obj_surface;
1828         assert(obj_surface);
1829         i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1830
1831         dst_surface.base = (struct object_base *)obj_surface;
1832         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
1833         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
1834
1835         status = i965_image_processing(ctx,
1836             &src_surface,
1837             &rect,
1838             &dst_surface,
1839             &rect);
1840         assert(status == VA_STATUS_SUCCESS);
1841         hevc_encoder_surface->has_p010_to_nv12_done = 1;
1842         i965_SyncSurface(ctx,hevc_encoder_surface->nv12_surface_id);
1843     }
1844     return VA_STATUS_SUCCESS;
1845 }
1846
1847 static VAStatus gen9_intel_hevc_input_check(VADriverContextP ctx,
1848                             struct encode_state *encode_state,
1849                             struct intel_encoder_context *encoder_context)
1850 {
1851     struct i965_driver_data *i965 = i965_driver_data(ctx);
1852     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1853     struct object_surface *obj_surface;
1854     GenHevcSurface *hevc_encoder_surface = NULL;
1855     int i;
1856
1857     obj_surface = SURFACE(encode_state->current_render_target);
1858     assert(obj_surface && obj_surface->bo);
1859     hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
1860     if(hevc_encoder_surface)
1861         hevc_encoder_surface->has_p010_to_nv12_done = 0;
1862     gen9_intel_init_hevc_surface(ctx,encoder_context,encode_state,obj_surface);
1863
1864     /* Setup current frame and current direct mv buffer*/
1865     obj_surface = encode_state->reconstructed_object;
1866     hevc_encoder_surface = NULL;
1867     hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
1868     if(hevc_encoder_surface)
1869         hevc_encoder_surface->has_p010_to_nv12_done = 1;
1870     gen9_intel_init_hevc_surface(ctx,encoder_context,encode_state,obj_surface);
1871
1872     /* Setup reference frames and direct mv buffers*/
1873     for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
1874         obj_surface = encode_state->reference_objects[i];
1875
1876         if (obj_surface && obj_surface->bo) {
1877             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
1878             dri_bo_reference(obj_surface->bo);
1879
1880             gen9_intel_init_hevc_surface(ctx,encoder_context,encode_state,obj_surface);
1881         } else {
1882             break;
1883         }
1884     }
1885
1886     return VA_STATUS_SUCCESS;
1887 }
1888
1889 static VAStatus gen9_vme_hevc_prepare(VADriverContextP ctx,
1890                                  struct encode_state *encode_state,
1891                                  struct intel_encoder_context *encoder_context)
1892 {
1893     VAStatus vaStatus = VA_STATUS_SUCCESS;
1894     VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1895     int is_intra = pSliceParameter->slice_type == HEVC_SLICE_I;
1896     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1897     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1898
1899     /* here use the avc level for hevc vme */
1900     if (!vme_context->hevc_level ||
1901         (vme_context->hevc_level != pSequenceParameter->general_level_idc)) {
1902         vme_context->hevc_level = pSequenceParameter->general_level_idc;
1903     }
1904
1905     //internal input check for main10
1906     gen9_intel_hevc_input_check(ctx,encode_state,encoder_context);
1907
1908     intel_vme_hevc_update_mbmv_cost(ctx, encode_state, encoder_context);
1909
1910     /*Setup all the memory object*/
1911     gen9_vme_hevc_surface_setup(ctx, encode_state, is_intra, encoder_context);
1912     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
1913     //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
1914     gen9_vme_constant_setup(ctx, encode_state, encoder_context, 1);
1915
1916     /*Programing media pipeline*/
1917     gen9_vme_hevc_pipeline_programing(ctx, encode_state, encoder_context);
1918
1919     return vaStatus;
1920 }
1921
1922
/*
 * Top-level HEVC VME pipeline entry (encoder_context->vme_pipeline):
 * initialize per-frame VME buffers, prepare surfaces/kernels/batch,
 * then run and stop the media pipeline.  The call order is fixed.
 *
 * 'profile' is unused here; the signature matches the vme_pipeline hook.
 * Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen9_vme_hevc_pipeline(VADriverContextP ctx,
                  VAProfile profile,
                  struct encode_state *encode_state,
                  struct intel_encoder_context *encoder_context)
{
    gen9_vme_media_init(ctx, encoder_context);
    gen9_vme_hevc_prepare(ctx, encode_state, encoder_context);
    gen9_vme_run(ctx, encode_state, encoder_context);
    gen9_vme_stop(ctx, encode_state, encoder_context);

    return VA_STATUS_SUCCESS;
}
1936
1937
1938 static void
1939 gen9_vme_context_destroy(void *context)
1940 {
1941     struct gen6_vme_context *vme_context = context;
1942
1943     gen8_gpe_context_destroy(&vme_context->gpe_context);
1944
1945     dri_bo_unreference(vme_context->vme_output.bo);
1946     vme_context->vme_output.bo = NULL;
1947
1948     dri_bo_unreference(vme_context->vme_state.bo);
1949     vme_context->vme_state.bo = NULL;
1950
1951     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
1952     vme_context->vme_batchbuffer.bo = NULL;
1953
1954     free(vme_context->vme_state_message);
1955     vme_context->vme_state_message = NULL;
1956
1957     dri_bo_unreference(vme_context->i_qp_cost_table);
1958     vme_context->i_qp_cost_table = NULL;
1959
1960     dri_bo_unreference(vme_context->p_qp_cost_table);
1961     vme_context->p_qp_cost_table = NULL;
1962
1963     dri_bo_unreference(vme_context->b_qp_cost_table);
1964     vme_context->b_qp_cost_table = NULL;
1965
1966     free(vme_context->qp_per_mb);
1967     vme_context->qp_per_mb = NULL;
1968
1969     free(vme_context);
1970 }
1971
1972 Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1973 {
1974     struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context));
1975     struct i965_kernel *vme_kernel_list = NULL;
1976     int i965_kernel_num;
1977
1978     switch (encoder_context->codec) {
1979     case CODEC_H264:
1980     case CODEC_H264_MVC:
1981         vme_kernel_list = gen9_vme_kernels;
1982         encoder_context->vme_pipeline = gen9_vme_pipeline;
1983         i965_kernel_num = sizeof(gen9_vme_kernels) / sizeof(struct i965_kernel);
1984         break;
1985
1986     case CODEC_MPEG2:
1987         vme_kernel_list = gen9_vme_mpeg2_kernels;
1988         encoder_context->vme_pipeline = gen9_vme_mpeg2_pipeline;
1989         i965_kernel_num = sizeof(gen9_vme_mpeg2_kernels) / sizeof(struct i965_kernel);
1990         break;
1991
1992     case CODEC_VP8:
1993         vme_kernel_list = gen9_vme_vp8_kernels;
1994         encoder_context->vme_pipeline = gen9_vme_vp8_pipeline;
1995         i965_kernel_num = sizeof(gen9_vme_vp8_kernels) / sizeof(struct i965_kernel);
1996         break;
1997
1998     case CODEC_HEVC:
1999         vme_kernel_list = gen9_vme_hevc_kernels;
2000         encoder_context->vme_pipeline = gen9_vme_hevc_pipeline;
2001         i965_kernel_num = sizeof(gen9_vme_hevc_kernels) / sizeof(struct i965_kernel);
2002         break;
2003
2004     default:
2005         /* never get here */
2006         assert(0);
2007
2008         break;
2009     }
2010
2011     assert(vme_context);
2012     vme_context->vme_kernel_sum = i965_kernel_num;
2013     vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2014
2015     vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
2016     vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
2017     vme_context->gpe_context.sampler_size = 0;
2018
2019
2020     vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2021     vme_context->gpe_context.vfe_state.num_urb_entries = 64;
2022     vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
2023     vme_context->gpe_context.vfe_state.urb_entry_size = 16;
2024     vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
2025
2026     gen7_vme_scoreboard_init(ctx, vme_context);
2027
2028     gen8_gpe_load_kernels(ctx,
2029                           &vme_context->gpe_context,
2030                           vme_kernel_list,
2031                           i965_kernel_num);
2032     vme_context->vme_surface2_setup = gen8_gpe_surface2_setup;
2033     vme_context->vme_media_rw_surface_setup = gen8_gpe_media_rw_surface_setup;
2034     vme_context->vme_buffer_suface_setup = gen8_gpe_buffer_suface_setup;
2035     vme_context->vme_media_chroma_surface_setup = gen8_gpe_media_chroma_surface_setup;
2036
2037     encoder_context->vme_context = vme_context;
2038     encoder_context->vme_context_destroy = gen9_vme_context_destroy;
2039
2040     vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
2041
2042     return True;
2043 }