ROI: enable on gen8 and gen9
src/gen9_vme.c (android-x86/hardware-intel-common-vaapi.git)
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "intel_driver.h"
37
38 #include "i965_defines.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "gen6_vme.h"
42 #include "gen6_mfc.h"
43
44 #ifdef SURFACE_STATE_PADDED_SIZE
45 #undef SURFACE_STATE_PADDED_SIZE
46 #endif
47
48 #define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
49 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
50 #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
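/*
 * Rough layout sketch of the area addressed by the two macros above
 * (offsets relative to the surface-state base of the GPE context):
 *
 *   SURFACE_STATE_OFFSET(0)                        surface state 0 (padded)
 *   SURFACE_STATE_OFFSET(1)                        surface state 1 (padded)
 *   ...
 *   SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6)  binding table entry 0 (4 bytes)
 *   ... + sizeof(unsigned int) * index             binding table entry <index>
 *
 * i.e. the binding table (one 32-bit pointer per surface) is packed right
 * after the padded surface-state slots.
 */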
51
52 #define VME_INTRA_SHADER        0
53 #define VME_INTER_SHADER        1
54 #define VME_BINTER_SHADER       2
55
56 #define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
57 #define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
58 #define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
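/*
 * Working the stated constraints out with the values as defined here:
 *   CURBE_ALLOCATION_SIZE * 32   = 37 * 32 = 1184 bytes >= CURBE_TOTAL_DATA_LENGTH (4 * 32 = 128 bytes)
 *   CURBE_TOTAL_DATA_LENGTH / 32 = 128 / 32 = 4         >= CURBE_URB_ENTRY_LENGTH (4)
 * so the 128 bytes copied into the CURBE by gen9_vme_constant_setup() fit, and
 * the interface descriptors below read exactly CURBE_URB_ENTRY_LENGTH 256-bit
 * rows of it.
 */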
59
60 #define VME_MSG_LENGTH          32
61
62 static const uint32_t gen9_vme_intra_frame[][4] = {
63 #include "shaders/vme/intra_frame_gen9.g9b"
64 };
65
66 static const uint32_t gen9_vme_inter_frame[][4] = {
67 #include "shaders/vme/inter_frame_gen9.g9b"
68 };
69
70 static const uint32_t gen9_vme_inter_bframe[][4] = {
71 #include "shaders/vme/inter_bframe_gen9.g9b"
72 };
73
74 static struct i965_kernel gen9_vme_kernels[] = {
75     {
76         "VME Intra Frame",
77         VME_INTRA_SHADER, /*index*/
78         gen9_vme_intra_frame,
79         sizeof(gen9_vme_intra_frame),
80         NULL
81     },
82     {
83         "VME inter Frame",
84         VME_INTER_SHADER,
85         gen9_vme_inter_frame,
86         sizeof(gen9_vme_inter_frame),
87         NULL
88     },
89     {
90         "VME inter BFrame",
91         VME_BINTER_SHADER,
92         gen9_vme_inter_bframe,
93         sizeof(gen9_vme_inter_bframe),
94         NULL
95     }
96 };
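/*
 * Note: the VME_*_SHADER values double as interface-descriptor indices.  The
 * kernels are listed in that order here, gen9_vme_interface_setup() emits one
 * descriptor per kernel in the same order, and the MEDIA_OBJECT commands built
 * by the *_fill_vme_batchbuffer() helpers select a kernel by writing that index
 * as their interface descriptor offset dword.
 */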
97
98 static const uint32_t gen9_vme_mpeg2_intra_frame[][4] = {
99 #include "shaders/vme/intra_frame_gen9.g9b"
100 };
101
102 static const uint32_t gen9_vme_mpeg2_inter_frame[][4] = {
103 #include "shaders/vme/mpeg2_inter_gen9.g9b"
104 };
105
106 static struct i965_kernel gen9_vme_mpeg2_kernels[] = {
107     {
108         "VME Intra Frame",
109         VME_INTRA_SHADER, /*index*/
110         gen9_vme_mpeg2_intra_frame,
111         sizeof(gen9_vme_mpeg2_intra_frame),
112         NULL
113     },
114     {
115         "VME inter Frame",
116         VME_INTER_SHADER,
117         gen9_vme_mpeg2_inter_frame,
118         sizeof(gen9_vme_mpeg2_inter_frame),
119         NULL
120     },
121 };
122
123 static const uint32_t gen9_vme_vp8_intra_frame[][4] = {
124 #include "shaders/vme/vp8_intra_frame_gen9.g9b"
125 };
126
127 static const uint32_t gen9_vme_vp8_inter_frame[][4] = {
128 #include "shaders/vme/vp8_inter_frame_gen9.g9b"
129 };
130
131 static struct i965_kernel gen9_vme_vp8_kernels[] = {
132     {
133         "VME Intra Frame",
134         VME_INTRA_SHADER, /*index*/
135         gen9_vme_vp8_intra_frame,
136         sizeof(gen9_vme_vp8_intra_frame),
137         NULL
138     },
139     {
140         "VME inter Frame",
141         VME_INTER_SHADER,
142         gen9_vme_vp8_inter_frame,
143         sizeof(gen9_vme_vp8_inter_frame),
144         NULL
145     },
146 };
147
148 /* HEVC */
149
150 static const uint32_t gen9_vme_hevc_intra_frame[][4] = {
151 #include "shaders/vme/intra_frame_gen9.g9b"
152 };
153
154 static const uint32_t gen9_vme_hevc_inter_frame[][4] = {
155 #include "shaders/vme/inter_frame_gen9.g9b"
156 };
157
158 static const uint32_t gen9_vme_hevc_inter_bframe[][4] = {
159 #include "shaders/vme/inter_bframe_gen9.g9b"
160 };
161
162 static struct i965_kernel gen9_vme_hevc_kernels[] = {
163     {
164         "VME Intra Frame",
165         VME_INTRA_SHADER, /*index*/
166         gen9_vme_hevc_intra_frame,
167         sizeof(gen9_vme_hevc_intra_frame),
168         NULL
169     },
170     {
171         "VME inter Frame",
172         VME_INTER_SHADER,
173         gen9_vme_hevc_inter_frame,
174         sizeof(gen9_vme_hevc_inter_frame),
175         NULL
176     },
177     {
178         "VME inter BFrame",
179         VME_BINTER_SHADER,
180         gen9_vme_hevc_inter_bframe,
181         sizeof(gen9_vme_hevc_inter_bframe),
182         NULL
183     }
184 };
185 /* only used for VME source surface state */
186 static void
187 gen9_vme_source_surface_state(VADriverContextP ctx,
188                               int index,
189                               struct object_surface *obj_surface,
190                               struct intel_encoder_context *encoder_context)
191 {
192     struct gen6_vme_context *vme_context = encoder_context->vme_context;
193
194     vme_context->vme_surface2_setup(ctx,
195                                     &vme_context->gpe_context,
196                                     obj_surface,
197                                     BINDING_TABLE_OFFSET(index),
198                                     SURFACE_STATE_OFFSET(index));
199 }
200
201 static void
202 gen9_vme_media_source_surface_state(VADriverContextP ctx,
203                                     int index,
204                                     struct object_surface *obj_surface,
205                                     struct intel_encoder_context *encoder_context)
206 {
207     struct gen6_vme_context *vme_context = encoder_context->vme_context;
208
209     vme_context->vme_media_rw_surface_setup(ctx,
210                                             &vme_context->gpe_context,
211                                             obj_surface,
212                                             BINDING_TABLE_OFFSET(index),
213                                             SURFACE_STATE_OFFSET(index));
214 }
215
216 static void
217 gen9_vme_media_chroma_source_surface_state(VADriverContextP ctx,
218                                            int index,
219                                            struct object_surface *obj_surface,
220                                            struct intel_encoder_context *encoder_context)
221 {
222     struct gen6_vme_context *vme_context = encoder_context->vme_context;
223
224     vme_context->vme_media_chroma_surface_setup(ctx,
225                                                 &vme_context->gpe_context,
226                                                 obj_surface,
227                                                 BINDING_TABLE_OFFSET(index),
228                                                 SURFACE_STATE_OFFSET(index));
229 }
230
231 static void
232 gen9_vme_output_buffer_setup(VADriverContextP ctx,
233                              struct encode_state *encode_state,
234                              int index,
235                              struct intel_encoder_context *encoder_context,
236                              int is_intra,
237                              int width_in_mbs,
238                              int height_in_mbs)
239
240 {
241     struct i965_driver_data *i965 = i965_driver_data(ctx);
242     struct gen6_vme_context *vme_context = encoder_context->vme_context;
243
244     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
245     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
246
247     if (is_intra)
248         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
249     else
250         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
251     /*
252      * Inter output per MB: 32-byte intra search result + 16-byte IME info
253      * + 128-byte IME MV + 32-byte IME ref + 16-byte FBR info + 128-byte FBR MV
254      * + 32-byte FBR ref, i.e. 16 * (2 + 2 * (1 + 8 + 2)) = 16 * 24 bytes.
255      */
256
257     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
258                                               "VME output buffer",
259                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
260                                               0x1000);
261     assert(vme_context->vme_output.bo);
262     vme_context->vme_buffer_suface_setup(ctx,
263                                          &vme_context->gpe_context,
264                                          &vme_context->vme_output,
265                                          BINDING_TABLE_OFFSET(index),
266                                          SURFACE_STATE_OFFSET(index));
267 }
268
269 static void
270 gen9_vme_avc_output_buffer_setup(VADriverContextP ctx,
271                              struct encode_state *encode_state,
272                              int index,
273                              struct intel_encoder_context *encoder_context)
274 {
275     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
276     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
277     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
278     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
279     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
280
281     gen9_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
282
283 }
284
285 static void
286 gen9_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
287                                       struct encode_state *encode_state,
288                                       int index,
289                                       struct intel_encoder_context *encoder_context,
290                                       int width_in_mbs,
291                                       int height_in_mbs)
292 {
293     struct i965_driver_data *i965 = i965_driver_data(ctx);
294     struct gen6_vme_context *vme_context = encoder_context->vme_context;
295
296     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
297     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
298     vme_context->vme_batchbuffer.pitch = 16;
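    /*
     * Each 64-byte block holds the per-MB commands emitted by the
     * *_fill_vme_batchbuffer() helpers below (MEDIA_OBJECT plus
     * MEDIA_STATE_FLUSH, at most 11 dwords = 44 bytes), and the extra
     * "+ 1" block leaves room for the terminating MI_BATCH_BUFFER_END.
     */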
299     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
300                                                    "VME batchbuffer",
301                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
302                                                    0x1000);
303     vme_context->vme_buffer_suface_setup(ctx,
304                                          &vme_context->gpe_context,
305                                          &vme_context->vme_batchbuffer,
306                                          BINDING_TABLE_OFFSET(index),
307                                          SURFACE_STATE_OFFSET(index));
308 }
309
310 static void
311 gen9_vme_avc_output_vme_batchbuffer_setup(VADriverContextP ctx,
312                                       struct encode_state *encode_state,
313                                       int index,
314                                       struct intel_encoder_context *encoder_context)
315 {
316     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
317     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
318     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
319
320     gen9_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
321 }
322
323
324 static VAStatus
325 gen9_vme_surface_setup(VADriverContextP ctx,
326                        struct encode_state *encode_state,
327                        int is_intra,
328                        struct intel_encoder_context *encoder_context)
329 {
330     struct object_surface *obj_surface;
331
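    /*
     * Binding-table slots used by the AVC VME path, as programmed below:
     *   0     current picture, VME (surface2) view
     *   1, 2  list0/list1 reference pictures (via intel_avc_vme_reference_state)
     *   3     per-MB VME output buffer
     *   4     current picture, luma media read/write view
     *   5     second-level batch buffer filled by *_fill_vme_batchbuffer()
     *   6     current picture, chroma media view
     *   INTEL_COST_TABLE_OFFSET  MV/mode cost table surface
     */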
332     /*Setup surface states*/
333     /* current picture for encoding */
334     obj_surface = encode_state->input_yuv_object;
335     gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
336     gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
337     gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
338
339     if (!is_intra) {
340         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
341         int slice_type;
342
343         slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
344         assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
345
346         intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);
347
348         if (slice_type == SLICE_TYPE_B)
349             intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
350     }
351
352     /* VME output */
353     gen9_vme_avc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
354     gen9_vme_avc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
355     intel_h264_setup_cost_surface(ctx, encode_state, encoder_context,
356                                   BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET),
357                                   SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET));
358
359     return VA_STATUS_SUCCESS;
360 }
361
362 static VAStatus gen9_vme_interface_setup(VADriverContextP ctx,
363                                          struct encode_state *encode_state,
364                                          struct intel_encoder_context *encoder_context)
365 {
366     struct gen6_vme_context *vme_context = encoder_context->vme_context;
367     struct gen8_interface_descriptor_data *desc;
368     int i;
369     dri_bo *bo;
370     unsigned char *desc_ptr;
371
372     bo = vme_context->gpe_context.dynamic_state.bo;
373     dri_bo_map(bo, 1);
374     assert(bo->virtual);
375     desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt_offset;
376
377     desc = (struct gen8_interface_descriptor_data *)desc_ptr;
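    /*
     * Each descriptor written below is a 32-byte gen8_interface_descriptor_data
     * entry (checked by the assert in the loop).  Per the hardware descriptor
     * layout this struct mirrors, the kernel start pointer is stored in 64-byte
     * units (hence kernel_offset >> 6) and the binding table pointer in 32-byte
     * units (hence BINDING_TABLE_OFFSET(0) >> 5).
     */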
378
379     for (i = 0; i < vme_context->vme_kernel_sum; i++) {
380         struct i965_kernel *kernel;
381         kernel = &vme_context->gpe_context.kernels[i];
382         assert(sizeof(*desc) == 32);
383         /*Setup the descriptor table*/
384         memset(desc, 0, sizeof(*desc));
385         desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
386         desc->desc3.sampler_count = 0; /* FIXME: */
387         desc->desc3.sampler_state_pointer = 0;
388         desc->desc4.binding_table_entry_count = 1; /* FIXME: */
389         desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
390         desc->desc5.constant_urb_entry_read_offset = 0;
391         desc->desc5.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
392
393         desc++;
394     }
395
396     dri_bo_unmap(bo);
397
398     return VA_STATUS_SUCCESS;
399 }
400
401 static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,
402                                         struct encode_state *encode_state,
403                                         struct intel_encoder_context *encoder_context)
404 {
405     struct gen6_vme_context *vme_context = encoder_context->vme_context;
406     unsigned char *constant_buffer;
407     unsigned int *vme_state_message;
408     int mv_num = 32;
409
410     vme_state_message = (unsigned int *)vme_context->vme_state_message;
411
412     if (encoder_context->codec == CODEC_H264 ||
413         encoder_context->codec == CODEC_H264_MVC) {
414         if (vme_context->h264_level >= 30) {
415             mv_num = 16;
416
417             if (vme_context->h264_level >= 31)
418                 mv_num = 8;
419         }
420     } else if (encoder_context->codec == CODEC_MPEG2) {
421         mv_num = 2;
422     } else if (encoder_context->codec == CODEC_HEVC) {
423         if (vme_context->hevc_level >= 30 * 3) {
424             mv_num = 16;
425
426             if (vme_context->hevc_level >= 31 * 3)
427                 mv_num = 8;
428         } /* use the avc level setting */
429     }
430
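    /*
     * vme_state_message[31] is the last dword of the 128-byte CURBE payload
     * copied below; it carries the per-MB MV count limit picked above:
     *   H.264:  level < 3.0 -> 32 MVs, level 3.0 -> 16, level >= 3.1 -> 8
     *   MPEG-2: always 2
     *   HEVC:   the same thresholds with level_idc scaled by 3 (30 * 3, 31 * 3),
     *           reusing the AVC-style limits as the comment above notes
     */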
431     vme_state_message[31] = mv_num;
432
433     dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
434     assert(vme_context->gpe_context.dynamic_state.bo->virtual);
435     constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual +
436                                          vme_context->gpe_context.curbe_offset;
437
438     /* The VME MV/MB cost table is passed to the kernels via the constant buffer. */
439     /* The search path is fixed, so it is constructed directly
440      * in the GPU shader.
441      */
442     memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
443
444     dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
445
446     return VA_STATUS_SUCCESS;
447 }
448
449 #define         MB_SCOREBOARD_A         (1 << 0)
450 #define         MB_SCOREBOARD_B         (1 << 1)
451 #define         MB_SCOREBOARD_C         (1 << 2)
452
453 /* return 0 if the MB at (x_index, y_index) is inside the frame and the current slice, -1 otherwise */
454 static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
455 {
456     int mb_index;
457     if (x_index < 0 || x_index >= mb_width)
458         return -1;
459     if (y_index < 0 || y_index >= mb_height)
460         return -1;
461
462     mb_index = y_index * mb_width + x_index;
463     if (mb_index < first_mb || mb_index > (first_mb + num_mb))
464         return -1;
465     return 0;
466 }
467
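/*
 * The gen9wa_* walkers below emit one MEDIA_OBJECT per macroblock along
 * diagonal wavefronts: the inner loop steps by (x - 2, y + 1), so by the time
 * an MB is emitted its left (A), top (B), top-right (C) and top-left (D)
 * neighbours are already dispatched.  The score_dep mask built per MB from
 * MB_SCOREBOARD_A/B/C tells the hardware scoreboard which of those neighbours
 * to wait on, which lets MB threads run concurrently while still honouring
 * the intra-prediction dependencies.
 */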
468 static void
469 gen9wa_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
470                                      struct encode_state *encode_state,
471                                      int mb_width, int mb_height,
472                                      int kernel,
473                                      int transform_8x8_mode_flag,
474                                      struct intel_encoder_context *encoder_context)
475 {
476     struct gen6_vme_context *vme_context = encoder_context->vme_context;
477     int mb_row;
478     int s;
479     unsigned int *command_ptr;
480
481 #define         USE_SCOREBOARD          (1 << 21)
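/* written as DW2 of each MEDIA_OBJECT below: bit 21 is the "Use Scoreboard"
 * flag for the dispatched thread */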
482
483     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
484     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
485
486     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
487         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
488         int first_mb = pSliceParameter->macroblock_address;
489         int num_mb = pSliceParameter->num_macroblocks;
490         unsigned int mb_intra_ub, score_dep;
491         int x_outer, y_outer, x_inner, y_inner;
492         int xtemp_outer = 0;
493
494         x_outer = first_mb % mb_width;
495         y_outer = first_mb / mb_width;
496         mb_row = y_outer;
497
498         for (; x_outer < (mb_width - 2) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
499             x_inner = x_outer;
500             y_inner = y_outer;
501             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
502                 mb_intra_ub = 0;
503                 score_dep = 0;
504                 if (x_inner != 0) {
505                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
506                     score_dep |= MB_SCOREBOARD_A;
507                 }
508                 if (y_inner != mb_row) {
509                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
510                     score_dep |= MB_SCOREBOARD_B;
511                     if (x_inner != 0)
512                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
513                     if (x_inner != (mb_width - 1)) {
514                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
515                         score_dep |= MB_SCOREBOARD_C;
516                     }
517                 }
518
519                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
520                 *command_ptr++ = kernel;
521                 *command_ptr++ = USE_SCOREBOARD;
522                 /* Indirect data */
523                 *command_ptr++ = 0;
524                 /* the (X, Y) term of scoreboard */
525                 *command_ptr++ = ((y_inner << 16) | x_inner);
526                 *command_ptr++ = score_dep;
527                 /*inline data */
528                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
529                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
530                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
531                 *command_ptr++ = 0;
532
533                 x_inner -= 2;
534                 y_inner += 1;
535             }
536             x_outer += 1;
537         }
538
539         xtemp_outer = mb_width - 2;
540         if (xtemp_outer < 0)
541             xtemp_outer = 0;
542         x_outer = xtemp_outer;
543         y_outer = first_mb / mb_width;
544         for (; !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
545             y_inner = y_outer;
546             x_inner = x_outer;
547             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
548                 mb_intra_ub = 0;
549                 score_dep = 0;
550                 if (x_inner != 0) {
551                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
552                     score_dep |= MB_SCOREBOARD_A;
553                 }
554                 if (y_inner != mb_row) {
555                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
556                     score_dep |= MB_SCOREBOARD_B;
557                     if (x_inner != 0)
558                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
559
560                     if (x_inner != (mb_width - 1)) {
561                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
562                         score_dep |= MB_SCOREBOARD_C;
563                     }
564                 }
565
566                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
567                 *command_ptr++ = kernel;
568                 *command_ptr++ = USE_SCOREBOARD;
569                 /* Indirect data */
570                 *command_ptr++ = 0;
571                 /* the (X, Y) term of scoreboard */
572                 *command_ptr++ = ((y_inner << 16) | x_inner);
573                 *command_ptr++ = score_dep;
574                 /*inline data */
575                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
576                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
577
578                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
579                 *command_ptr++ = 0;
580                 x_inner -= 2;
581                 y_inner += 1;
582             }
583             x_outer++;
584             if (x_outer >= mb_width) {
585                 y_outer += 1;
586                 x_outer = xtemp_outer;
587             }
588         }
589     }
590
591     *command_ptr++ = MI_BATCH_BUFFER_END;
592     *command_ptr++ = 0;
593
594     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
595 }
596
597 static void
598 gen9_vme_fill_vme_batchbuffer(VADriverContextP ctx,
599                               struct encode_state *encode_state,
600                               int mb_width, int mb_height,
601                               int kernel,
602                               int transform_8x8_mode_flag,
603                               struct intel_encoder_context *encoder_context)
604 {
605     struct gen6_vme_context *vme_context = encoder_context->vme_context;
606     int mb_x = 0, mb_y = 0;
607     int i, s;
608     unsigned int *command_ptr;
609     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
610     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
611     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
612     int qp;
613     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
614     int qp_mb, qp_index;
615
616     if (encoder_context->rate_control_mode == VA_RC_CQP)
617         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
618     else
619         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
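    /*
     * QP selection for the per-MB inline data: under CQP the slice QP
     * (pic_init_qp + slice_qp_delta) is used, otherwise the QP comes from the
     * bit-rate controller.  When ROI is enabled (see intel_h264_enc_roi_config()
     * in gen9_vme_prepare()), the qp_per_mb array overrides this single value
     * with a per-macroblock QP, indexed in raster order (mb_y * mb_width + mb_x).
     */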
620
621     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
622     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
623
624     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
625         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
626         int slice_mb_begin = pSliceParameter->macroblock_address;
627         int slice_mb_number = pSliceParameter->num_macroblocks;
628         unsigned int mb_intra_ub;
629         int slice_mb_x = pSliceParameter->macroblock_address % mb_width;
630         for (i = 0; i < slice_mb_number;  ) {
631             int mb_count = i + slice_mb_begin;
632             mb_x = mb_count % mb_width;
633             mb_y = mb_count / mb_width;
634             mb_intra_ub = 0;
635             if (mb_x != 0) {
636                 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
637             }
638             if (mb_y != 0) {
639                 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
640                 if (mb_x != 0)
641                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
642                 if (mb_x != (mb_width - 1))
643                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
644             }
645             if (i < mb_width) {
646                 if (i == 0)
647                     mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
648                 mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
649                 if ((i == (mb_width - 1)) && slice_mb_x) {
650                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
651                 }
652             }
653
654             if ((i == mb_width) && slice_mb_x) {
655                 mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
656             }
657             *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
658             *command_ptr++ = kernel;
659             *command_ptr++ = 0;
660             *command_ptr++ = 0;
661             *command_ptr++ = 0;
662             *command_ptr++ = 0;
663
664             /*inline data */
665             *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
666             *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
667             /* qp occupies one byte */
668             if (vme_context->roi_enabled) {
669                 qp_index = mb_y * mb_width + mb_x;
670                 qp_mb = *(vme_context->qp_per_mb + qp_index);
671             } else
672                 qp_mb = qp;
673             *command_ptr++ = qp_mb;
674
675             *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
676             *command_ptr++ = 0;
677             i += 1;
678         }
679     }
680
681     *command_ptr++ = MI_BATCH_BUFFER_END;
682     *command_ptr++ = 0;
683
684     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
685 }
686
687 static void gen9_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
688 {
689     struct gen6_vme_context *vme_context = encoder_context->vme_context;
690
691     gen8_gpe_context_init(ctx, &vme_context->gpe_context);
692
693     /* VME output buffer */
694     dri_bo_unreference(vme_context->vme_output.bo);
695     vme_context->vme_output.bo = NULL;
696
697     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
698     vme_context->vme_batchbuffer.bo = NULL;
699
700     /* VME state */
701     dri_bo_unreference(vme_context->vme_state.bo);
702     vme_context->vme_state.bo = NULL;
703 }
704
705 static void gen9_vme_pipeline_programing(VADriverContextP ctx,
706                                          struct encode_state *encode_state,
707                                          struct intel_encoder_context *encoder_context)
708 {
709     struct gen6_vme_context *vme_context = encoder_context->vme_context;
710     struct intel_batchbuffer *batch = encoder_context->base.batch;
711     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
712     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
713     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
714     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
715     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
716     int kernel_shader;
717     bool allow_hwscore = true;
718     int s;
719     unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
720
721     if (is_low_quality)
722         allow_hwscore = false;
723     else {
724         for (s = 0; s < encode_state->num_slice_params_ext; s++) {
725             pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
726             if ((pSliceParameter->macroblock_address % width_in_mbs)) {
727                 allow_hwscore = false;
728                 break;
729             }
730         }
731     }
732
733     if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
734         (pSliceParameter->slice_type == SLICE_TYPE_SI)) {
735         kernel_shader = VME_INTRA_SHADER;
736     } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
737                (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
738         kernel_shader = VME_INTER_SHADER;
739     } else {
740         kernel_shader = VME_BINTER_SHADER;
741         if (!allow_hwscore)
742             kernel_shader = VME_INTER_SHADER;
743     }
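    /*
     * Summary of the decision above: the HW-scoreboard walker is only used when
     * the quality level is above ENCODER_LOW_QUALITY and every slice starts at
     * the beginning of a macroblock row; otherwise the plain raster-order fill
     * is used, and B slices fall back from the BINTER kernel to the INTER kernel.
     */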
744     if (allow_hwscore)
745         gen9wa_vme_walker_fill_vme_batchbuffer(ctx,
746                                                encode_state,
747                                                width_in_mbs, height_in_mbs,
748                                                kernel_shader,
749                                                pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
750                                                encoder_context);
751     else
752         gen9_vme_fill_vme_batchbuffer(ctx,
753                                       encode_state,
754                                       width_in_mbs, height_in_mbs,
755                                       kernel_shader,
756                                       pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
757                                       encoder_context);
758
759     intel_batchbuffer_start_atomic(batch, 0x1000);
760     gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
761     BEGIN_BATCH(batch, 3);
762     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
763     OUT_RELOC(batch,
764               vme_context->vme_batchbuffer.bo,
765               I915_GEM_DOMAIN_COMMAND, 0,
766               0);
767     OUT_BATCH(batch, 0);
768     ADVANCE_BATCH(batch);
769
770     gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
771
772     intel_batchbuffer_end_atomic(batch);
773 }
774
775 static VAStatus gen9_vme_prepare(VADriverContextP ctx,
776                                  struct encode_state *encode_state,
777                                  struct intel_encoder_context *encoder_context)
778 {
779     VAStatus vaStatus = VA_STATUS_SUCCESS;
780     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
781     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
782     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
783     struct gen6_vme_context *vme_context = encoder_context->vme_context;
784
785     if (!vme_context->h264_level ||
786         (vme_context->h264_level != pSequenceParameter->level_idc)) {
787             vme_context->h264_level = pSequenceParameter->level_idc;
788     }
789
790     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
791     intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context);
792     intel_h264_enc_roi_config(ctx, encode_state, encoder_context);
793
794     /*Setup all the memory objects*/
795     gen9_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
796     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
797     //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
798     gen9_vme_constant_setup(ctx, encode_state, encoder_context);
799
800     /*Programming media pipeline*/
801     gen9_vme_pipeline_programing(ctx, encode_state, encoder_context);
802
803     return vaStatus;
804 }
805
806 static VAStatus gen9_vme_run(VADriverContextP ctx,
807                              struct encode_state *encode_state,
808                              struct intel_encoder_context *encoder_context)
809 {
810     struct intel_batchbuffer *batch = encoder_context->base.batch;
811
812     intel_batchbuffer_flush(batch);
813
814     return VA_STATUS_SUCCESS;
815 }
816
817 static VAStatus gen9_vme_stop(VADriverContextP ctx,
818                               struct encode_state *encode_state,
819                               struct intel_encoder_context *encoder_context)
820 {
821     return VA_STATUS_SUCCESS;
822 }
823
824 static VAStatus
825 gen9_vme_pipeline(VADriverContextP ctx,
826                   VAProfile profile,
827                   struct encode_state *encode_state,
828                   struct intel_encoder_context *encoder_context)
829 {
830     gen9_vme_media_init(ctx, encoder_context);
831     gen9_vme_prepare(ctx, encode_state, encoder_context);
832     gen9_vme_run(ctx, encode_state, encoder_context);
833     gen9_vme_stop(ctx, encode_state, encoder_context);
834
835     return VA_STATUS_SUCCESS;
836 }
837
838 static void
839 gen9_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
840                                    struct encode_state *encode_state,
841                                    int index,
842                                    int is_intra,
843                                    struct intel_encoder_context *encoder_context)
844
845 {
846     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
847     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
848     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
849
850     gen9_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
851 }
852
853 static void
854 gen9_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
855                                             struct encode_state *encode_state,
856                                             int index,
857                                             struct intel_encoder_context *encoder_context)
858
859 {
860     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
861     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
862     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
863
864     gen9_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
865 }
866
867 static VAStatus
868 gen9_vme_mpeg2_surface_setup(VADriverContextP ctx,
869                              struct encode_state *encode_state,
870                              int is_intra,
871                              struct intel_encoder_context *encoder_context)
872 {
873     struct object_surface *obj_surface;
874
875     /*Setup surface states*/
876     /* current picture for encoding */
877     obj_surface = encode_state->input_yuv_object;
878     gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
879     gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
880     gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
881
882     if (!is_intra) {
883         /* reference 0 */
884         obj_surface = encode_state->reference_objects[0];
885
886         if (obj_surface->bo != NULL)
887             gen9_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
888
889         /* reference 1 */
890         obj_surface = encode_state->reference_objects[1];
891
892         if (obj_surface && obj_surface->bo != NULL)
893             gen9_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
894     }
895
896     /* VME output */
897     gen9_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
898     gen9_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
899
900     return VA_STATUS_SUCCESS;
901 }
902
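/*
 * MPEG-2/VP8 variant of gen9wa_vme_walker_fill_vme_batchbuffer(): it performs
 * the same (x - 2, y + 1) wavefront dispatch, but over the whole frame as a
 * single run (first_mb = 0, num_mb = mb_width * mb_height), and it is also
 * reused by gen9_vme_vp8_pipeline_programing() below.
 */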
903 static void
904 gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
905                                            struct encode_state *encode_state,
906                                            int mb_width, int mb_height,
907                                            int kernel,
908                                            struct intel_encoder_context *encoder_context)
909 {
910     struct gen6_vme_context *vme_context = encoder_context->vme_context;
911     unsigned int *command_ptr;
912
913 #define         MPEG2_SCOREBOARD                (1 << 21)
914
915     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
916     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
917
918     {
919         unsigned int mb_intra_ub, score_dep;
920         int x_outer, y_outer, x_inner, y_inner;
921         int xtemp_outer = 0;
922         int first_mb = 0;
923         int num_mb = mb_width * mb_height;
924
925         x_outer = 0;
926         y_outer = 0;
927
928         for (; x_outer < (mb_width - 2) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
929             x_inner = x_outer;
930             y_inner = y_outer;
931             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
932                 mb_intra_ub = 0;
933                 score_dep = 0;
934                 if (x_inner != 0) {
935                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
936                     score_dep |= MB_SCOREBOARD_A;
937                 }
938                 if (y_inner != 0) {
939                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
940                     score_dep |= MB_SCOREBOARD_B;
941
942                     if (x_inner != 0)
943                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
944
945                     if (x_inner != (mb_width - 1)) {
946                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
947                         score_dep |= MB_SCOREBOARD_C;
948                     }
949                 }
950
951                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
952                 *command_ptr++ = kernel;
953                 *command_ptr++ = MPEG2_SCOREBOARD;
954                 /* Indirect data */
955                 *command_ptr++ = 0;
956                 /* the (X, Y) term of scoreboard */
957                 *command_ptr++ = ((y_inner << 16) | x_inner);
958                 *command_ptr++ = score_dep;
959                 /*inline data */
960                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
961                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
962                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
963                 *command_ptr++ = 0;
964
965                 x_inner -= 2;
966                 y_inner += 1;
967             }
968             x_outer += 1;
969         }
970
971         xtemp_outer = mb_width - 2;
972         if (xtemp_outer < 0)
973             xtemp_outer = 0;
974         x_outer = xtemp_outer;
975         y_outer = 0;
976         for (; !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
977             y_inner = y_outer;
978             x_inner = x_outer;
979             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
980                 mb_intra_ub = 0;
981                 score_dep = 0;
982                 if (x_inner != 0) {
983                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
984                     score_dep |= MB_SCOREBOARD_A;
985                 }
986                 if (y_inner != 0) {
987                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
988                     score_dep |= MB_SCOREBOARD_B;
989
990                     if (x_inner != 0)
991                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
992
993                     if (x_inner != (mb_width - 1)) {
994                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
995                         score_dep |= MB_SCOREBOARD_C;
996                     }
997                 }
998
999                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1000                 *command_ptr++ = kernel;
1001                 *command_ptr++ = MPEG2_SCOREBOARD;
1002                 /* Indirect data */
1003                 *command_ptr++ = 0;
1004                 /* the (X, Y) term of scoreboard */
1005                 *command_ptr++ = ((y_inner << 16) | x_inner);
1006                 *command_ptr++ = score_dep;
1007                 /*inline data */
1008                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1009                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1010
1011                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
1012                 *command_ptr++ = 0;
1013                 x_inner -= 2;
1014                 y_inner += 1;
1015             }
1016             x_outer++;
1017             if (x_outer >= mb_width) {
1018                 y_outer += 1;
1019                 x_outer = xtemp_outer;
1020             }
1021         }
1022     }
1023
1024     *command_ptr++ = MI_BATCH_BUFFER_END;
1025     *command_ptr++ = 0;
1026
1027     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1028     return;
1029 }
1030
1031 static void
1032 gen9_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
1033                                     struct encode_state *encode_state,
1034                                     int mb_width, int mb_height,
1035                                     int kernel,
1036                                     int transform_8x8_mode_flag,
1037                                     struct intel_encoder_context *encoder_context)
1038 {
1039     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1040     int mb_x = 0, mb_y = 0;
1041     int i, s, j;
1042     unsigned int *command_ptr;
1043
1044
1045     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1046     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1047
1048     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1049         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
1050
1051         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
1052             int slice_mb_begin = slice_param->macroblock_address;
1053             int slice_mb_number = slice_param->num_macroblocks;
1054             unsigned int mb_intra_ub;
1055
1056             for (i = 0; i < slice_mb_number;) {
1057                 int mb_count = i + slice_mb_begin;
1058
1059                 mb_x = mb_count % mb_width;
1060                 mb_y = mb_count / mb_width;
1061                 mb_intra_ub = 0;
1062
1063                 if (mb_x != 0) {
1064                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1065                 }
1066
1067                 if (mb_y != 0) {
1068                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1069
1070                     if (mb_x != 0)
1071                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1072
1073                     if (mb_x != (mb_width - 1))
1074                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1075                 }
1076
1077                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1078                 *command_ptr++ = kernel;
1079                 *command_ptr++ = 0;
1080                 *command_ptr++ = 0;
1081                 *command_ptr++ = 0;
1082                 *command_ptr++ = 0;
1083
1084                 /*inline data */
1085                 *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
1086                 *command_ptr++ = ((1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1087
1088                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
1089                 *command_ptr++ = 0;
1090                 i += 1;
1091             }
1092
1093             slice_param++;
1094         }
1095     }
1096
1097     *command_ptr++ = MI_BATCH_BUFFER_END;
1098     *command_ptr++ = 0;
1099
1100     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1101 }
1102
1103 static void
1104 gen9_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
1105                                    struct encode_state *encode_state,
1106                                    int is_intra,
1107                                    struct intel_encoder_context *encoder_context)
1108 {
1109     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1110     struct intel_batchbuffer *batch = encoder_context->base.batch;
1111     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1112     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1113     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1114     bool allow_hwscore = true;
1115     int s;
1116     int kernel_shader;
1117     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1118
1119     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1120         int j;
1121         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
1122
1123         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
1124             if (slice_param->macroblock_address % width_in_mbs) {
1125                 allow_hwscore = false;
1126                 break;
1127             }
1128         }
1129     }
1130
1131     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1132     if (pic_param->picture_type == VAEncPictureTypeIntra) {
1133         allow_hwscore = false;
1134         kernel_shader = VME_INTRA_SHADER;
1135     } else {
1136         kernel_shader = VME_INTER_SHADER;
1137     }
1138
1139     if (allow_hwscore)
1140         gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
1141                                                    encode_state,
1142                                                    width_in_mbs, height_in_mbs,
1143                                                    kernel_shader,
1144                                                    encoder_context);
1145     else
1146         gen9_vme_mpeg2_fill_vme_batchbuffer(ctx,
1147                                             encode_state,
1148                                             width_in_mbs, height_in_mbs,
1149                                             is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
1150                                             0,
1151                                             encoder_context);
1152
1153     intel_batchbuffer_start_atomic(batch, 0x1000);
1154     gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1155     BEGIN_BATCH(batch, 4);
1156     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1157     OUT_RELOC(batch,
1158               vme_context->vme_batchbuffer.bo,
1159               I915_GEM_DOMAIN_COMMAND, 0,
1160               0);
1161     OUT_BATCH(batch, 0);
1162     OUT_BATCH(batch, 0);
1163     ADVANCE_BATCH(batch);
1164
1165     gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
1166
1167     intel_batchbuffer_end_atomic(batch);
1168 }
1169
1170 static VAStatus
1171 gen9_vme_mpeg2_prepare(VADriverContextP ctx,
1172                        struct encode_state *encode_state,
1173                        struct intel_encoder_context *encoder_context)
1174 {
1175     VAStatus vaStatus = VA_STATUS_SUCCESS;
1176     VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1177     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1178     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1179
1180     if ((!vme_context->mpeg2_level) ||
1181         (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
1182             vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
1183     }
1184
1185     /*Setup all the memory objects*/
1186     gen9_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1187     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
1188     //gen9_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1189     intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
1190     gen9_vme_constant_setup(ctx, encode_state, encoder_context);
1191
1192     /*Programming media pipeline*/
1193     gen9_vme_mpeg2_pipeline_programing(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1194
1195     return vaStatus;
1196 }
1197
1198 static VAStatus
1199 gen9_vme_mpeg2_pipeline(VADriverContextP ctx,
1200                         VAProfile profile,
1201                         struct encode_state *encode_state,
1202                         struct intel_encoder_context *encoder_context)
1203 {
1204     gen9_vme_media_init(ctx, encoder_context);
1205     gen9_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
1206     gen9_vme_run(ctx, encode_state, encoder_context);
1207     gen9_vme_stop(ctx, encode_state, encoder_context);
1208
1209     return VA_STATUS_SUCCESS;
1210 }
1211
1212 static void
1213 gen9_vme_vp8_output_buffer_setup(VADriverContextP ctx,
1214                                    struct encode_state *encode_state,
1215                                    int index,
1216                                    int is_intra,
1217                                    struct intel_encoder_context *encoder_context)
1218 {
1219     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1220     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1221     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1222
1223     gen9_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
1224 }
1225
1226 static void
1227 gen9_vme_vp8_output_vme_batchbuffer_setup(VADriverContextP ctx,
1228                                             struct encode_state *encode_state,
1229                                             int index,
1230                                             struct intel_encoder_context *encoder_context)
1231 {
1232     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1233     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1234     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1235
1236     gen9_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
1237 }
1238
1239 static VAStatus
1240 gen9_vme_vp8_surface_setup(VADriverContextP ctx,
1241                              struct encode_state *encode_state,
1242                              int is_intra,
1243                              struct intel_encoder_context *encoder_context)
1244 {
1245     struct object_surface *obj_surface;
1246
1247     /*Setup surface states*/
1248     /* current picture for encoding */
1249     obj_surface = encode_state->input_yuv_object;
1250     gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
1251     gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
1252     gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
1253
1254     if (!is_intra) {
1255         /* reference 0 */
1256         obj_surface = encode_state->reference_objects[0];
1257
1258         if (obj_surface->bo != NULL)
1259             gen9_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
1260
1261         /* reference 1 */
1262         obj_surface = encode_state->reference_objects[1];
1263
1264         if (obj_surface && obj_surface->bo != NULL)
1265             gen9_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
1266     }
1267
1268     /* VME output */
1269     gen9_vme_vp8_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
1270     gen9_vme_vp8_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
1271
1272     return VA_STATUS_SUCCESS;
1273 }
1274
1275 static void
1276 gen9_vme_vp8_pipeline_programing(VADriverContextP ctx,
1277                                    struct encode_state *encode_state,
1278                                    int is_intra,
1279                                    struct intel_encoder_context *encoder_context)
1280 {
1281     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1282     struct intel_batchbuffer *batch = encoder_context->base.batch;
1283     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1284     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1285     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1286     int kernel_shader = (is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER);
1287
1288     gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
1289                                                  encode_state,
1290                                                  width_in_mbs, height_in_mbs,
1291                                                  kernel_shader,
1292                                                  encoder_context);
1293
1294     intel_batchbuffer_start_atomic(batch, 0x1000);
1295     gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1296     BEGIN_BATCH(batch, 4);
1297     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1298     OUT_RELOC(batch,
1299               vme_context->vme_batchbuffer.bo,
1300               I915_GEM_DOMAIN_COMMAND, 0,
1301               0);
1302     OUT_BATCH(batch, 0);
1303     OUT_BATCH(batch, 0);
1304     ADVANCE_BATCH(batch);
1305
1306     gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
1307
1308     intel_batchbuffer_end_atomic(batch);
1309 }
1310
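/*
 * Per-frame VP8 preparation: refresh the MB/MV cost table, set up all
 * surfaces, interface descriptors and CURBE data, then program the pipeline.
 */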
1311 static VAStatus gen9_vme_vp8_prepare(VADriverContextP ctx,
1312                                  struct encode_state *encode_state,
1313                                  struct intel_encoder_context *encoder_context)
1314 {
1315     VAStatus vaStatus = VA_STATUS_SUCCESS;
1316     VAEncPictureParameterBufferVP8 *pPicParameter = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
1317     int is_intra = !pPicParameter->pic_flags.bits.frame_type;
1318
1319     /* update vp8 mbmv cost */
1320     intel_vme_vp8_update_mbmv_cost(ctx, encode_state, encoder_context);
1321
1322     /* Set up all the memory objects */
1323     gen9_vme_vp8_surface_setup(ctx, encode_state, is_intra, encoder_context);
1324     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
1325     gen9_vme_constant_setup(ctx, encode_state, encoder_context);
1326
1327     /* Program the media pipeline */
1328     gen9_vme_vp8_pipeline_programing(ctx, encode_state, is_intra, encoder_context);
1329
1330     return vaStatus;
1331 }
1332
1333 static VAStatus
1334 gen9_vme_vp8_pipeline(VADriverContextP ctx,
1335                         VAProfile profile,
1336                         struct encode_state *encode_state,
1337                         struct intel_encoder_context *encoder_context)
1338 {
1339     gen9_vme_media_init(ctx, encoder_context);
1340     gen9_vme_vp8_prepare(ctx, encode_state, encoder_context);
1341     gen9_vme_run(ctx, encode_state, encoder_context);
1342     gen9_vme_stop(ctx, encode_state, encoder_context);
1343
1344     return VA_STATUS_SUCCESS;
1345 }
1346
1347 /* HEVC */
1348
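/*
 * The HEVC VME path works on 16x16 MB granularity: the CTB-addressed slice
 * parameters are converted to MB units (num_mb_in_ctb) before the per-MB
 * commands are generated.
 */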
1349 static void
1350 gen9_vme_hevc_output_buffer_setup(VADriverContextP ctx,
1351                              struct encode_state *encode_state,
1352                              int index,
1353                              struct intel_encoder_context *encoder_context)
1354
1355 {
1356     struct i965_driver_data *i965 = i965_driver_data(ctx);
1357     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1358     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1359     VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1360     int is_intra = pSliceParameter->slice_type == HEVC_SLICE_I;
1361     int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
1362     int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
1363
1364
1365     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
1366     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
1367
1368     if (is_intra)
1369         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
1370     else
1371         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
1372     /*
1373      * Inter output per MB: 32-byte intra search result + 16-byte IME info
1374      * + 128-byte IME MVs + 32-byte IME refs + 16-byte FBR info
1375      * + 128-byte FBR MVs + 32-byte FBR refs, i.e. 16 * (2 + 2 * (1 + 8 + 2)) = 16 * 24 bytes.
1376      */
1377
1378     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
1379                                               "VME output buffer",
1380                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
1381                                               0x1000);
1382     assert(vme_context->vme_output.bo);
1383     vme_context->vme_buffer_suface_setup(ctx,
1384                                          &vme_context->gpe_context,
1385                                          &vme_context->vme_output,
1386                                          BINDING_TABLE_OFFSET(index),
1387                                          SURFACE_STATE_OFFSET(index));
1388 }
1389
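/*
 * Second-level batch buffer: one 64-byte (4 OWORD) slot per MB plus one
 * extra slot for the MI_BATCH_BUFFER_END terminator.
 */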
1390 static void
1391 gen9_vme_hevc_output_vme_batchbuffer_setup(VADriverContextP ctx,
1392                                       struct encode_state *encode_state,
1393                                       int index,
1394                                       struct intel_encoder_context *encoder_context)
1395
1396 {
1397     struct i965_driver_data *i965 = i965_driver_data(ctx);
1398     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1399     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1400     int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
1401     int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
1402
1403     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
1404     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
1405     vme_context->vme_batchbuffer.pitch = 16;
1406     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
1407                                                    "VME batchbuffer",
1408                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
1409                                                    0x1000);
1410 }
1411 static VAStatus
1412 gen9_vme_hevc_surface_setup(VADriverContextP ctx,
1413                        struct encode_state *encode_state,
1414                        int is_intra,
1415                        struct intel_encoder_context *encoder_context)
1416 {
1417     struct object_surface *obj_surface;
1418
1419     /* Set up surface states */
1420     /* current picture for encoding */
1421     obj_surface = encode_state->input_yuv_object;
1422     gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
1423     gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
1424     gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
1425
1426     if (!is_intra) {
1427         VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1428         int slice_type;
1429
1430         slice_type = slice_param->slice_type;
1431         assert(slice_type != HEVC_SLICE_I);
1432
1433         /* TODO for HEVC */
1434         intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);
1435
1436         if (slice_type == HEVC_SLICE_B)
1437             intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
1438     }
1439
1440     /* VME output */
1441     gen9_vme_hevc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
1442     gen9_vme_hevc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
1443
1444     return VA_STATUS_SUCCESS;
1445 }
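
/*
 * Scoreboard-friendly walker: MEDIA_OBJECT commands are emitted along
 * diagonals (stepping x - 2, y + 1) so that the A/B/C/D neighbour
 * dependencies of each MB are already covered when the hardware
 * scoreboard releases it.
 */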
1446 static void
1447 gen9wa_vme_hevc_walker_fill_vme_batchbuffer(VADriverContextP ctx,
1448                                      struct encode_state *encode_state,
1449                                      int mb_width, int mb_height,
1450                                      int kernel,
1451                                      int transform_8x8_mode_flag,
1452                                      struct intel_encoder_context *encoder_context)
1453 {
1454     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1455     int mb_row;
1456     int s;
1457     unsigned int *command_ptr;
1458     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1459     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
1460     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
1461     int ctb_size = 1 << log2_ctb_size;
1462     int num_mb_in_ctb = (ctb_size + 15)/16;
1463     num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;
1464
1465 #define         USE_SCOREBOARD          (1 << 21)
1466
1467     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1468     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1469
1470     /* slice_segment_address must be aligned to picture_width_in_ctb */
1471     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1472         VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
1473         int first_mb = pSliceParameter->slice_segment_address * num_mb_in_ctb;
1474         int num_mb = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb;
1475         unsigned int mb_intra_ub, score_dep;
1476         int x_outer, y_outer, x_inner, y_inner;
1477         int xtemp_outer = 0;
1478
1479         x_outer = first_mb % mb_width;
1480         y_outer = first_mb / mb_width;
1481         mb_row = y_outer;
1482
1483         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1484             x_inner = x_outer;
1485             y_inner = y_outer;
1486             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1487                 mb_intra_ub = 0;
1488                 score_dep = 0;
1489                 if (x_inner != 0) {
1490                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1491                     score_dep |= MB_SCOREBOARD_A;
1492                 }
1493                 if (y_inner != mb_row) {
1494                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1495                     score_dep |= MB_SCOREBOARD_B;
1496                     if (x_inner != 0)
1497                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1498                     if (x_inner != (mb_width -1)) {
1499                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1500                         score_dep |= MB_SCOREBOARD_C;
1501                     }
1502                 }
1503
1504                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1505                 *command_ptr++ = kernel;
1506                 *command_ptr++ = USE_SCOREBOARD;
1507                 /* Indirect data */
1508                 *command_ptr++ = 0;
1509                 /* the (X, Y) term of scoreboard */
1510                 *command_ptr++ = ((y_inner << 16) | x_inner);
1511                 *command_ptr++ = score_dep;
1512                 /*inline data */
1513                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1514                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1515                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
1516                 *command_ptr++ = 0;
1517
1518                 x_inner -= 2;
1519                 y_inner += 1;
1520             }
1521             x_outer += 1;
1522         }
1523
1524         xtemp_outer = mb_width - 2;
1525         if (xtemp_outer < 0)
1526             xtemp_outer = 0;
1527         x_outer = xtemp_outer;
1528         y_outer = first_mb / mb_width;
1529         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1530             y_inner = y_outer;
1531             x_inner = x_outer;
1532             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1533                 mb_intra_ub = 0;
1534                 score_dep = 0;
1535                 if (x_inner != 0) {
1536                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1537                     score_dep |= MB_SCOREBOARD_A;
1538                 }
1539                 if (y_inner != mb_row) {
1540                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1541                     score_dep |= MB_SCOREBOARD_B;
1542                     if (x_inner != 0)
1543                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1544
1545                     if (x_inner != (mb_width -1)) {
1546                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1547                         score_dep |= MB_SCOREBOARD_C;
1548                     }
1549                 }
1550
1551                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1552                 *command_ptr++ = kernel;
1553                 *command_ptr++ = USE_SCOREBOARD;
1554                 /* Indirect data */
1555                 *command_ptr++ = 0;
1556                 /* the (X, Y) term of scoreboard */
1557                 *command_ptr++ = ((y_inner << 16) | x_inner);
1558                 *command_ptr++ = score_dep;
1559                 /*inline data */
1560                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1561                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1562
1563                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
1564                 *command_ptr++ = 0;
1565                 x_inner -= 2;
1566                 y_inner += 1;
1567             }
1568             x_outer++;
1569             if (x_outer >= mb_width) {
1570                 y_outer += 1;
1571                 x_outer = xtemp_outer;
1572             }
1573         }
1574     }
1575
1576     *command_ptr++ = MI_BATCH_BUFFER_END;
1577     *command_ptr++ = 0;
1578
1579     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1580 }
1581
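/*
 * Plain raster-order fill, used when the scoreboard walker cannot be used
 * (a slice does not start at an MB row boundary); intra neighbour
 * availability is computed per MB instead of being handled by the scoreboard.
 */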
1582 static void
1583 gen9_vme_hevc_fill_vme_batchbuffer(VADriverContextP ctx,
1584                               struct encode_state *encode_state,
1585                               int mb_width, int mb_height,
1586                               int kernel,
1587                               int transform_8x8_mode_flag,
1588                               struct intel_encoder_context *encoder_context)
1589 {
1590     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1591     int mb_x = 0, mb_y = 0;
1592     int i, s;
1593     unsigned int *command_ptr;
1594     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1595     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
1596     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
1597
1598     int ctb_size = 1 << log2_ctb_size;
1599     int num_mb_in_ctb = (ctb_size + 15)/16;
1600     num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;
1601
1602     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1603     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1604
1605     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1606         VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
1607         int slice_mb_begin = pSliceParameter->slice_segment_address * num_mb_in_ctb;
1608         int slice_mb_number = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb;
1609
1610         unsigned int mb_intra_ub;
1611         int slice_mb_x = slice_mb_begin % mb_width;
1612         for (i = 0; i < slice_mb_number;  ) {
1613             int mb_count = i + slice_mb_begin;
1614             mb_x = mb_count % mb_width;
1615             mb_y = mb_count / mb_width;
1616             mb_intra_ub = 0;
1617
1618             if (mb_x != 0) {
1619                 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1620             }
1621             if (mb_y != 0) {
1622                 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1623                 if (mb_x != 0)
1624                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1625                 if (mb_x != (mb_width -1))
1626                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1627             }
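            /*
             * MBs in the first row of a slice generally cannot use the
             * neighbours above the slice start; MB 0 of the slice has no
             * left neighbour either.
             */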
1628             if (i < mb_width) {
1629                 if (i == 0)
1630                     mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
1631                 mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
1632                 if ((i == (mb_width - 1)) && slice_mb_x) {
1633                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1634                 }
1635             }
1636
1637             if ((i == mb_width) && slice_mb_x) {
1638                 mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
1639             }
1640
1641             *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1642             *command_ptr++ = kernel;
1643             *command_ptr++ = 0; /* scoreboard is not used */
1644             *command_ptr++ = 0; /* indirect data */
1645             *command_ptr++ = 0; /* the (X, Y) term of scoreboard */
1646             *command_ptr++ = 0; /* scoreboard dependency */
1647
1648             /*inline data */
1649             *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
1650             *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1651
1652             *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
1653             *command_ptr++ = 0;
1654             i += 1;
1655         }
1656     }
1657
1658     *command_ptr++ = MI_BATCH_BUFFER_END;
1659     *command_ptr++ = 0;
1660
1661     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1662 }
1663
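/*
 * Pick the kernel from the slice type (I/P/B) and choose between the
 * scoreboard walker and the plain fill depending on whether every slice
 * starts at an MB row boundary; B slices fall back to VME_INTER_SHADER
 * when the scoreboard cannot be used.
 */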
1664 static void gen9_vme_hevc_pipeline_programing(VADriverContextP ctx,
1665                                          struct encode_state *encode_state,
1666                                          struct intel_encoder_context *encoder_context)
1667 {
1668     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1669     struct intel_batchbuffer *batch = encoder_context->base.batch;
1670     VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1671     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1672     int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
1673     int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
1674     int kernel_shader;
1675     bool allow_hwscore = true;
1676     int s;
1677
1678     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
1679     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
1680
1681     int ctb_size = 1 << log2_ctb_size;
1682     int num_mb_in_ctb = (ctb_size + 15)/16;
1683     int transform_8x8_mode_flag = 1;
1684     num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;
1685
1686     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1687         pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
1688         int slice_mb_begin = pSliceParameter->slice_segment_address * num_mb_in_ctb;
1689         if ((slice_mb_begin % width_in_mbs)) {
1690             allow_hwscore = false;
1691             break;
1692         }
1693     }
1694
1695     if (pSliceParameter->slice_type == HEVC_SLICE_I) {
1696         kernel_shader = VME_INTRA_SHADER;
1697     } else if (pSliceParameter->slice_type == HEVC_SLICE_P) {
1698         kernel_shader = VME_INTER_SHADER;
1699     } else {
1700         kernel_shader = VME_BINTER_SHADER;
1701         if (!allow_hwscore)
1702             kernel_shader = VME_INTER_SHADER;
1703     }
1704     if (allow_hwscore)
1705         gen9wa_vme_hevc_walker_fill_vme_batchbuffer(ctx,
1706                                                encode_state,
1707                                                width_in_mbs, height_in_mbs,
1708                                                kernel_shader,
1709                                                transform_8x8_mode_flag,
1710                                                encoder_context);
1711     else
1712         gen9_vme_hevc_fill_vme_batchbuffer(ctx,
1713                                       encode_state,
1714                                       width_in_mbs, height_in_mbs,
1715                                       kernel_shader,
1716                                       transform_8x8_mode_flag,
1717                                       encoder_context);
1718
1719     intel_batchbuffer_start_atomic(batch, 0x1000);
1720     gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1721     BEGIN_BATCH(batch, 3);
1722     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1723     OUT_RELOC(batch,
1724               vme_context->vme_batchbuffer.bo,
1725               I915_GEM_DOMAIN_COMMAND, 0,
1726               0);
1727     OUT_BATCH(batch, 0);
1728     ADVANCE_BATCH(batch);
1729
1730     gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
1731
1732     intel_batchbuffer_end_atomic(batch);
1733 }
1734
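/*
 * Per-frame HEVC preparation: track the level, refresh the MB/MV cost
 * table, set up surfaces, interface descriptors and CURBE data, then
 * program the pipeline.
 */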
1735 static VAStatus gen9_vme_hevc_prepare(VADriverContextP ctx,
1736                                  struct encode_state *encode_state,
1737                                  struct intel_encoder_context *encoder_context)
1738 {
1739     VAStatus vaStatus = VA_STATUS_SUCCESS;
1740     VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1741     int is_intra = pSliceParameter->slice_type == HEVC_SLICE_I;
1742     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1743     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1744
1745     /* the HEVC level is tracked here the same way as the AVC level is for the AVC VME */
1746     if (!vme_context->hevc_level ||
1747         (vme_context->hevc_level != pSequenceParameter->general_level_idc)) {
1748         vme_context->hevc_level = pSequenceParameter->general_level_idc;
1749     }
1750
1751     intel_vme_hevc_update_mbmv_cost(ctx, encode_state, encoder_context);
1752
1753     /* Set up all the memory objects */
1754     gen9_vme_hevc_surface_setup(ctx, encode_state, is_intra, encoder_context);
1755     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
1756     //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
1757     gen9_vme_constant_setup(ctx, encode_state, encoder_context);
1758
1759     /* Program the media pipeline */
1760     gen9_vme_hevc_pipeline_programing(ctx, encode_state, encoder_context);
1761
1762     return vaStatus;
1763 }
1764
1765
1766 static VAStatus
1767 gen9_vme_hevc_pipeline(VADriverContextP ctx,
1768                   VAProfile profile,
1769                   struct encode_state *encode_state,
1770                   struct intel_encoder_context *encoder_context)
1771 {
1772     gen9_vme_media_init(ctx, encoder_context);
1773     gen9_vme_hevc_prepare(ctx, encode_state, encoder_context);
1774     gen9_vme_run(ctx, encode_state, encoder_context);
1775     gen9_vme_stop(ctx, encode_state, encoder_context);
1776
1777     return VA_STATUS_SUCCESS;
1778 }
1779
1780
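/*
 * Release everything owned by the VME context: the GPE context, the VME
 * output/state/second-level-batch BOs, the QP cost table BOs and the
 * CPU-side buffers.
 */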
1781 static void
1782 gen9_vme_context_destroy(void *context)
1783 {
1784     struct gen6_vme_context *vme_context = context;
1785
1786     gen8_gpe_context_destroy(&vme_context->gpe_context);
1787
1788     dri_bo_unreference(vme_context->vme_output.bo);
1789     vme_context->vme_output.bo = NULL;
1790
1791     dri_bo_unreference(vme_context->vme_state.bo);
1792     vme_context->vme_state.bo = NULL;
1793
1794     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
1795     vme_context->vme_batchbuffer.bo = NULL;
1796
1797     free(vme_context->vme_state_message);
1798     vme_context->vme_state_message = NULL;
1799
1800     dri_bo_unreference(vme_context->i_qp_cost_table);
1801     vme_context->i_qp_cost_table = NULL;
1802
1803     dri_bo_unreference(vme_context->p_qp_cost_table);
1804     vme_context->p_qp_cost_table = NULL;
1805
1806     dri_bo_unreference(vme_context->b_qp_cost_table);
1807     vme_context->b_qp_cost_table = NULL;
1808
1809     free(vme_context->qp_per_mb);
1810     vme_context->qp_per_mb = NULL;
1811
1812     free(vme_context);
1813 }
1814
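/*
 * Select the codec-specific kernel list and VME pipeline entry point, size
 * the GPE context (binding table, interface descriptors, CURBE, VFE state),
 * and hook up the gen8 GPE surface-setup helpers.
 */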
1815 Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1816 {
1817     struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context));
1818     struct i965_kernel *vme_kernel_list = NULL;
1819     int i965_kernel_num;
1820
1821     switch (encoder_context->codec) {
1822     case CODEC_H264:
1823     case CODEC_H264_MVC:
1824         vme_kernel_list = gen9_vme_kernels;
1825         encoder_context->vme_pipeline = gen9_vme_pipeline;
1826         i965_kernel_num = sizeof(gen9_vme_kernels) / sizeof(struct i965_kernel);
1827         break;
1828
1829     case CODEC_MPEG2:
1830         vme_kernel_list = gen9_vme_mpeg2_kernels;
1831         encoder_context->vme_pipeline = gen9_vme_mpeg2_pipeline;
1832         i965_kernel_num = sizeof(gen9_vme_mpeg2_kernels) / sizeof(struct i965_kernel);
1833         break;
1834
1835     case CODEC_VP8:
1836         vme_kernel_list = gen9_vme_vp8_kernels;
1837         encoder_context->vme_pipeline = gen9_vme_vp8_pipeline;
1838         i965_kernel_num = sizeof(gen9_vme_vp8_kernels) / sizeof(struct i965_kernel);
1839         break;
1840
1841     case CODEC_HEVC:
1842         vme_kernel_list = gen9_vme_hevc_kernels;
1843         encoder_context->vme_pipeline = gen9_vme_hevc_pipeline;
1844         i965_kernel_num = sizeof(gen9_vme_hevc_kernels) / sizeof(struct i965_kernel);
1845         break;
1846
1847     default:
1848         /* never get here */
1849         assert(0);
1850
1851         break;
1852     }
1853
1854     assert(vme_context);
1855     vme_context->vme_kernel_sum = i965_kernel_num;
1856     vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1857
1858     vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
1859     vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
1860     vme_context->gpe_context.sampler_size = 0;
1861
1862
1863     vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1864     vme_context->gpe_context.vfe_state.num_urb_entries = 64;
1865     vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
1866     vme_context->gpe_context.vfe_state.urb_entry_size = 16;
1867     vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
1868
1869     gen7_vme_scoreboard_init(ctx, vme_context);
1870
1871     gen8_gpe_load_kernels(ctx,
1872                           &vme_context->gpe_context,
1873                           vme_kernel_list,
1874                           i965_kernel_num);
1875     vme_context->vme_surface2_setup = gen8_gpe_surface2_setup;
1876     vme_context->vme_media_rw_surface_setup = gen8_gpe_media_rw_surface_setup;
1877     vme_context->vme_buffer_suface_setup = gen8_gpe_buffer_suface_setup;
1878     vme_context->vme_media_chroma_surface_setup = gen8_gpe_media_chroma_surface_setup;
1879
1880     encoder_context->vme_context = vme_context;
1881     encoder_context->vme_context_destroy = gen9_vme_context_destroy;
1882
1883     vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
1884
1885     return True;
1886 }