OSDN Git Service

Encoding: Add one ROI flag and ROI buffer
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_vme.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "intel_driver.h"
37
38 #include "i965_defines.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "gen6_vme.h"
42 #include "gen6_mfc.h"
43
44 #ifdef SURFACE_STATE_PADDED_SIZE
45 #undef SURFACE_STATE_PADDED_SIZE
46 #endif
47
48 #define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
49 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
50 #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
51
52 #define VME_INTRA_SHADER        0
53 #define VME_INTER_SHADER        1
54 #define VME_BINTER_SHADER       2
55
56 #define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
57 #define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
58 #define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
59
60 #define VME_MSG_LENGTH          32
61
/* Gen9 AVC VME kernels: precompiled GEN ISA blobs (.g9b), one per
 * prediction mode, plus the dispatch table indexed by VME_*_SHADER. */
static const uint32_t gen9_vme_intra_frame[][4] = {
#include "shaders/vme/intra_frame_gen9.g9b"
};

static const uint32_t gen9_vme_inter_frame[][4] = {
#include "shaders/vme/inter_frame_gen9.g9b"
};

static const uint32_t gen9_vme_inter_bframe[][4] = {
#include "shaders/vme/inter_bframe_gen9.g9b"
};

static struct i965_kernel gen9_vme_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen9_vme_intra_frame,
        sizeof(gen9_vme_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen9_vme_inter_frame,
        sizeof(gen9_vme_inter_frame),
        NULL
    },
    {
        "VME inter BFrame",
        VME_BINTER_SHADER,
        gen9_vme_inter_bframe,
        sizeof(gen9_vme_inter_bframe),
        NULL
    }
};
97
/* MPEG-2 VME kernels.  Note the intra kernel reuses the AVC intra
 * shader binary; only the inter search is MPEG-2 specific. */
static const uint32_t gen9_vme_mpeg2_intra_frame[][4] = {
#include "shaders/vme/intra_frame_gen9.g9b"
};

static const uint32_t gen9_vme_mpeg2_inter_frame[][4] = {
#include "shaders/vme/mpeg2_inter_gen9.g9b"
};

static struct i965_kernel gen9_vme_mpeg2_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen9_vme_mpeg2_intra_frame,
        sizeof(gen9_vme_mpeg2_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen9_vme_mpeg2_inter_frame,
        sizeof(gen9_vme_mpeg2_inter_frame),
        NULL
    },
};
122
/* VP8 VME kernels: dedicated intra/inter shader binaries (no B frames
 * in VP8, so there is no BINTER entry). */
static const uint32_t gen9_vme_vp8_intra_frame[][4] = {
#include "shaders/vme/vp8_intra_frame_gen9.g9b"
};

static const uint32_t gen9_vme_vp8_inter_frame[][4] = {
#include "shaders/vme/vp8_inter_frame_gen9.g9b"
};

static struct i965_kernel gen9_vme_vp8_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen9_vme_vp8_intra_frame,
        sizeof(gen9_vme_vp8_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen9_vme_vp8_inter_frame,
        sizeof(gen9_vme_vp8_inter_frame),
        NULL
    },
};
147
/* HEVC */

/* HEVC VME kernels.  All three entries reuse the AVC shader binaries:
 * the motion estimation pass is shared and codec-specific packing is
 * handled downstream by the HEVC MFC path. */
static const uint32_t gen9_vme_hevc_intra_frame[][4] = {
#include "shaders/vme/intra_frame_gen9.g9b"
};

static const uint32_t gen9_vme_hevc_inter_frame[][4] = {
#include "shaders/vme/inter_frame_gen9.g9b"
};

static const uint32_t gen9_vme_hevc_inter_bframe[][4] = {
#include "shaders/vme/inter_bframe_gen9.g9b"
};

static struct i965_kernel gen9_vme_hevc_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen9_vme_hevc_intra_frame,
        sizeof(gen9_vme_hevc_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen9_vme_hevc_inter_frame,
        sizeof(gen9_vme_hevc_inter_frame),
        NULL
    },
    {
        "VME inter BFrame",
        VME_BINTER_SHADER,
        gen9_vme_hevc_inter_bframe,
        sizeof(gen9_vme_hevc_inter_bframe),
        NULL
    }
};
185 /* only used for VME source surface state */
186 static void
187 gen9_vme_source_surface_state(VADriverContextP ctx,
188                               int index,
189                               struct object_surface *obj_surface,
190                               struct intel_encoder_context *encoder_context)
191 {
192     struct gen6_vme_context *vme_context = encoder_context->vme_context;
193
194     vme_context->vme_surface2_setup(ctx,
195                                     &vme_context->gpe_context,
196                                     obj_surface,
197                                     BINDING_TABLE_OFFSET(index),
198                                     SURFACE_STATE_OFFSET(index));
199 }
200
201 static void
202 gen9_vme_media_source_surface_state(VADriverContextP ctx,
203                                     int index,
204                                     struct object_surface *obj_surface,
205                                     struct intel_encoder_context *encoder_context)
206 {
207     struct gen6_vme_context *vme_context = encoder_context->vme_context;
208
209     vme_context->vme_media_rw_surface_setup(ctx,
210                                             &vme_context->gpe_context,
211                                             obj_surface,
212                                             BINDING_TABLE_OFFSET(index),
213                                             SURFACE_STATE_OFFSET(index));
214 }
215
216 static void
217 gen9_vme_media_chroma_source_surface_state(VADriverContextP ctx,
218                                            int index,
219                                            struct object_surface *obj_surface,
220                                            struct intel_encoder_context *encoder_context)
221 {
222     struct gen6_vme_context *vme_context = encoder_context->vme_context;
223
224     vme_context->vme_media_chroma_surface_setup(ctx,
225                                                 &vme_context->gpe_context,
226                                                 obj_surface,
227                                                 BINDING_TABLE_OFFSET(index),
228                                                 SURFACE_STATE_OFFSET(index));
229 }
230
231 static void
232 gen9_vme_output_buffer_setup(VADriverContextP ctx,
233                              struct encode_state *encode_state,
234                              int index,
235                              struct intel_encoder_context *encoder_context,
236                              int is_intra,
237                              int width_in_mbs,
238                              int height_in_mbs)
239
240 {
241     struct i965_driver_data *i965 = i965_driver_data(ctx);
242     struct gen6_vme_context *vme_context = encoder_context->vme_context;
243
244     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
245     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
246
247     if (is_intra)
248         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
249     else
250         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
251     /*
252      * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
253      * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
254      * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
255      */
256
257     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
258                                               "VME output buffer",
259                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
260                                               0x1000);
261     assert(vme_context->vme_output.bo);
262     vme_context->vme_buffer_suface_setup(ctx,
263                                          &vme_context->gpe_context,
264                                          &vme_context->vme_output,
265                                          BINDING_TABLE_OFFSET(index),
266                                          SURFACE_STATE_OFFSET(index));
267 }
268
269 static void
270 gen9_vme_avc_output_buffer_setup(VADriverContextP ctx,
271                              struct encode_state *encode_state,
272                              int index,
273                              struct intel_encoder_context *encoder_context)
274 {
275     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
276     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
277     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
278     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
279     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
280
281     gen9_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
282
283 }
284
285 static void
286 gen9_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
287                                       struct encode_state *encode_state,
288                                       int index,
289                                       struct intel_encoder_context *encoder_context,
290                                       int width_in_mbs,
291                                       int height_in_mbs)
292 {
293     struct i965_driver_data *i965 = i965_driver_data(ctx);
294     struct gen6_vme_context *vme_context = encoder_context->vme_context;
295
296     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
297     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
298     vme_context->vme_batchbuffer.pitch = 16;
299     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
300                                                    "VME batchbuffer",
301                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
302                                                    0x1000);
303     vme_context->vme_buffer_suface_setup(ctx,
304                                          &vme_context->gpe_context,
305                                          &vme_context->vme_batchbuffer,
306                                          BINDING_TABLE_OFFSET(index),
307                                          SURFACE_STATE_OFFSET(index));
308 }
309
310 static void
311 gen9_vme_avc_output_vme_batchbuffer_setup(VADriverContextP ctx,
312                                       struct encode_state *encode_state,
313                                       int index,
314                                       struct intel_encoder_context *encoder_context)
315 {
316     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
317     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
318     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
319
320     gen9_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
321 }
322
323                                       
324 static VAStatus
325 gen9_vme_surface_setup(VADriverContextP ctx,
326                        struct encode_state *encode_state,
327                        int is_intra,
328                        struct intel_encoder_context *encoder_context)
329 {
330     struct object_surface *obj_surface;
331
332     /*Setup surfaces state*/
333     /* current picture for encoding */
334     obj_surface = encode_state->input_yuv_object;
335     gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
336     gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
337     gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
338
339     if (!is_intra) {
340         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
341         int slice_type;
342
343         slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
344         assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
345
346         intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);
347
348         if (slice_type == SLICE_TYPE_B)
349             intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
350     }
351
352     /* VME output */
353     gen9_vme_avc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
354     gen9_vme_avc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
355
356     return VA_STATUS_SUCCESS;
357 }
358
359 static VAStatus gen9_vme_interface_setup(VADriverContextP ctx,
360                                          struct encode_state *encode_state,
361                                          struct intel_encoder_context *encoder_context)
362 {
363     struct gen6_vme_context *vme_context = encoder_context->vme_context;
364     struct gen8_interface_descriptor_data *desc;
365     int i;
366     dri_bo *bo;
367     unsigned char *desc_ptr;
368
369     bo = vme_context->gpe_context.dynamic_state.bo;
370     dri_bo_map(bo, 1);
371     assert(bo->virtual);
372     desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt_offset;
373
374     desc = (struct gen8_interface_descriptor_data *)desc_ptr;
375
376     for (i = 0; i < vme_context->vme_kernel_sum; i++) {
377         struct i965_kernel *kernel;
378         kernel = &vme_context->gpe_context.kernels[i];
379         assert(sizeof(*desc) == 32);
380         /*Setup the descritor table*/
381         memset(desc, 0, sizeof(*desc));
382         desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
383         desc->desc3.sampler_count = 0; /* FIXME: */
384         desc->desc3.sampler_state_pointer = 0;
385         desc->desc4.binding_table_entry_count = 1; /* FIXME: */
386         desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
387         desc->desc5.constant_urb_entry_read_offset = 0;
388         desc->desc5.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
389
390         desc++;
391     }
392
393     dri_bo_unmap(bo);
394
395     return VA_STATUS_SUCCESS;
396 }
397
398 static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,
399                                         struct encode_state *encode_state,
400                                         struct intel_encoder_context *encoder_context)
401 {
402     struct gen6_vme_context *vme_context = encoder_context->vme_context;
403     unsigned char *constant_buffer;
404     unsigned int *vme_state_message;
405     int mv_num = 32;
406
407     vme_state_message = (unsigned int *)vme_context->vme_state_message;
408
409     if (encoder_context->codec == CODEC_H264 ||
410         encoder_context->codec == CODEC_H264_MVC) {
411         if (vme_context->h264_level >= 30) {
412             mv_num = 16;
413
414             if (vme_context->h264_level >= 31)
415                 mv_num = 8;
416         }
417     } else if (encoder_context->codec == CODEC_MPEG2) {
418         mv_num = 2;
419     }else if (encoder_context->codec == CODEC_HEVC) {
420         if (vme_context->hevc_level >= 30*3) {
421             mv_num = 16;
422
423             if (vme_context->hevc_level >= 31*3)
424                 mv_num = 8;
425         }/* use the avc level setting */
426     }
427
428     vme_state_message[31] = mv_num;
429
430     dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
431     assert(vme_context->gpe_context.dynamic_state.bo->virtual);
432     constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual +
433                                          vme_context->gpe_context.curbe_offset;
434
435     /* VME MV/Mb cost table is passed by using const buffer */
436     /* Now it uses the fixed search path. So it is constructed directly
437      * in the GPU shader.
438      */
439     memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
440
441     dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
442
443     return VA_STATUS_SUCCESS;
444 }
445
446 #define         MB_SCOREBOARD_A         (1 << 0)
447 #define         MB_SCOREBOARD_B         (1 << 1)
448 #define         MB_SCOREBOARD_C         (1 << 2)
449
/*
 * Return 0 if MB (x_index, y_index) lies inside the picture and inside
 * the slice that starts at MB index first_mb and spans num_mb
 * macroblocks in raster order; return -1 otherwise.
 */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    int mb_index;
    if (x_index < 0 || x_index >= mb_width)
        return -1;
    if (y_index < 0 || y_index >= mb_height)
        return -1;

    mb_index = y_index * mb_width + x_index;
    /* Valid slice MBs are [first_mb, first_mb + num_mb).  The previous
     * '>' comparison treated index first_mb + num_mb as in-bounds,
     * which let the wavefront walker dispatch one extra MEDIA_OBJECT
     * for the first MB of the following slice. */
    if (mb_index < first_mb || mb_index >= (first_mb + num_mb))
        return -1;
    return 0;
}
464
/*
 * Fill the second-level (VME) batchbuffer with one MEDIA_OBJECT per MB,
 * emitted in wavefront order so the hardware scoreboard can enforce the
 * intra-prediction dependencies on the A (left), B (top) and C
 * (top-right) neighbours: each inner step moves two MBs left and one
 * row down.  The first outer loop launches wavefronts from the slice's
 * top row; the second launches the remaining wavefronts from the
 * right-hand columns, wrapping back as rows complete.
 */
static void
gen9wa_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     int mb_width, int mb_height,
                                     int kernel,
                                     int transform_8x8_mode_flag,
                                     struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_row;
    int s;
    unsigned int *command_ptr;

#define         USE_SCOREBOARD          (1 << 21)

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    /* One pass per slice; MBs outside the slice are skipped via
     * loop_in_bounds(). */
    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
        int first_mb = pSliceParameter->macroblock_address;
        int num_mb = pSliceParameter->num_macroblocks;
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;

        x_outer = first_mb % mb_width;
        y_outer = first_mb / mb_width;
        mb_row = y_outer;  /* top row of this slice: MBs on it have no B/C/D neighbours in-slice */

        /* Wavefronts starting along the slice's first row (up to two
         * columns before the right edge). */
        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            x_inner = x_outer;
            y_inner = y_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* Intra neighbour availability and the matching
                 * scoreboard dependency mask for this MB. */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                /* 8-dword MEDIA_OBJECT with scoreboard enabled,
                 * followed by a MEDIA_STATE_FLUSH. */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;

                /* Advance along the wavefront: two MBs left, one row down. */
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* Remaining wavefronts: restart from the right-hand columns,
         * stepping down a row whenever the start column runs off the
         * picture's right edge. */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = first_mb / mb_width;
        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer++;
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    /* Terminate the second-level batch. */
    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
593
/*
 * Fill the second-level (VME) batchbuffer with one MEDIA_OBJECT per MB
 * in plain raster order, without the hardware scoreboard (contrast with
 * gen9wa_vme_walker_fill_vme_batchbuffer above).  Intra neighbour
 * availability flags are masked down near slice boundaries.
 */
static void
gen9_vme_fill_vme_batchbuffer(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              int mb_width, int mb_height,
                              int kernel,
                              int transform_8x8_mode_flag,
                              struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;
    int i, s;
    unsigned int *command_ptr;

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
        int slice_mb_begin = pSliceParameter->macroblock_address;
        int slice_mb_number = pSliceParameter->num_macroblocks;
        unsigned int mb_intra_ub;
        /* Nonzero when the slice does not start at a row boundary. */
        int slice_mb_x = pSliceParameter->macroblock_address % mb_width;
        for (i = 0; i < slice_mb_number;  ) {
            int mb_count = i + slice_mb_begin;
            mb_x = mb_count % mb_width;
            mb_y = mb_count / mb_width;
            /* Picture-level neighbour availability: A/E (left),
             * B (top), D (top-left), C (top-right). */
            mb_intra_ub = 0;
            if (mb_x != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
            }
            if (mb_y != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                if (mb_x != 0)
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                if (mb_x != (mb_width -1))
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
            }
            /* First MB-row of the slice: no in-slice top neighbours, so
             * strip B/C/D; the slice's first MB also has no left
             * neighbour.  If the slice starts mid-row, the last MB of
             * that first row does regain its in-slice top-right (C). */
            if (i < mb_width) {
                if (i == 0)
                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
                if ((i == (mb_width - 1)) && slice_mb_x) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                }
            }

            /* Start of the second in-slice row of a mid-row slice: the
             * top-left (D) neighbour belongs to the previous slice. */
            if ((i == mb_width) && slice_mb_x) {
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
            }
            /* 8-dword MEDIA_OBJECT (no scoreboard) + MEDIA_STATE_FLUSH. */
            *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
            *command_ptr++ = kernel;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;

            /*inline data */
            *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
            *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

            *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
            *command_ptr++ = 0;
            i += 1;
        }
    }

    /* Terminate the second-level batch. */
    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
665
666 static void gen9_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
667 {
668     struct gen6_vme_context *vme_context = encoder_context->vme_context;
669
670     gen8_gpe_context_init(ctx, &vme_context->gpe_context);
671
672     /* VME output buffer */
673     dri_bo_unreference(vme_context->vme_output.bo);
674     vme_context->vme_output.bo = NULL;
675
676     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
677     vme_context->vme_batchbuffer.bo = NULL;
678
679     /* VME state */
680     dri_bo_unreference(vme_context->vme_state.bo);
681     vme_context->vme_state.bo = NULL;
682 }
683
/*
 * Build the second-level VME batchbuffer (scoreboarded wavefront walk
 * when allowed, plain raster walk otherwise) and emit the first-level
 * batch that sets up the GPE pipeline and chains into it.
 */
static void gen9_vme_pipeline_programing(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    int kernel_shader;
    bool allow_hwscore = true;
    int s;
    unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);

    /* The scoreboarded walker is used only when quality is not "low"
     * and every slice starts at a row boundary (the wavefront walk
     * assumes row-aligned slice starts). */
    if (is_low_quality)
        allow_hwscore = false;
    else {
        for (s = 0; s < encode_state->num_slice_params_ext; s++) {
            pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
            if ((pSliceParameter->macroblock_address % width_in_mbs)) {
                allow_hwscore = false;
                break;
            }
        }
    }

    /* Pick the kernel from the first slice's type; B slices fall back
     * to the plain inter kernel when the scoreboard is unavailable. */
    if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
        (pSliceParameter->slice_type == SLICE_TYPE_SI)) {
        kernel_shader = VME_INTRA_SHADER;
    } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
               (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
        kernel_shader = VME_INTER_SHADER;
    } else {
        kernel_shader = VME_BINTER_SHADER;
        if (!allow_hwscore)
            kernel_shader = VME_INTER_SHADER;
    }
    if (allow_hwscore)
        gen9wa_vme_walker_fill_vme_batchbuffer(ctx,
                                               encode_state,
                                               width_in_mbs, height_in_mbs,
                                               kernel_shader,
                                               pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
                                               encoder_context);
    else
        gen9_vme_fill_vme_batchbuffer(ctx,
                                      encode_state,
                                      width_in_mbs, height_in_mbs,
                                      kernel_shader,
                                      pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
                                      encoder_context);

    /* First-level batch: pipeline setup, then MI_BATCH_BUFFER_START
     * chaining into the per-MB batchbuffer built above. */
    intel_batchbuffer_start_atomic(batch, 0x1000);
    gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(batch,
              vme_context->vme_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);
}
753
754 static VAStatus gen9_vme_prepare(VADriverContextP ctx,
755                                  struct encode_state *encode_state,
756                                  struct intel_encoder_context *encoder_context)
757 {
758     VAStatus vaStatus = VA_STATUS_SUCCESS;
759     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
760     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
761     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
762     struct gen6_vme_context *vme_context = encoder_context->vme_context;
763
764     if (!vme_context->h264_level ||
765         (vme_context->h264_level != pSequenceParameter->level_idc)) {
766             vme_context->h264_level = pSequenceParameter->level_idc;
767     }
768
769     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
770
771     /*Setup all the memory object*/
772     gen9_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
773     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
774     //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
775     gen9_vme_constant_setup(ctx, encode_state, encoder_context);
776
777     /*Programing media pipeline*/
778     gen9_vme_pipeline_programing(ctx, encode_state, encoder_context);
779
780     return vaStatus;
781 }
782
783 static VAStatus gen9_vme_run(VADriverContextP ctx,
784                              struct encode_state *encode_state,
785                              struct intel_encoder_context *encoder_context)
786 {
787     struct intel_batchbuffer *batch = encoder_context->base.batch;
788
789     intel_batchbuffer_flush(batch);
790
791     return VA_STATUS_SUCCESS;
792 }
793
794 static VAStatus gen9_vme_stop(VADriverContextP ctx,
795                               struct encode_state *encode_state,
796                               struct intel_encoder_context *encoder_context)
797 {
798     return VA_STATUS_SUCCESS;
799 }
800
801 static VAStatus
802 gen9_vme_pipeline(VADriverContextP ctx,
803                   VAProfile profile,
804                   struct encode_state *encode_state,
805                   struct intel_encoder_context *encoder_context)
806 {
807     gen9_vme_media_init(ctx, encoder_context);
808     gen9_vme_prepare(ctx, encode_state, encoder_context);
809     gen9_vme_run(ctx, encode_state, encoder_context);
810     gen9_vme_stop(ctx, encode_state, encoder_context);
811
812     return VA_STATUS_SUCCESS;
813 }
814
815 static void
816 gen9_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
817                                    struct encode_state *encode_state,
818                                    int index,
819                                    int is_intra,
820                                    struct intel_encoder_context *encoder_context)
821
822 {
823     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
824     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
825     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
826
827     gen9_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
828 }
829
830 static void
831 gen9_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
832                                             struct encode_state *encode_state,
833                                             int index,
834                                             struct intel_encoder_context *encoder_context)
835
836 {
837     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
838     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
839     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
840
841     gen9_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
842 }
843
844 static VAStatus
845 gen9_vme_mpeg2_surface_setup(VADriverContextP ctx,
846                              struct encode_state *encode_state,
847                              int is_intra,
848                              struct intel_encoder_context *encoder_context)
849 {
850     struct object_surface *obj_surface;
851
852     /*Setup surfaces state*/
853     /* current picture for encoding */
854     obj_surface = encode_state->input_yuv_object;
855     gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
856     gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
857     gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
858
859     if (!is_intra) {
860         /* reference 0 */
861         obj_surface = encode_state->reference_objects[0];
862
863         if (obj_surface->bo != NULL)
864             gen9_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
865
866         /* reference 1 */
867         obj_surface = encode_state->reference_objects[1];
868
869         if (obj_surface && obj_surface->bo != NULL)
870             gen9_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
871     }
872
873     /* VME output */
874     gen9_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
875     gen9_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
876
877     return VA_STATUS_SUCCESS;
878 }
879
/*
 * Fill the second-level VME batchbuffer with one MEDIA_OBJECT per
 * macroblock for the MPEG-2 VME kernels (also reused by the VP8 path),
 * emitting the MBs in diagonal wavefront order so the hardware
 * scoreboard can resolve the A/B/C/D neighbour dependencies
 * (gen9 workaround variant, hence the "wa" prefix).
 */
static void
gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                                           struct encode_state *encode_state,
                                           int mb_width, int mb_height,
                                           int kernel,
                                           struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    unsigned int *command_ptr;

/* Scoreboard-enable flag placed in MEDIA_OBJECT DW2 -- NOTE(review):
 * confirm the bit position against the gen9 PRM. */
#define         MPEG2_SCOREBOARD                (1 << 21)

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    {
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;
        /* The whole frame is walked as one span of num_mb macroblocks. */
        int first_mb = 0;
        int num_mb = mb_width * mb_height;

        x_outer = 0;
        y_outer = 0;

        /* Phase 1: launch a diagonal wavefront from each MB of the top
         * row up to column mb_width - 3; each inner step moves two MBs
         * left and one MB down. */
        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            x_inner = x_outer;
            y_inner = y_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* Neighbour availability flags for intra prediction and
                 * the matching scoreboard dependencies (A=left, B=top,
                 * C=top-right, D=top-left). */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;

                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                /* 8-DW MEDIA_OBJECT followed by a 2-DW MEDIA_STATE_FLUSH. */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = MPEG2_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;

                /* Step along the wavefront: two left, one down. */
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* Phase 2: keep launching wavefronts from column mb_width - 2,
         * wrapping to the next row each time the right edge is passed,
         * until every MB of the frame has been emitted. */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = 0;
        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;

                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = MPEG2_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));

                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer++;
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    /* Terminate the second-level batch. */
    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
    return;
}
1007
1008 static void
1009 gen9_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
1010                                     struct encode_state *encode_state,
1011                                     int mb_width, int mb_height,
1012                                     int kernel,
1013                                     int transform_8x8_mode_flag,
1014                                     struct intel_encoder_context *encoder_context)
1015 {
1016     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1017     int mb_x = 0, mb_y = 0;
1018     int i, s, j;
1019     unsigned int *command_ptr;
1020
1021
1022     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1023     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1024
1025     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1026         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
1027
1028         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
1029             int slice_mb_begin = slice_param->macroblock_address;
1030             int slice_mb_number = slice_param->num_macroblocks;
1031             unsigned int mb_intra_ub;
1032
1033             for (i = 0; i < slice_mb_number;) {
1034                 int mb_count = i + slice_mb_begin;
1035
1036                 mb_x = mb_count % mb_width;
1037                 mb_y = mb_count / mb_width;
1038                 mb_intra_ub = 0;
1039
1040                 if (mb_x != 0) {
1041                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1042                 }
1043
1044                 if (mb_y != 0) {
1045                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1046
1047                     if (mb_x != 0)
1048                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1049
1050                     if (mb_x != (mb_width -1))
1051                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1052                 }
1053
1054                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1055                 *command_ptr++ = kernel;
1056                 *command_ptr++ = 0;
1057                 *command_ptr++ = 0;
1058                 *command_ptr++ = 0;
1059                 *command_ptr++ = 0;
1060
1061                 /*inline data */
1062                 *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
1063                 *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1064
1065                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
1066                 *command_ptr++ = 0;
1067                 i += 1;
1068             }
1069
1070             slice_param++;
1071         }
1072     }
1073
1074     *command_ptr++ = MI_BATCH_BUFFER_END;
1075     *command_ptr++ = 0;
1076
1077     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1078 }
1079
1080 static void
1081 gen9_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
1082                                    struct encode_state *encode_state,
1083                                    int is_intra,
1084                                    struct intel_encoder_context *encoder_context)
1085 {
1086     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1087     struct intel_batchbuffer *batch = encoder_context->base.batch;
1088     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1089     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1090     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1091     bool allow_hwscore = true;
1092     int s;
1093     int kernel_shader;
1094     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1095
1096     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1097         int j;
1098         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
1099
1100         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
1101             if (slice_param->macroblock_address % width_in_mbs) {
1102                 allow_hwscore = false;
1103                 break;
1104             }
1105         }
1106     }
1107
1108     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1109     if (pic_param->picture_type == VAEncPictureTypeIntra) {
1110         allow_hwscore = false;
1111         kernel_shader = VME_INTRA_SHADER;
1112     } else {
1113         kernel_shader = VME_INTER_SHADER;
1114     }
1115
1116     if (allow_hwscore)
1117         gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
1118                                                    encode_state,
1119                                                    width_in_mbs, height_in_mbs,
1120                                                    kernel_shader,
1121                                                    encoder_context);
1122     else
1123         gen9_vme_mpeg2_fill_vme_batchbuffer(ctx,
1124                                             encode_state,
1125                                             width_in_mbs, height_in_mbs,
1126                                             is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
1127                                             0,
1128                                             encoder_context);
1129
1130     intel_batchbuffer_start_atomic(batch, 0x1000);
1131     gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1132     BEGIN_BATCH(batch, 4);
1133     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1134     OUT_RELOC(batch,
1135               vme_context->vme_batchbuffer.bo,
1136               I915_GEM_DOMAIN_COMMAND, 0,
1137               0);
1138     OUT_BATCH(batch, 0);
1139     OUT_BATCH(batch, 0);
1140     ADVANCE_BATCH(batch);
1141
1142     gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
1143
1144     intel_batchbuffer_end_atomic(batch);
1145 }
1146
1147 static VAStatus
1148 gen9_vme_mpeg2_prepare(VADriverContextP ctx,
1149                        struct encode_state *encode_state,
1150                        struct intel_encoder_context *encoder_context)
1151 {
1152     VAStatus vaStatus = VA_STATUS_SUCCESS;
1153     VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1154     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1155     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1156
1157     if ((!vme_context->mpeg2_level) ||
1158         (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
1159             vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
1160     }
1161
1162     /*Setup all the memory object*/
1163     gen9_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1164     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
1165     //gen9_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1166     intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
1167     gen9_vme_constant_setup(ctx, encode_state, encoder_context);
1168
1169     /*Programing media pipeline*/
1170     gen9_vme_mpeg2_pipeline_programing(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1171
1172     return vaStatus;
1173 }
1174
1175 static VAStatus
1176 gen9_vme_mpeg2_pipeline(VADriverContextP ctx,
1177                         VAProfile profile,
1178                         struct encode_state *encode_state,
1179                         struct intel_encoder_context *encoder_context)
1180 {
1181     gen9_vme_media_init(ctx, encoder_context);
1182     gen9_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
1183     gen9_vme_run(ctx, encode_state, encoder_context);
1184     gen9_vme_stop(ctx, encode_state, encoder_context);
1185
1186     return VA_STATUS_SUCCESS;
1187 }
1188
1189 static void
1190 gen9_vme_vp8_output_buffer_setup(VADriverContextP ctx,
1191                                    struct encode_state *encode_state,
1192                                    int index,
1193                                    int is_intra,
1194                                    struct intel_encoder_context *encoder_context)
1195 {
1196     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1197     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1198     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1199
1200     gen9_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
1201 }
1202
1203 static void
1204 gen9_vme_vp8_output_vme_batchbuffer_setup(VADriverContextP ctx,
1205                                             struct encode_state *encode_state,
1206                                             int index,
1207                                             struct intel_encoder_context *encoder_context)
1208 {
1209     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1210     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1211     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1212
1213     gen9_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
1214 }
1215
1216 static VAStatus
1217 gen9_vme_vp8_surface_setup(VADriverContextP ctx,
1218                              struct encode_state *encode_state,
1219                              int is_intra,
1220                              struct intel_encoder_context *encoder_context)
1221 {
1222     struct object_surface *obj_surface;
1223
1224     /*Setup surfaces state*/
1225     /* current picture for encoding */
1226     obj_surface = encode_state->input_yuv_object;
1227     gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
1228     gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
1229     gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
1230
1231     if (!is_intra) {
1232         /* reference 0 */
1233         obj_surface = encode_state->reference_objects[0];
1234
1235         if (obj_surface->bo != NULL)
1236             gen9_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
1237
1238         /* reference 1 */
1239         obj_surface = encode_state->reference_objects[1];
1240
1241         if (obj_surface && obj_surface->bo != NULL)
1242             gen9_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
1243     }
1244
1245     /* VME output */
1246     gen9_vme_vp8_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
1247     gen9_vme_vp8_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
1248
1249     return VA_STATUS_SUCCESS;
1250 }
1251
/*
 * Emit the VP8 VME pipeline: build the per-MB second-level batchbuffer
 * with the HW-scoreboard wavefront walker and chain to it from the main
 * batch.  The MPEG-2 walker filler is reused here -- NOTE(review): this
 * assumes the VP8 kernels accept the same inline-data layout; confirm
 * against the kernel sources.
 */
static void
gen9_vme_vp8_pipeline_programing(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   int is_intra,
                                   struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
    int kernel_shader = (is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER);

    gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
                                                 encode_state,
                                                 width_in_mbs, height_in_mbs,
                                                 kernel_shader,
                                                 encoder_context);

    intel_batchbuffer_start_atomic(batch, 0x1000);
    gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
    /* Chain to the per-MB batchbuffer as a second-level batch. */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(batch,
              vme_context->vme_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);
}
1287
1288 static VAStatus gen9_vme_vp8_prepare(VADriverContextP ctx,
1289                                  struct encode_state *encode_state,
1290                                  struct intel_encoder_context *encoder_context)
1291 {
1292     VAStatus vaStatus = VA_STATUS_SUCCESS;
1293     VAEncPictureParameterBufferVP8 *pPicParameter = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
1294     int is_intra = !pPicParameter->pic_flags.bits.frame_type;
1295
1296     /* update vp8 mbmv cost */
1297     intel_vme_vp8_update_mbmv_cost(ctx, encode_state, encoder_context);
1298
1299     /*Setup all the memory object*/
1300     gen9_vme_vp8_surface_setup(ctx, encode_state, is_intra, encoder_context);
1301     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
1302     gen9_vme_constant_setup(ctx, encode_state, encoder_context);
1303
1304     /*Programing media pipeline*/
1305     gen9_vme_vp8_pipeline_programing(ctx, encode_state, is_intra, encoder_context);
1306
1307     return vaStatus;
1308 }
1309
1310 static VAStatus
1311 gen9_vme_vp8_pipeline(VADriverContextP ctx,
1312                         VAProfile profile,
1313                         struct encode_state *encode_state,
1314                         struct intel_encoder_context *encoder_context)
1315 {
1316     gen9_vme_media_init(ctx, encoder_context);
1317     gen9_vme_vp8_prepare(ctx, encode_state, encoder_context);
1318     gen9_vme_run(ctx, encode_state, encoder_context);
1319     gen9_vme_stop(ctx, encode_state, encoder_context);
1320
1321     return VA_STATUS_SUCCESS;
1322 }
1323
1324 /* HEVC */
1325
1326 static void
1327 gen9_vme_hevc_output_buffer_setup(VADriverContextP ctx,
1328                              struct encode_state *encode_state,
1329                              int index,
1330                              struct intel_encoder_context *encoder_context)
1331
1332 {
1333     struct i965_driver_data *i965 = i965_driver_data(ctx);
1334     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1335     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1336     VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1337     int is_intra = pSliceParameter->slice_type == HEVC_SLICE_I;
1338     int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
1339     int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
1340
1341
1342     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
1343     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
1344
1345     if (is_intra)
1346         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
1347     else
1348         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
1349     /*
1350      * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
1351      * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
1352      * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
1353      */
1354
1355     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
1356                                               "VME output buffer",
1357                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
1358                                               0x1000);
1359     assert(vme_context->vme_output.bo);
1360     vme_context->vme_buffer_suface_setup(ctx,
1361                                          &vme_context->gpe_context,
1362                                          &vme_context->vme_output,
1363                                          BINDING_TABLE_OFFSET(index),
1364                                          SURFACE_STATE_OFFSET(index));
1365 }
1366
1367 static void
1368 gen9_vme_hevc_output_vme_batchbuffer_setup(VADriverContextP ctx,
1369                                       struct encode_state *encode_state,
1370                                       int index,
1371                                       struct intel_encoder_context *encoder_context)
1372
1373 {
1374     struct i965_driver_data *i965 = i965_driver_data(ctx);
1375     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1376     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1377     int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
1378     int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
1379
1380     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
1381     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
1382     vme_context->vme_batchbuffer.pitch = 16;
1383     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
1384                                                    "VME batchbuffer",
1385                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
1386                                                    0x1000);
1387 }
1388 static VAStatus
1389 gen9_vme_hevc_surface_setup(VADriverContextP ctx,
1390                        struct encode_state *encode_state,
1391                        int is_intra,
1392                        struct intel_encoder_context *encoder_context)
1393 {
1394     struct object_surface *obj_surface;
1395
1396     /*Setup surfaces state*/
1397     /* current picture for encoding */
1398     obj_surface = encode_state->input_yuv_object;
1399     gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
1400     gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
1401     gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
1402
1403     if (!is_intra) {
1404         VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1405         int slice_type;
1406
1407         slice_type = slice_param->slice_type;
1408         assert(slice_type != HEVC_SLICE_I);
1409
1410         /* to do HEVC */
1411         intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);
1412
1413         if (slice_type == HEVC_SLICE_B)
1414             intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
1415     }
1416
1417     /* VME output */
1418     gen9_vme_hevc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
1419     gen9_vme_hevc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
1420
1421     return VA_STATUS_SUCCESS;
1422 }
/*
 * Fill the VME secondary batchbuffer with one MEDIA_OBJECT (plus
 * MEDIA_STATE_FLUSH) per 16x16 macroblock, walking the MBs of each slice
 * in a diagonal (x - 2, y + 1) wavefront order so the hardware scoreboard
 * can resolve the left/above/above-right (A/B/C) thread dependencies.
 *
 * Each HEVC CTB is covered by num_mb_in_ctb 16x16 macroblocks; slice CTU
 * addresses are converted to MB indices before walking.
 */
static void
gen9wa_vme_hevc_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     int mb_width, int mb_height,
                                     int kernel,
                                     int transform_8x8_mode_flag,
                                     struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_row;
    int s;
    unsigned int *command_ptr;
    /* Derive the CTB geometry from the sequence parameters. */
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
    int ctb_size = 1 << log2_ctb_size;
    int num_mb_in_ctb = (ctb_size + 15)/16;
    num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;

#define         USE_SCOREBOARD          (1 << 21)

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    /* NOTE: slice_segment_address must be aligned to an MB-row boundary
     * (picture_width_in_ctb alignment) for this walker; the caller only
     * selects this path when that holds. */
    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
        /* Convert slice start/length from CTUs to 16x16 MB units. */
        int first_mb = pSliceParameter->slice_segment_address * num_mb_in_ctb;
        int num_mb = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb;
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;

        x_outer = first_mb % mb_width;
        y_outer = first_mb / mb_width;
        mb_row = y_outer;

        /* First pass: start one diagonal wavefront from each column of the
         * slice's first MB row, up to column mb_width - 3. */
        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            x_inner = x_outer;
            y_inner = y_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* Intra neighbour availability / scoreboard dependency mask
                 * for this MB: A = left, B = above, C = above-right,
                 * D = above-left. */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                /* MEDIA_OBJECT: 8 DWORDs total (header len = 8 - 2). */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;

                /* Advance along the wavefront: two MBs left, one row down. */
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* Second pass: start the remaining wavefronts from the right edge
         * (column mb_width - 2) and then wrap to subsequent rows. */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = first_mb / mb_width;
        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer++;
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    /* Terminate the secondary batch. */
    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
1558
/*
 * Fill the VME secondary batchbuffer with one MEDIA_OBJECT (plus
 * MEDIA_STATE_FLUSH) per 16x16 macroblock in plain raster order (no
 * hardware scoreboard).  Used when the slice layout does not permit the
 * diagonal walker variant.
 */
static void
gen9_vme_hevc_fill_vme_batchbuffer(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              int mb_width, int mb_height,
                              int kernel,
                              int transform_8x8_mode_flag,
                              struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;
    int i, s;
    unsigned int *command_ptr;
    /* Derive the CTB geometry: each CTB maps to num_mb_in_ctb 16x16 MBs. */
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;

    int ctb_size = 1 << log2_ctb_size;
    int num_mb_in_ctb = (ctb_size + 15)/16;
    num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
        /* Slice start/length converted from CTUs to 16x16 MB units. */
        int slice_mb_begin = pSliceParameter->slice_segment_address * num_mb_in_ctb;
        int slice_mb_number = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb;

        unsigned int mb_intra_ub;
        /* X offset of the slice start within its MB row; nonzero means the
         * slice does not begin at a row boundary. */
        int slice_mb_x = slice_mb_begin % mb_width;
        for (i = 0; i < slice_mb_number;  ) {
            int mb_count = i + slice_mb_begin;
            mb_x = mb_count % mb_width;
            mb_y = mb_count / mb_width;
            /* Intra neighbour availability: AE = left, B = above,
             * C = above-right, D = above-left (picture bounds). */
            mb_intra_ub = 0;

            if (mb_x != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
            }
            if (mb_y != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                if (mb_x != 0)
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                if (mb_x != (mb_width -1))
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
            }
            /* Within the first MB-row's worth of the slice, neighbours in
             * the row above belong to another slice and are unavailable. */
            if (i < mb_width) {
                if (i == 0)
                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
                if ((i == (mb_width - 1)) && slice_mb_x) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                }
            }

            /* First MB of the second row of a non-row-aligned slice: the
             * above-left MB is outside the slice. */
            if ((i == mb_width) && slice_mb_x) {
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
            }

            /* MEDIA_OBJECT: 8 DWORDs total (header len = 8 - 2);
             * indirect data and scoreboard fields unused here. */
            *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
            *command_ptr++ = kernel;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;

            /*inline data */
            *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
            *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

            *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
            *command_ptr++ = 0;
            i += 1;
        }
    }

    /* Terminate the secondary batch. */
    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
1640
1641 static void gen9_vme_hevc_pipeline_programing(VADriverContextP ctx,
1642                                          struct encode_state *encode_state,
1643                                          struct intel_encoder_context *encoder_context)
1644 {
1645     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1646     struct intel_batchbuffer *batch = encoder_context->base.batch;
1647     VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1648     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1649     int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
1650     int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
1651     int kernel_shader;
1652     bool allow_hwscore = true;
1653     int s;
1654
1655     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
1656     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
1657
1658     int ctb_size = 1 << log2_ctb_size;
1659     int num_mb_in_ctb = (ctb_size + 15)/16;
1660     int transform_8x8_mode_flag = 1;
1661     num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;
1662
1663     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1664         pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
1665         int slice_mb_begin = pSliceParameter->slice_segment_address * num_mb_in_ctb;
1666         if ((slice_mb_begin % width_in_mbs)) {
1667             allow_hwscore = false;
1668             break;
1669         }
1670     }
1671
1672     if (pSliceParameter->slice_type == HEVC_SLICE_I) {
1673         kernel_shader = VME_INTRA_SHADER;
1674     } else if (pSliceParameter->slice_type == HEVC_SLICE_P) {
1675         kernel_shader = VME_INTER_SHADER;
1676     } else {
1677         kernel_shader = VME_BINTER_SHADER;
1678         if (!allow_hwscore)
1679             kernel_shader = VME_INTER_SHADER;
1680     }
1681     if (allow_hwscore)
1682         gen9wa_vme_hevc_walker_fill_vme_batchbuffer(ctx,
1683                                                encode_state,
1684                                                width_in_mbs, height_in_mbs,
1685                                                kernel_shader,
1686                                                transform_8x8_mode_flag,
1687                                                encoder_context);
1688     else
1689         gen9_vme_hevc_fill_vme_batchbuffer(ctx,
1690                                       encode_state,
1691                                       width_in_mbs, height_in_mbs,
1692                                       kernel_shader,
1693                                       transform_8x8_mode_flag,
1694                                       encoder_context);
1695
1696     intel_batchbuffer_start_atomic(batch, 0x1000);
1697     gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1698     BEGIN_BATCH(batch, 3);
1699     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1700     OUT_RELOC(batch,
1701               vme_context->vme_batchbuffer.bo,
1702               I915_GEM_DOMAIN_COMMAND, 0,
1703               0);
1704     OUT_BATCH(batch, 0);
1705     ADVANCE_BATCH(batch);
1706
1707     gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
1708
1709     intel_batchbuffer_end_atomic(batch);
1710 }
1711
1712 static VAStatus gen9_vme_hevc_prepare(VADriverContextP ctx,
1713                                  struct encode_state *encode_state,
1714                                  struct intel_encoder_context *encoder_context)
1715 {
1716     VAStatus vaStatus = VA_STATUS_SUCCESS;
1717     VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1718     int is_intra = pSliceParameter->slice_type == HEVC_SLICE_I;
1719     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1720     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1721
1722     /* here use the avc level for hevc vme */
1723     if (!vme_context->hevc_level ||
1724         (vme_context->hevc_level != pSequenceParameter->general_level_idc)) {
1725         vme_context->hevc_level = pSequenceParameter->general_level_idc;
1726     }
1727
1728     intel_vme_hevc_update_mbmv_cost(ctx, encode_state, encoder_context);
1729
1730     /*Setup all the memory object*/
1731     gen9_vme_hevc_surface_setup(ctx, encode_state, is_intra, encoder_context);
1732     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
1733     //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
1734     gen9_vme_constant_setup(ctx, encode_state, encoder_context);
1735
1736     /*Programing media pipeline*/
1737     gen9_vme_hevc_pipeline_programing(ctx, encode_state, encoder_context);
1738
1739     return vaStatus;
1740 }
1741
1742
/*
 * Top-level HEVC VME pipeline entry point: initialize the media state,
 * prepare surfaces/kernels/constants, then run and stop the VME pass.
 * The profile argument is currently unused.  Always returns
 * VA_STATUS_SUCCESS; errors from the helper stages are not propagated.
 */
static VAStatus
gen9_vme_hevc_pipeline(VADriverContextP ctx,
                  VAProfile profile,
                  struct encode_state *encode_state,
                  struct intel_encoder_context *encoder_context)
{
    gen9_vme_media_init(ctx, encoder_context);
    gen9_vme_hevc_prepare(ctx, encode_state, encoder_context);
    gen9_vme_run(ctx, encode_state, encoder_context);
    gen9_vme_stop(ctx, encode_state, encoder_context);

    return VA_STATUS_SUCCESS;
}
1756
1757
1758 static void
1759 gen9_vme_context_destroy(void *context)
1760 {
1761     struct gen6_vme_context *vme_context = context;
1762
1763     gen8_gpe_context_destroy(&vme_context->gpe_context);
1764
1765     dri_bo_unreference(vme_context->vme_output.bo);
1766     vme_context->vme_output.bo = NULL;
1767
1768     dri_bo_unreference(vme_context->vme_state.bo);
1769     vme_context->vme_state.bo = NULL;
1770
1771     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
1772     vme_context->vme_batchbuffer.bo = NULL;
1773
1774     if (vme_context->vme_state_message) {
1775         free(vme_context->vme_state_message);
1776         vme_context->vme_state_message = NULL;
1777     }
1778
1779     free(vme_context->qp_per_mb);
1780     vme_context->qp_per_mb = NULL;
1781
1782     free(vme_context);
1783 }
1784
1785 Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1786 {
1787     struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context));
1788     struct i965_kernel *vme_kernel_list = NULL;
1789     int i965_kernel_num;
1790
1791     switch (encoder_context->codec) {
1792     case CODEC_H264:
1793     case CODEC_H264_MVC:
1794         vme_kernel_list = gen9_vme_kernels;
1795         encoder_context->vme_pipeline = gen9_vme_pipeline;
1796         i965_kernel_num = sizeof(gen9_vme_kernels) / sizeof(struct i965_kernel);
1797         break;
1798
1799     case CODEC_MPEG2:
1800         vme_kernel_list = gen9_vme_mpeg2_kernels;
1801         encoder_context->vme_pipeline = gen9_vme_mpeg2_pipeline;
1802         i965_kernel_num = sizeof(gen9_vme_mpeg2_kernels) / sizeof(struct i965_kernel);
1803         break;
1804
1805     case CODEC_VP8:
1806         vme_kernel_list = gen9_vme_vp8_kernels;
1807         encoder_context->vme_pipeline = gen9_vme_vp8_pipeline;
1808         i965_kernel_num = sizeof(gen9_vme_vp8_kernels) / sizeof(struct i965_kernel);
1809         break;
1810
1811     case CODEC_HEVC:
1812         vme_kernel_list = gen9_vme_hevc_kernels;
1813         encoder_context->vme_pipeline = gen9_vme_hevc_pipeline;
1814         i965_kernel_num = sizeof(gen9_vme_hevc_kernels) / sizeof(struct i965_kernel);
1815         break;
1816
1817     default:
1818         /* never get here */
1819         assert(0);
1820
1821         break;
1822     }
1823
1824     assert(vme_context);
1825     vme_context->vme_kernel_sum = i965_kernel_num;
1826     vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1827
1828     vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
1829     vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
1830     vme_context->gpe_context.sampler_size = 0;
1831
1832
1833     vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1834     vme_context->gpe_context.vfe_state.num_urb_entries = 64;
1835     vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
1836     vme_context->gpe_context.vfe_state.urb_entry_size = 16;
1837     vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
1838
1839     gen7_vme_scoreboard_init(ctx, vme_context);
1840
1841     gen8_gpe_load_kernels(ctx,
1842                           &vme_context->gpe_context,
1843                           vme_kernel_list,
1844                           i965_kernel_num);
1845     vme_context->vme_surface2_setup = gen8_gpe_surface2_setup;
1846     vme_context->vme_media_rw_surface_setup = gen8_gpe_media_rw_surface_setup;
1847     vme_context->vme_buffer_suface_setup = gen8_gpe_buffer_suface_setup;
1848     vme_context->vme_media_chroma_surface_setup = gen8_gpe_media_chroma_surface_setup;
1849
1850     encoder_context->vme_context = vme_context;
1851     encoder_context->vme_context_destroy = gen9_vme_context_destroy;
1852
1853     vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
1854
1855     return True;
1856 }