/*
 * Imported from the android-x86/hardware-intel-common-vaapi repository
 * (src/gen9_vme.c), mirrored on OSDN Git Service.
 */
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "intel_driver.h"
37
38 #include "i965_defines.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "gen6_vme.h"
42 #include "gen6_mfc.h"
43 #include "gen9_mfc.h"
44 #include "intel_media.h"
45 #include "gen9_vp9_encapi.h"
46
47 #ifdef SURFACE_STATE_PADDED_SIZE
48 #undef SURFACE_STATE_PADDED_SIZE
49 #endif
50
51 #define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
52 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
53 #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
54
55 #define VME_INTRA_SHADER        0
56 #define VME_INTER_SHADER        1
57 #define VME_BINTER_SHADER       2
58
59 #define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
60 #define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
61 #define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
62
63 #define VME_MSG_LENGTH          32
64
/* AVC VME kernel binaries: each .g9b file is a generated shader blob
 * included as an initializer list of 128-bit instruction words. */
static const uint32_t gen9_vme_intra_frame[][4] = {
#include "shaders/vme/intra_frame_gen9.g9b"
};

static const uint32_t gen9_vme_inter_frame[][4] = {
#include "shaders/vme/inter_frame_gen9.g9b"
};

static const uint32_t gen9_vme_inter_bframe[][4] = {
#include "shaders/vme/inter_bframe_gen9.g9b"
};

/* AVC kernel table; entry order matches the VME_*_SHADER indices. */
static struct i965_kernel gen9_vme_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen9_vme_intra_frame,
        sizeof(gen9_vme_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen9_vme_inter_frame,
        sizeof(gen9_vme_inter_frame),
        NULL
    },
    {
        "VME inter BFrame",
        VME_BINTER_SHADER,
        gen9_vme_inter_bframe,
        sizeof(gen9_vme_inter_bframe),
        NULL
    }
};
100
/* MPEG-2 VME kernels; the intra kernel reuses the AVC intra shader blob. */
static const uint32_t gen9_vme_mpeg2_intra_frame[][4] = {
#include "shaders/vme/intra_frame_gen9.g9b"
};

static const uint32_t gen9_vme_mpeg2_inter_frame[][4] = {
#include "shaders/vme/mpeg2_inter_gen9.g9b"
};

/* MPEG-2 kernel table; no bi-directional (B) kernel is provided. */
static struct i965_kernel gen9_vme_mpeg2_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen9_vme_mpeg2_intra_frame,
        sizeof(gen9_vme_mpeg2_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen9_vme_mpeg2_inter_frame,
        sizeof(gen9_vme_mpeg2_inter_frame),
        NULL
    },
};
125
/* VP8 VME kernel binaries (generated .g9b shader blobs). */
static const uint32_t gen9_vme_vp8_intra_frame[][4] = {
#include "shaders/vme/vp8_intra_frame_gen9.g9b"
};

static const uint32_t gen9_vme_vp8_inter_frame[][4] = {
#include "shaders/vme/vp8_inter_frame_gen9.g9b"
};

/* VP8 kernel table; no bi-directional (B) kernel is provided. */
static struct i965_kernel gen9_vme_vp8_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen9_vme_vp8_intra_frame,
        sizeof(gen9_vme_vp8_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen9_vme_vp8_inter_frame,
        sizeof(gen9_vme_vp8_inter_frame),
        NULL
    },
};
150
/* HEVC VME kernels — the includes below literally reuse the AVC shader
 * blobs (intra/inter/bframe). */

static const uint32_t gen9_vme_hevc_intra_frame[][4] = {
#include "shaders/vme/intra_frame_gen9.g9b"
};

static const uint32_t gen9_vme_hevc_inter_frame[][4] = {
#include "shaders/vme/inter_frame_gen9.g9b"
};

static const uint32_t gen9_vme_hevc_inter_bframe[][4] = {
#include "shaders/vme/inter_bframe_gen9.g9b"
};

/* HEVC kernel table; entry order matches the VME_*_SHADER indices. */
static struct i965_kernel gen9_vme_hevc_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen9_vme_hevc_intra_frame,
        sizeof(gen9_vme_hevc_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen9_vme_hevc_inter_frame,
        sizeof(gen9_vme_hevc_inter_frame),
        NULL
    },
    {
        "VME inter BFrame",
        VME_BINTER_SHADER,
        gen9_vme_hevc_inter_bframe,
        sizeof(gen9_vme_hevc_inter_bframe),
        NULL
    }
};
188 /* only used for VME source surface state */
189 static void
190 gen9_vme_source_surface_state(VADriverContextP ctx,
191                               int index,
192                               struct object_surface *obj_surface,
193                               struct intel_encoder_context *encoder_context)
194 {
195     struct gen6_vme_context *vme_context = encoder_context->vme_context;
196
197     vme_context->vme_surface2_setup(ctx,
198                                     &vme_context->gpe_context,
199                                     obj_surface,
200                                     BINDING_TABLE_OFFSET(index),
201                                     SURFACE_STATE_OFFSET(index));
202 }
203
204 static void
205 gen9_vme_media_source_surface_state(VADriverContextP ctx,
206                                     int index,
207                                     struct object_surface *obj_surface,
208                                     struct intel_encoder_context *encoder_context)
209 {
210     struct gen6_vme_context *vme_context = encoder_context->vme_context;
211
212     vme_context->vme_media_rw_surface_setup(ctx,
213                                             &vme_context->gpe_context,
214                                             obj_surface,
215                                             BINDING_TABLE_OFFSET(index),
216                                             SURFACE_STATE_OFFSET(index),
217                                             0);
218 }
219
220 static void
221 gen9_vme_media_chroma_source_surface_state(VADriverContextP ctx,
222                                            int index,
223                                            struct object_surface *obj_surface,
224                                            struct intel_encoder_context *encoder_context)
225 {
226     struct gen6_vme_context *vme_context = encoder_context->vme_context;
227
228     vme_context->vme_media_chroma_surface_setup(ctx,
229                                                 &vme_context->gpe_context,
230                                                 obj_surface,
231                                                 BINDING_TABLE_OFFSET(index),
232                                                 SURFACE_STATE_OFFSET(index),
233                                                 0);
234 }
235
236 static void
237 gen9_vme_output_buffer_setup(VADriverContextP ctx,
238                              struct encode_state *encode_state,
239                              int index,
240                              struct intel_encoder_context *encoder_context,
241                              int is_intra,
242                              int width_in_mbs,
243                              int height_in_mbs)
244
245 {
246     struct i965_driver_data *i965 = i965_driver_data(ctx);
247     struct gen6_vme_context *vme_context = encoder_context->vme_context;
248
249     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
250     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
251
252     if (is_intra)
253         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
254     else
255         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
256     /*
257      * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
258      * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
259      * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
260      */
261
262     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
263                                               "VME output buffer",
264                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
265                                               0x1000);
266     assert(vme_context->vme_output.bo);
267     vme_context->vme_buffer_suface_setup(ctx,
268                                          &vme_context->gpe_context,
269                                          &vme_context->vme_output,
270                                          BINDING_TABLE_OFFSET(index),
271                                          SURFACE_STATE_OFFSET(index));
272 }
273
274 static void
275 gen9_vme_avc_output_buffer_setup(VADriverContextP ctx,
276                              struct encode_state *encode_state,
277                              int index,
278                              struct intel_encoder_context *encoder_context)
279 {
280     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
281     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
282     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
283     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
284     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
285
286     gen9_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
287
288 }
289
290 static void
291 gen9_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
292                                       struct encode_state *encode_state,
293                                       int index,
294                                       struct intel_encoder_context *encoder_context,
295                                       int width_in_mbs,
296                                       int height_in_mbs)
297 {
298     struct i965_driver_data *i965 = i965_driver_data(ctx);
299     struct gen6_vme_context *vme_context = encoder_context->vme_context;
300
301     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
302     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
303     vme_context->vme_batchbuffer.pitch = 16;
304     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
305                                                    "VME batchbuffer",
306                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
307                                                    0x1000);
308     vme_context->vme_buffer_suface_setup(ctx,
309                                          &vme_context->gpe_context,
310                                          &vme_context->vme_batchbuffer,
311                                          BINDING_TABLE_OFFSET(index),
312                                          SURFACE_STATE_OFFSET(index));
313 }
314
315 static void
316 gen9_vme_avc_output_vme_batchbuffer_setup(VADriverContextP ctx,
317                                       struct encode_state *encode_state,
318                                       int index,
319                                       struct intel_encoder_context *encoder_context)
320 {
321     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
322     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
323     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
324
325     gen9_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
326 }
327
328                                       
329 static VAStatus
330 gen9_vme_surface_setup(VADriverContextP ctx,
331                        struct encode_state *encode_state,
332                        int is_intra,
333                        struct intel_encoder_context *encoder_context)
334 {
335     struct object_surface *obj_surface;
336
337     /*Setup surfaces state*/
338     /* current picture for encoding */
339     obj_surface = encode_state->input_yuv_object;
340     assert(obj_surface);
341     gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
342     gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
343     gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
344
345     if (!is_intra) {
346         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
347         int slice_type;
348
349         slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
350         assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
351
352         intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);
353
354         if (slice_type == SLICE_TYPE_B)
355             intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
356     }
357
358     /* VME output */
359     gen9_vme_avc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
360     gen9_vme_avc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
361     intel_h264_setup_cost_surface(ctx, encode_state, encoder_context,
362                                   BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET),
363                                   SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET));
364
365     return VA_STATUS_SUCCESS;
366 }
367
368 static VAStatus gen9_vme_interface_setup(VADriverContextP ctx,
369                                          struct encode_state *encode_state,
370                                          struct intel_encoder_context *encoder_context)
371 {
372     struct gen6_vme_context *vme_context = encoder_context->vme_context;
373     struct gen8_interface_descriptor_data *desc;
374     int i;
375     dri_bo *bo;
376     unsigned char *desc_ptr;
377
378     bo = vme_context->gpe_context.dynamic_state.bo;
379     dri_bo_map(bo, 1);
380     assert(bo->virtual);
381     desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt_offset;
382
383     desc = (struct gen8_interface_descriptor_data *)desc_ptr;
384
385     for (i = 0; i < vme_context->vme_kernel_sum; i++) {
386         struct i965_kernel *kernel;
387         kernel = &vme_context->gpe_context.kernels[i];
388         assert(sizeof(*desc) == 32);
389         /*Setup the descritor table*/
390         memset(desc, 0, sizeof(*desc));
391         desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
392         desc->desc3.sampler_count = 0; /* FIXME: */
393         desc->desc3.sampler_state_pointer = 0;
394         desc->desc4.binding_table_entry_count = 1; /* FIXME: */
395         desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
396         desc->desc5.constant_urb_entry_read_offset = 0;
397         desc->desc5.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
398
399         desc++;
400     }
401
402     dri_bo_unmap(bo);
403
404     return VA_STATUS_SUCCESS;
405 }
406
407 static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,
408                                         struct encode_state *encode_state,
409                                         struct intel_encoder_context *encoder_context,
410                                         int denom)
411 {
412     struct gen6_vme_context *vme_context = encoder_context->vme_context;
413     unsigned char *constant_buffer;
414     unsigned int *vme_state_message;
415     int mv_num = 32;
416
417     vme_state_message = (unsigned int *)vme_context->vme_state_message;
418
419     if (encoder_context->codec == CODEC_H264 ||
420         encoder_context->codec == CODEC_H264_MVC) {
421         if (vme_context->h264_level >= 30) {
422             mv_num = 16 / denom;
423
424             if (vme_context->h264_level >= 31)
425                 mv_num = 8 / denom;
426         }
427     } else if (encoder_context->codec == CODEC_MPEG2) {
428         mv_num = 2 / denom;
429     }else if (encoder_context->codec == CODEC_HEVC) {
430         if (vme_context->hevc_level >= 30*3) {
431             mv_num = 16;
432
433             if (vme_context->hevc_level >= 31*3)
434                 mv_num = 8;
435         }/* use the avc level setting */
436     }
437
438     vme_state_message[31] = mv_num;
439
440     dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
441     assert(vme_context->gpe_context.dynamic_state.bo->virtual);
442     constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual +
443                                          vme_context->gpe_context.curbe_offset;
444
445     /* VME MV/Mb cost table is passed by using const buffer */
446     /* Now it uses the fixed search path. So it is constructed directly
447      * in the GPU shader.
448      */
449     memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
450
451     dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
452
453     return VA_STATUS_SUCCESS;
454 }
455
456 #define         MB_SCOREBOARD_A         (1 << 0)
457 #define         MB_SCOREBOARD_B         (1 << 1)
458 #define         MB_SCOREBOARD_C         (1 << 2)
459
460 /* check whether the mb of (x_index, y_index) is out of bound */
461 static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
462 {
463     int mb_index;
464     if (x_index < 0 || x_index >= mb_width)
465         return -1;
466     if (y_index < 0 || y_index >= mb_height)
467         return -1;
468
469     mb_index = y_index * mb_width + x_index;
470     if (mb_index < first_mb || mb_index > (first_mb + num_mb))
471         return -1;
472     return 0;
473 }
474
/*
 * Fill the VME batch buffer with one MEDIA_OBJECT command (8 dwords) plus
 * a MEDIA_STATE_FLUSH per macroblock, visiting the MBs of each slice along
 * down-left diagonals (x -= 2, y += 1) so the hardware scoreboard can
 * resolve the A (left), B (top) and C (top-right) dependencies while
 * several MBs execute concurrently. Used only when hardware scoreboarding
 * is allowed (see gen9_vme_pipeline_programing).
 */
static void
gen9wa_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     int mb_width, int mb_height,
                                     int kernel,
                                     int transform_8x8_mode_flag,
                                     struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_row;
    int s;
    unsigned int *command_ptr;

/* MEDIA_OBJECT dword2 flag: honour the scoreboard dependency mask */
#define         USE_SCOREBOARD          (1 << 21)

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    /* each slice is walked independently, restarting at its first MB */
    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
        int first_mb = pSliceParameter->macroblock_address;
        int num_mb = pSliceParameter->num_macroblocks;
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;

        x_outer = first_mb % mb_width;
        y_outer = first_mb / mb_width;
        mb_row = y_outer;

        /* phase 1: start one diagonal from every MB of the slice's first
         * row, up to column mb_width - 2 */
        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            x_inner = x_outer;
            y_inner = y_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* build intra-prediction availability flags and the
                 * scoreboard dependency mask from the MB's neighbours */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;

                /* step down-left along the diagonal */
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* phase 2: start the remaining diagonals from column mb_width - 2,
         * wrapping to the next MB row when the right edge is reached */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = first_mb / mb_width;
        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer++;
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    /* terminate the second-level batch */
    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
603
/*
 * Fill the VME batch buffer in plain raster order: one MEDIA_OBJECT
 * command (9 dwords) plus a MEDIA_STATE_FLUSH per macroblock, without the
 * hardware scoreboard. The per-MB QP travels in the inline data: the slice
 * QP (CQP mode), the BRC-computed QP, or a per-MB value when ROI is
 * enabled.
 */
static void
gen9_vme_fill_vme_batchbuffer(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              int mb_width, int mb_height,
                              int kernel,
                              int transform_8x8_mode_flag,
                              struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;
    int i, s;
    unsigned int *command_ptr;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    int qp;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    int qp_mb, qp_index;

    /* CQP: QP from the picture/slice parameters; otherwise the BRC value
     * for this temporal layer and slice type */
    if (encoder_context->rate_control_mode == VA_RC_CQP)
        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    else
        qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
        int slice_mb_begin = pSliceParameter->macroblock_address;
        int slice_mb_number = pSliceParameter->num_macroblocks;
        unsigned int mb_intra_ub;
        int slice_mb_x = pSliceParameter->macroblock_address % mb_width;
        for (i = 0; i < slice_mb_number;  ) {
            int mb_count = i + slice_mb_begin;
            mb_x = mb_count % mb_width;
            mb_y = mb_count / mb_width;
            /* intra availability of the left (A/E), top (B), top-left (D)
             * and top-right (C) neighbours within the picture */
            mb_intra_ub = 0;
            if (mb_x != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
            }
            if (mb_y != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                if (mb_x != 0)
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                if (mb_x != (mb_width -1))
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
            }
            /* in the slice's first MB row the top neighbours belong to a
             * previous slice and are not available for intra prediction */
            if (i < mb_width) {
                if (i == 0)
                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
                if ((i == (mb_width - 1)) && slice_mb_x) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                }
            }

            /* when the slice starts mid-row, the top-left neighbour of the
             * second row's first MB is outside the slice */
            if ((i == mb_width) && slice_mb_x) {
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
            }
            *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
            *command_ptr++ = kernel;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;

            /*inline data */
            *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
            *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
            /* qp occupies one byte */
            if (vme_context->roi_enabled) {
                qp_index = mb_y * mb_width + mb_x;
                qp_mb = *(vme_context->qp_per_mb + qp_index);
            } else
                qp_mb = qp;
            *command_ptr++ = qp_mb;

            *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
            *command_ptr++ = 0;
            i += 1;
        }
    }

    /* terminate the second-level batch */
    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
693
694 static void gen9_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
695 {
696     struct gen6_vme_context *vme_context = encoder_context->vme_context;
697
698     gen8_gpe_context_init(ctx, &vme_context->gpe_context);
699
700     /* VME output buffer */
701     dri_bo_unreference(vme_context->vme_output.bo);
702     vme_context->vme_output.bo = NULL;
703
704     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
705     vme_context->vme_batchbuffer.bo = NULL;
706
707     /* VME state */
708     dri_bo_unreference(vme_context->vme_state.bo);
709     vme_context->vme_state.bo = NULL;
710 }
711
/*
 * Program the VME pipeline for the current frame: select the VME kernel
 * from the slice type, build the per-MB second-level command buffer (the
 * scoreboard walker when allowed, the raster walker otherwise) and chain
 * to it from the main batch with MI_BATCH_BUFFER_START.
 */
static void gen9_vme_pipeline_programing(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    int kernel_shader;
    bool allow_hwscore = true;
    int s;
    unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);

    /* the hw-scoreboard walker is skipped for low quality and whenever any
     * slice does not start at the beginning of a MB row */
    if (is_low_quality)
        allow_hwscore = false;
    else {
        for (s = 0; s < encode_state->num_slice_params_ext; s++) {
            pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
            if ((pSliceParameter->macroblock_address % width_in_mbs)) {
                allow_hwscore = false;
                break;
            }
        }
    }

    /* NOTE(review): after the loop above pSliceParameter may point at the
     * last slice examined rather than slice 0 — kernel selection below
     * uses it; presumably all slices of a picture share a type. Confirm. */
    if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
        (pSliceParameter->slice_type == SLICE_TYPE_SI)) {
        kernel_shader = VME_INTRA_SHADER;
    } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
               (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
        kernel_shader = VME_INTER_SHADER;
    } else {
        /* B slice: fall back to the uni-directional inter kernel when the
         * hw-scoreboard walker cannot be used */
        kernel_shader = VME_BINTER_SHADER;
        if (!allow_hwscore)
            kernel_shader = VME_INTER_SHADER;
    }
    if (allow_hwscore)
        gen9wa_vme_walker_fill_vme_batchbuffer(ctx,
                                               encode_state,
                                               width_in_mbs, height_in_mbs,
                                               kernel_shader,
                                               pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
                                               encoder_context);
    else
        gen9_vme_fill_vme_batchbuffer(ctx,
                                      encode_state,
                                      width_in_mbs, height_in_mbs,
                                      kernel_shader,
                                      pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
                                      encoder_context);

    /* chain into the prepared VME command buffer */
    intel_batchbuffer_start_atomic(batch, 0x1000);
    gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(batch,
              vme_context->vme_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);
}
781
782 static VAStatus gen9_vme_prepare(VADriverContextP ctx,
783                                  struct encode_state *encode_state,
784                                  struct intel_encoder_context *encoder_context)
785 {
786     VAStatus vaStatus = VA_STATUS_SUCCESS;
787     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
788     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
789     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
790     struct gen6_vme_context *vme_context = encoder_context->vme_context;
791
792     if (!vme_context->h264_level ||
793         (vme_context->h264_level != pSequenceParameter->level_idc)) {
794             vme_context->h264_level = pSequenceParameter->level_idc;
795     }
796
797     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
798     intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context);
799     intel_h264_enc_roi_config(ctx, encode_state, encoder_context);
800
801     /*Setup all the memory object*/
802     gen9_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
803     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
804     //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
805     gen9_vme_constant_setup(ctx, encode_state, encoder_context, (pSliceParameter->slice_type == SLICE_TYPE_B) ? 2 : 1);
806
807     /*Programing media pipeline*/
808     gen9_vme_pipeline_programing(ctx, encode_state, encoder_context);
809
810     return vaStatus;
811 }
812
813 static VAStatus gen9_vme_run(VADriverContextP ctx,
814                              struct encode_state *encode_state,
815                              struct intel_encoder_context *encoder_context)
816 {
817     struct intel_batchbuffer *batch = encoder_context->base.batch;
818
819     intel_batchbuffer_flush(batch);
820
821     return VA_STATUS_SUCCESS;
822 }
823
824 static VAStatus gen9_vme_stop(VADriverContextP ctx,
825                               struct encode_state *encode_state,
826                               struct intel_encoder_context *encoder_context)
827 {
828     return VA_STATUS_SUCCESS;
829 }
830
831 static VAStatus
832 gen9_vme_pipeline(VADriverContextP ctx,
833                   VAProfile profile,
834                   struct encode_state *encode_state,
835                   struct intel_encoder_context *encoder_context)
836 {
837     gen9_vme_media_init(ctx, encoder_context);
838     gen9_vme_prepare(ctx, encode_state, encoder_context);
839     gen9_vme_run(ctx, encode_state, encoder_context);
840     gen9_vme_stop(ctx, encode_state, encoder_context);
841
842     return VA_STATUS_SUCCESS;
843 }
844
845 static void
846 gen9_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
847                                    struct encode_state *encode_state,
848                                    int index,
849                                    int is_intra,
850                                    struct intel_encoder_context *encoder_context)
851
852 {
853     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
854     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
855     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
856
857     gen9_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
858 }
859
860 static void
861 gen9_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
862                                             struct encode_state *encode_state,
863                                             int index,
864                                             struct intel_encoder_context *encoder_context)
865
866 {
867     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
868     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
869     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
870
871     gen9_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
872 }
873
874 static VAStatus
875 gen9_vme_mpeg2_surface_setup(VADriverContextP ctx,
876                              struct encode_state *encode_state,
877                              int is_intra,
878                              struct intel_encoder_context *encoder_context)
879 {
880     struct object_surface *obj_surface;
881
882     /*Setup surfaces state*/
883     /* current picture for encoding */
884     obj_surface = encode_state->input_yuv_object;
885     gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
886     gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
887     gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
888
889     if (!is_intra) {
890         /* reference 0 */
891         obj_surface = encode_state->reference_objects[0];
892
893         if (obj_surface->bo != NULL)
894             gen9_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
895
896         /* reference 1 */
897         obj_surface = encode_state->reference_objects[1];
898
899         if (obj_surface && obj_surface->bo != NULL)
900             gen9_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
901     }
902
903     /* VME output */
904     gen9_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
905     gen9_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
906
907     return VA_STATUS_SUCCESS;
908 }
909
/*
 * Fill the second-level batchbuffer for MPEG-2 with one MEDIA_OBJECT per
 * macroblock, walked in a 45-degree (x -= 2, y += 1) wavefront so that the
 * hardware scoreboard dependencies (left / top / top-left / top-right
 * neighbours) are satisfied.  The walk is done in two passes: first the
 * wavefronts starting along the top row (x_outer 0 .. mb_width-3), then the
 * wavefronts starting from the right edge and successive rows.
 */
static void
gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                                           struct encode_state *encode_state,
                                           int mb_width, int mb_height,
                                           int kernel,
                                           struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    unsigned int *command_ptr;

/* Enables hardware-scoreboard handling in the MEDIA_OBJECT command. */
#define         MPEG2_SCOREBOARD                (1 << 21)

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    {
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;
        int first_mb = 0;
        int num_mb = mb_width * mb_height;

        x_outer = 0;
        y_outer = 0;

        /* Pass 1: wavefronts whose starting MB lies on the top row. */
        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            x_inner = x_outer;
            y_inner = y_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* Derive intra-prediction neighbour availability and the
                 * matching scoreboard dependency mask for this MB. */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;

                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                /* MEDIA_OBJECT: 8 dwords total (header encodes length - 2). */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = MPEG2_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;

                /* Step down the 45-degree diagonal. */
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* Pass 2: remaining wavefronts, starting from column mb_width-2 and
         * walking across each subsequent row. */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = 0;
        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;

                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = MPEG2_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));

                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;
                x_inner -= 2;
                y_inner += 1;
            }
            /* Advance the wavefront origin; wrap to the next row once the
             * right edge is passed. */
            x_outer++;
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    /* Terminate the second-level batch. */
    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
    return;
}
1037
/*
 * Fill the second-level batchbuffer for MPEG-2 with one MEDIA_OBJECT per
 * macroblock in plain raster order (no hardware scoreboarding), iterating
 * over every element of every slice parameter buffer.
 */
static void
gen9_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    int mb_width, int mb_height,
                                    int kernel,
                                    int transform_8x8_mode_flag,
                                    struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;
    int i, s, j;
    unsigned int *command_ptr;


    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;

        for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
            int slice_mb_begin = slice_param->macroblock_address;
            int slice_mb_number = slice_param->num_macroblocks;
            unsigned int mb_intra_ub;

            for (i = 0; i < slice_mb_number;) {
                int mb_count = i + slice_mb_begin;

                /* Raster position of this MB within the picture. */
                mb_x = mb_count % mb_width;
                mb_y = mb_count / mb_width;
                mb_intra_ub = 0;

                /* Intra-prediction neighbour availability flags
                 * (left, top, top-left, top-right). */
                if (mb_x != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                }

                if (mb_y != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;

                    if (mb_x != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (mb_x != (mb_width -1))
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                }

                /* MEDIA_OBJECT: 8 dwords total (header encodes length - 2);
                 * no scoreboard fields in this non-walker path. */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = 0;
                *command_ptr++ = 0;
                *command_ptr++ = 0;
                *command_ptr++ = 0;

                /*inline data */
                *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
                *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;
                i += 1;
            }

            /* Next element in this slice parameter buffer. */
            slice_param++;
        }
    }

    /* Terminate the second-level batch. */
    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
1109
1110 static void
1111 gen9_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
1112                                    struct encode_state *encode_state,
1113                                    int is_intra,
1114                                    struct intel_encoder_context *encoder_context)
1115 {
1116     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1117     struct intel_batchbuffer *batch = encoder_context->base.batch;
1118     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1119     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1120     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1121     bool allow_hwscore = true;
1122     int s;
1123     int kernel_shader;
1124     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1125
1126     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1127         int j;
1128         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
1129
1130         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
1131             if (slice_param->macroblock_address % width_in_mbs) {
1132                 allow_hwscore = false;
1133                 break;
1134             }
1135         }
1136     }
1137
1138     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1139     if (pic_param->picture_type == VAEncPictureTypeIntra) {
1140         allow_hwscore = false;
1141         kernel_shader = VME_INTRA_SHADER;
1142     } else {
1143         kernel_shader = VME_INTER_SHADER;
1144     }
1145
1146     if (allow_hwscore)
1147         gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
1148                                                    encode_state,
1149                                                    width_in_mbs, height_in_mbs,
1150                                                    kernel_shader,
1151                                                    encoder_context);
1152     else
1153         gen9_vme_mpeg2_fill_vme_batchbuffer(ctx,
1154                                             encode_state,
1155                                             width_in_mbs, height_in_mbs,
1156                                             is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
1157                                             0,
1158                                             encoder_context);
1159
1160     intel_batchbuffer_start_atomic(batch, 0x1000);
1161     gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1162     BEGIN_BATCH(batch, 4);
1163     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1164     OUT_RELOC(batch,
1165               vme_context->vme_batchbuffer.bo,
1166               I915_GEM_DOMAIN_COMMAND, 0,
1167               0);
1168     OUT_BATCH(batch, 0);
1169     OUT_BATCH(batch, 0);
1170     ADVANCE_BATCH(batch);
1171
1172     gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
1173
1174     intel_batchbuffer_end_atomic(batch);
1175 }
1176
1177 static VAStatus
1178 gen9_vme_mpeg2_prepare(VADriverContextP ctx,
1179                        struct encode_state *encode_state,
1180                        struct intel_encoder_context *encoder_context)
1181 {
1182     VAStatus vaStatus = VA_STATUS_SUCCESS;
1183     VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1184     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1185     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1186
1187     if ((!vme_context->mpeg2_level) ||
1188         (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
1189             vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
1190     }
1191
1192     /*Setup all the memory object*/
1193     gen9_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1194     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
1195     //gen9_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1196     intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
1197     gen9_vme_constant_setup(ctx, encode_state, encoder_context, 1);
1198
1199     /*Programing media pipeline*/
1200     gen9_vme_mpeg2_pipeline_programing(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1201
1202     return vaStatus;
1203 }
1204
1205 static VAStatus
1206 gen9_vme_mpeg2_pipeline(VADriverContextP ctx,
1207                         VAProfile profile,
1208                         struct encode_state *encode_state,
1209                         struct intel_encoder_context *encoder_context)
1210 {
1211     gen9_vme_media_init(ctx, encoder_context);
1212     gen9_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
1213     gen9_vme_run(ctx, encode_state, encoder_context);
1214     gen9_vme_stop(ctx, encode_state, encoder_context);
1215
1216     return VA_STATUS_SUCCESS;
1217 }
1218
1219 static void
1220 gen9_vme_vp8_output_buffer_setup(VADriverContextP ctx,
1221                                    struct encode_state *encode_state,
1222                                    int index,
1223                                    int is_intra,
1224                                    struct intel_encoder_context *encoder_context)
1225 {
1226     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1227     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1228     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1229
1230     gen9_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
1231 }
1232
1233 static void
1234 gen9_vme_vp8_output_vme_batchbuffer_setup(VADriverContextP ctx,
1235                                             struct encode_state *encode_state,
1236                                             int index,
1237                                             struct intel_encoder_context *encoder_context)
1238 {
1239     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1240     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1241     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1242
1243     gen9_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
1244 }
1245
1246 static VAStatus
1247 gen9_vme_vp8_surface_setup(VADriverContextP ctx,
1248                              struct encode_state *encode_state,
1249                              int is_intra,
1250                              struct intel_encoder_context *encoder_context)
1251 {
1252     struct object_surface *obj_surface;
1253
1254     /*Setup surfaces state*/
1255     /* current picture for encoding */
1256     obj_surface = encode_state->input_yuv_object;
1257     gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
1258     gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
1259     gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
1260
1261     if (!is_intra) {
1262         /* reference 0 */
1263         obj_surface = encode_state->reference_objects[0];
1264
1265         if (obj_surface->bo != NULL)
1266             gen9_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
1267
1268         /* reference 1 */
1269         obj_surface = encode_state->reference_objects[1];
1270
1271         if (obj_surface && obj_surface->bo != NULL)
1272             gen9_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
1273     }
1274
1275     /* VME output */
1276     gen9_vme_vp8_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
1277     gen9_vme_vp8_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
1278
1279     return VA_STATUS_SUCCESS;
1280 }
1281
/*
 * Program the VP8 VME media pipeline.  VP8 reuses the MPEG-2 scoreboard
 * walker to build the second-level per-MB batchbuffer (it is always used —
 * there is no slice-alignment fallback here), then chains to it from the
 * main batch with MI_BATCH_BUFFER_START.
 */
static void
gen9_vme_vp8_pipeline_programing(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   int is_intra,
                                   struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
    int kernel_shader = (is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER);

    gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
                                                 encode_state,
                                                 width_in_mbs, height_in_mbs,
                                                 kernel_shader,
                                                 encoder_context);

    intel_batchbuffer_start_atomic(batch, 0x1000);
    gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
    /* Chain to the second-level batch: command dword, 64-bit address
     * (low dword via relocation, high dword 0), plus a NOOP pad dword. */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(batch,
              vme_context->vme_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);
}
1317
1318 static VAStatus gen9_vme_vp8_prepare(VADriverContextP ctx,
1319                                  struct encode_state *encode_state,
1320                                  struct intel_encoder_context *encoder_context)
1321 {
1322     VAStatus vaStatus = VA_STATUS_SUCCESS;
1323     VAEncPictureParameterBufferVP8 *pPicParameter = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
1324     int is_intra = !pPicParameter->pic_flags.bits.frame_type;
1325
1326     /* update vp8 mbmv cost */
1327     intel_vme_vp8_update_mbmv_cost(ctx, encode_state, encoder_context);
1328
1329     /*Setup all the memory object*/
1330     gen9_vme_vp8_surface_setup(ctx, encode_state, is_intra, encoder_context);
1331     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
1332     gen9_vme_constant_setup(ctx, encode_state, encoder_context, 1);
1333
1334     /*Programing media pipeline*/
1335     gen9_vme_vp8_pipeline_programing(ctx, encode_state, is_intra, encoder_context);
1336
1337     return vaStatus;
1338 }
1339
1340 static VAStatus
1341 gen9_vme_vp8_pipeline(VADriverContextP ctx,
1342                         VAProfile profile,
1343                         struct encode_state *encode_state,
1344                         struct intel_encoder_context *encoder_context)
1345 {
1346     gen9_vme_media_init(ctx, encoder_context);
1347     gen9_vme_vp8_prepare(ctx, encode_state, encoder_context);
1348     gen9_vme_run(ctx, encode_state, encoder_context);
1349     gen9_vme_stop(ctx, encode_state, encoder_context);
1350
1351     return VA_STATUS_SUCCESS;
1352 }
1353
1354 /* HEVC */
1355
/*
 * Allocate and bind the HEVC VME output buffer.  Unlike the H.264/MPEG-2/VP8
 * variants this sizes the per-block record directly: intra frames store a
 * 32-byte record per 16x16 block, inter frames the full 384-byte record
 * (see the layout breakdown in the comment below).
 */
static void
gen9_vme_hevc_output_buffer_setup(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             int index,
                             struct intel_encoder_context *encoder_context)

{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
    int is_intra = pSliceParameter->slice_type == HEVC_SLICE_I;
    /* The VME kernels operate on 16x16 blocks regardless of CTB size. */
    int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
    int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;


    vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
    vme_context->vme_output.pitch = 16; /* in bytes, always 16 */

    if (is_intra)
        vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
    else
        vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
    /*
     * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
     * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
     * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
     */

    vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
                                              "VME output buffer",
                                              vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
                                              0x1000);
    assert(vme_context->vme_output.bo);
    vme_context->vme_buffer_suface_setup(ctx,
                                         &vme_context->gpe_context,
                                         &vme_context->vme_output,
                                         BINDING_TABLE_OFFSET(index),
                                         SURFACE_STATE_OFFSET(index));
}
1396
1397 static void
1398 gen9_vme_hevc_output_vme_batchbuffer_setup(VADriverContextP ctx,
1399                                       struct encode_state *encode_state,
1400                                       int index,
1401                                       struct intel_encoder_context *encoder_context)
1402
1403 {
1404     struct i965_driver_data *i965 = i965_driver_data(ctx);
1405     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1406     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1407     int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
1408     int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
1409
1410     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
1411     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
1412     vme_context->vme_batchbuffer.pitch = 16;
1413     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
1414                                                    "VME batchbuffer",
1415                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
1416                                                    0x1000);
1417 }
/*
 * Bind all surfaces needed by the HEVC VME kernels.  For 10-bit content the
 * VME hardware works on an NV12 shadow of the source, so the private
 * GenHevcSurface's nv12_surface_obj is substituted for the input surface.
 * Reference surfaces are bound through intel_hevc_vme_reference_state()
 * (list 0, and list 1 for B slices).  Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen9_vme_hevc_surface_setup(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       int is_intra,
                       struct intel_encoder_context *encoder_context)
{
    struct object_surface *obj_surface;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    GenHevcSurface *hevc_encoder_surface = NULL;

    /*Setup surfaces state*/
    /* current picture for encoding */
    obj_surface = encode_state->input_yuv_object;

    /* Bit depth > 8: run VME on the NV12 shadow surface instead. */
    if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
        || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0)) {
        hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
        assert(hevc_encoder_surface);
        obj_surface = hevc_encoder_surface->nv12_surface_obj;
    }
    gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
    gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
    gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);

    if (!is_intra) {
        VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
        int slice_type;

        slice_type = slice_param->slice_type;
        assert(slice_type != HEVC_SLICE_I);

        /* to do HEVC */
        intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);

        if (slice_type == HEVC_SLICE_B)
            intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
    }

    /* VME output */
    gen9_vme_hevc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
    gen9_vme_hevc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);

    return VA_STATUS_SUCCESS;
}
/*
 * Fill the second-level VME batchbuffer with one MEDIA_OBJECT per 16x16 MB,
 * walking the MBs of each slice along downward-left diagonals (x -= 2,
 * y += 1 per inner step) so that the hardware scoreboard dependencies
 * (left / top / top-right neighbours) are already retired when each MB is
 * dispatched.  Only used when HW scoreboarding is allowed, i.e. every
 * slice starts at an MB-row boundary (see the caller).
 *
 * ctx                     - driver context (unused here)
 * encode_state            - per-frame encode parameters (slice/seq buffers)
 * mb_width/mb_height      - picture size in 16x16 macroblocks
 * kernel                  - interface descriptor index of the VME kernel
 * transform_8x8_mode_flag - forwarded to the kernel inline data
 * encoder_context         - owner of the vme_batchbuffer being filled
 */
static void
gen9wa_vme_hevc_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     int mb_width, int mb_height,
                                     int kernel,
                                     int transform_8x8_mode_flag,
                                     struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_row;
    int s;
    unsigned int *command_ptr;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    /* Derive the CTB size from the SPS, then the number of 16x16 MBs per CTB
     * (squared because a CTB covers a 2-D area). */
    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
    int ctb_size = 1 << log2_ctb_size;
    int num_mb_in_ctb = (ctb_size + 15)/16;
    num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;

#define         USE_SCOREBOARD          (1 << 21)

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    /*slice_segment_address  must picture_width_in_ctb alainment */
    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
        /* Convert the slice's CTB-based start/extent into MB units. */
        int first_mb = pSliceParameter->slice_segment_address * num_mb_in_ctb;
        int num_mb = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb;
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;

        x_outer = first_mb % mb_width;
        y_outer = first_mb / mb_width;
        mb_row = y_outer;

        /* Pass 1: start diagonals from the top row, columns 0 .. mb_width-3. */
        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            x_inner = x_outer;
            y_inner = y_outer;
            /* Walk one diagonal: emit an MB, then step down-left. */
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* Intra-neighbour availability flags and matching scoreboard
                 * dependencies: A = left, B = top, C = top-right, D = top-left. */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                /* MEDIA_OBJECT: 8 DWORDs (header len field = total - 2). */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;

                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* Pass 2: start the remaining diagonals from the right edge
         * (column mb_width-2), moving down the right side. */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = first_mb / mb_width;
        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* Same neighbour/dependency computation as pass 1. */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer++;
            /* Past the right edge: restart the diagonal one row lower. */
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    /* Terminate the second-level batch. */
    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
1597
/*
 * Fill the second-level VME batchbuffer with one MEDIA_OBJECT per 16x16 MB
 * in plain raster order, without the hardware scoreboard.  This is the
 * fallback path used when a slice does not start on an MB-row boundary
 * (see gen9_vme_hevc_pipeline_programing).
 *
 * Parameters are as in gen9wa_vme_hevc_walker_fill_vme_batchbuffer.
 */
static void
gen9_vme_hevc_fill_vme_batchbuffer(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              int mb_width, int mb_height,
                              int kernel,
                              int transform_8x8_mode_flag,
                              struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;
    int i, s;
    unsigned int *command_ptr;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    /* CTB size from the SPS, then 16x16 MBs per CTB (squared: 2-D area). */
    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;

    int ctb_size = 1 << log2_ctb_size;
    int num_mb_in_ctb = (ctb_size + 15)/16;
    num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
        /* Convert the slice's CTB-based start/extent into MB units. */
        int slice_mb_begin = pSliceParameter->slice_segment_address * num_mb_in_ctb;
        int slice_mb_number = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb;

        unsigned int mb_intra_ub;
        int slice_mb_x = slice_mb_begin % mb_width;   /* column where the slice starts */
        for (i = 0; i < slice_mb_number;  ) {
            int mb_count = i + slice_mb_begin;
            mb_x = mb_count % mb_width;
            mb_y = mb_count / mb_width;
            mb_intra_ub = 0;

            /* Intra-neighbour availability within the picture:
             * A/E = left, B = top, C = top-right, D = top-left. */
            if (mb_x != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
            }
            if (mb_y != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                if (mb_x != 0)
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                if (mb_x != (mb_width -1))
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
            }
            /* First MB row of the slice: top neighbours belong to another
             * slice, so mask them off; the slice's very first MB also loses
             * its left neighbour. */
            if (i < mb_width) {
                if (i == 0)
                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
                if ((i == (mb_width - 1)) && slice_mb_x) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                }
            }

            /* Mid-row slice start: the top-left neighbour of the first MB of
             * the second row is outside this slice. */
            if ((i == mb_width) && slice_mb_x) {
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
            }

            /* MEDIA_OBJECT: 8 DWORDs, no scoreboard (DW2-5 zero). */
            *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
            *command_ptr++ = kernel;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;

            /*inline data */
            *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
            *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

            *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
            *command_ptr++ = 0;
            i += 1;
        }
    }

    /* Terminate the second-level batch. */
    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
1679
/*
 * Program the media pipeline for HEVC VME: pick the kernel from the slice
 * type, fill the second-level batchbuffer (scoreboarded walker order when
 * possible, raster order otherwise), then emit the first-level batch that
 * sets up the GPE pipeline and chains to the second-level buffer.
 */
static void gen9_vme_hevc_pipeline_programing(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
    int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
    int kernel_shader;
    bool allow_hwscore = true;
    int s;

    /* CTB size from the SPS, then 16x16 MBs per CTB (squared: 2-D area). */
    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;

    int ctb_size = 1 << log2_ctb_size;
    int num_mb_in_ctb = (ctb_size + 15)/16;
    int transform_8x8_mode_flag = 1;
    num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;

    /* The HW-scoreboard walker requires every slice to start at an MB-row
     * boundary; fall back to the raster path otherwise. */
    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
        int slice_mb_begin = pSliceParameter->slice_segment_address * num_mb_in_ctb;
        if ((slice_mb_begin % width_in_mbs)) {
            allow_hwscore = false;
            break;
        }
    }

    /* Kernel selection from the slice type.  NOTE(review): pSliceParameter
     * here is the last slice examined by the loop above, not necessarily
     * slice 0 — presumably all slices of a picture share one type. */
    if (pSliceParameter->slice_type == HEVC_SLICE_I) {
        kernel_shader = VME_INTRA_SHADER;
    } else if (pSliceParameter->slice_type == HEVC_SLICE_P) {
        kernel_shader = VME_INTER_SHADER;
    } else {
        kernel_shader = VME_BINTER_SHADER;
        /* The B-frame kernel is only used together with the scoreboard. */
        if (!allow_hwscore)
            kernel_shader = VME_INTER_SHADER;
    }
    if (allow_hwscore)
        gen9wa_vme_hevc_walker_fill_vme_batchbuffer(ctx,
                                               encode_state,
                                               width_in_mbs, height_in_mbs,
                                               kernel_shader,
                                               transform_8x8_mode_flag,
                                               encoder_context);
    else
        gen9_vme_hevc_fill_vme_batchbuffer(ctx,
                                      encode_state,
                                      width_in_mbs, height_in_mbs,
                                      kernel_shader,
                                      transform_8x8_mode_flag,
                                      encoder_context);

    /* First-level batch: GPE setup, then chain to the second-level
     * batchbuffer filled above (MI_BATCH_BUFFER_START flags 1<<8 | 1<<0). */
    intel_batchbuffer_start_atomic(batch, 0x1000);
    gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(batch,
              vme_context->vme_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);
}
1750
1751 static VAStatus gen9_intel_init_hevc_surface(VADriverContextP ctx,
1752                             struct intel_encoder_context *encoder_context,
1753                             struct encode_state *encode_state,
1754                             struct object_surface *input_obj_surface)
1755 {
1756     struct i965_driver_data *i965 = i965_driver_data(ctx);
1757     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1758     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1759     GenHevcSurface *hevc_encoder_surface;
1760     struct i965_surface src_surface, dst_surface;
1761     struct object_surface *obj_surface;
1762     VARectangle rect;
1763     VAStatus status;
1764
1765     uint32_t size;
1766
1767     obj_surface = input_obj_surface;
1768     assert(obj_surface && obj_surface->bo);
1769
1770     if (obj_surface->private_data == NULL) {
1771
1772         if (mfc_context->pic_size.ctb_size == 16)
1773             size = ((pSequenceParameter->pic_width_in_luma_samples + 63) >> 6) *
1774             ((pSequenceParameter->pic_height_in_luma_samples + 15) >> 4);
1775         else
1776             size = ((pSequenceParameter->pic_width_in_luma_samples + 31) >> 5) *
1777             ((pSequenceParameter->pic_height_in_luma_samples + 31) >> 5);
1778         size <<= 6; /* in unit of 64bytes */
1779
1780         hevc_encoder_surface = calloc(sizeof(GenHevcSurface), 1);
1781
1782         assert(hevc_encoder_surface);
1783         hevc_encoder_surface->motion_vector_temporal_bo =
1784             dri_bo_alloc(i965->intel.bufmgr,
1785             "motion vector temporal buffer",
1786             size,
1787             0x1000);
1788         assert(hevc_encoder_surface->motion_vector_temporal_bo);
1789
1790         hevc_encoder_surface->ctx = ctx;
1791         hevc_encoder_surface->nv12_surface_obj = NULL;
1792         hevc_encoder_surface->nv12_surface_id = VA_INVALID_SURFACE;
1793         hevc_encoder_surface->has_p010_to_nv12_done = 0;
1794
1795         obj_surface->private_data = (void *)hevc_encoder_surface;
1796         obj_surface->free_private_data = (void *)gen_free_hevc_surface;
1797     }
1798
1799     hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
1800
1801     if(!hevc_encoder_surface->has_p010_to_nv12_done && obj_surface->fourcc == VA_FOURCC_P010)
1802     {
1803         // convert input
1804         rect.x = 0;
1805         rect.y = 0;
1806         rect.width = obj_surface->orig_width;
1807         rect.height = obj_surface->orig_height;
1808
1809         src_surface.base = (struct object_base *)obj_surface;
1810         src_surface.type = I965_SURFACE_TYPE_SURFACE;
1811         src_surface.flags = I965_SURFACE_FLAG_FRAME;
1812
1813         if(SURFACE(hevc_encoder_surface->nv12_surface_id) == NULL)
1814         {
1815             status = i965_CreateSurfaces(ctx,
1816                 obj_surface->orig_width,
1817                 obj_surface->orig_height,
1818                 VA_RT_FORMAT_YUV420,
1819                 1,
1820                 &hevc_encoder_surface->nv12_surface_id);
1821             assert(status == VA_STATUS_SUCCESS);
1822
1823             if (status != VA_STATUS_SUCCESS)
1824                 return status;
1825         }
1826
1827         obj_surface = SURFACE(hevc_encoder_surface->nv12_surface_id);
1828         hevc_encoder_surface->nv12_surface_obj = obj_surface;
1829         assert(obj_surface);
1830         i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1831
1832         dst_surface.base = (struct object_base *)obj_surface;
1833         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
1834         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
1835
1836         status = i965_image_processing(ctx,
1837             &src_surface,
1838             &rect,
1839             &dst_surface,
1840             &rect);
1841         assert(status == VA_STATUS_SUCCESS);
1842         hevc_encoder_surface->has_p010_to_nv12_done = 1;
1843     }
1844     return VA_STATUS_SUCCESS;
1845 }
1846
1847 static VAStatus gen9_intel_hevc_input_check(VADriverContextP ctx,
1848                             struct encode_state *encode_state,
1849                             struct intel_encoder_context *encoder_context)
1850 {
1851     struct i965_driver_data *i965 = i965_driver_data(ctx);
1852     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1853     struct object_surface *obj_surface;
1854     GenHevcSurface *hevc_encoder_surface = NULL;
1855     int i;
1856     int fourcc;
1857
1858     obj_surface = SURFACE(encode_state->current_render_target);
1859     assert(obj_surface && obj_surface->bo);
1860     hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
1861     if(hevc_encoder_surface)
1862         hevc_encoder_surface->has_p010_to_nv12_done = 0;
1863     gen9_intel_init_hevc_surface(ctx,encoder_context,encode_state,obj_surface);
1864
1865     fourcc = obj_surface->fourcc;
1866     /* Setup current frame and current direct mv buffer*/
1867     obj_surface = encode_state->reconstructed_object;
1868     if(fourcc == VA_FOURCC_P010)
1869         i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_P010, SUBSAMPLE_YUV420);
1870     else
1871         i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1872     hevc_encoder_surface = NULL;
1873     hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
1874     if(hevc_encoder_surface)
1875         hevc_encoder_surface->has_p010_to_nv12_done = 1;
1876     gen9_intel_init_hevc_surface(ctx,encoder_context,encode_state,obj_surface);
1877
1878     /* Setup reference frames and direct mv buffers*/
1879     for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
1880         obj_surface = encode_state->reference_objects[i];
1881
1882         if (obj_surface && obj_surface->bo) {
1883             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
1884             dri_bo_reference(obj_surface->bo);
1885
1886             gen9_intel_init_hevc_surface(ctx,encoder_context,encode_state,obj_surface);
1887         } else {
1888             break;
1889         }
1890     }
1891
1892     return VA_STATUS_SUCCESS;
1893 }
1894
1895 static VAStatus gen9_vme_hevc_prepare(VADriverContextP ctx,
1896                                  struct encode_state *encode_state,
1897                                  struct intel_encoder_context *encoder_context)
1898 {
1899     VAStatus vaStatus = VA_STATUS_SUCCESS;
1900     VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1901     int is_intra = pSliceParameter->slice_type == HEVC_SLICE_I;
1902     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1903     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1904
1905     /* here use the avc level for hevc vme */
1906     if (!vme_context->hevc_level ||
1907         (vme_context->hevc_level != pSequenceParameter->general_level_idc)) {
1908         vme_context->hevc_level = pSequenceParameter->general_level_idc;
1909     }
1910
1911     //internal input check for main10
1912     gen9_intel_hevc_input_check(ctx,encode_state,encoder_context);
1913
1914     intel_vme_hevc_update_mbmv_cost(ctx, encode_state, encoder_context);
1915
1916     /*Setup all the memory object*/
1917     gen9_vme_hevc_surface_setup(ctx, encode_state, is_intra, encoder_context);
1918     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
1919     //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
1920     gen9_vme_constant_setup(ctx, encode_state, encoder_context, 1);
1921
1922     /*Programing media pipeline*/
1923     gen9_vme_hevc_pipeline_programing(ctx, encode_state, encoder_context);
1924
1925     return vaStatus;
1926 }
1927
1928
/*
 * VME pipeline entry point for HEVC encode (installed as
 * encoder_context->vme_pipeline for CODEC_HEVC).  Runs the fixed
 * init -> prepare -> run -> stop sequence; `profile` is unused here.
 * Always reports success; failures in the steps are not propagated.
 */
static VAStatus
gen9_vme_hevc_pipeline(VADriverContextP ctx,
                  VAProfile profile,
                  struct encode_state *encode_state,
                  struct intel_encoder_context *encoder_context)
{
    gen9_vme_media_init(ctx, encoder_context);
    gen9_vme_hevc_prepare(ctx, encode_state, encoder_context);
    gen9_vme_run(ctx, encode_state, encoder_context);
    gen9_vme_stop(ctx, encode_state, encoder_context);

    return VA_STATUS_SUCCESS;
}
1942
1943
1944 static void
1945 gen9_vme_context_destroy(void *context)
1946 {
1947     struct gen6_vme_context *vme_context = context;
1948
1949     gen8_gpe_context_destroy(&vme_context->gpe_context);
1950
1951     dri_bo_unreference(vme_context->vme_output.bo);
1952     vme_context->vme_output.bo = NULL;
1953
1954     dri_bo_unreference(vme_context->vme_state.bo);
1955     vme_context->vme_state.bo = NULL;
1956
1957     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
1958     vme_context->vme_batchbuffer.bo = NULL;
1959
1960     free(vme_context->vme_state_message);
1961     vme_context->vme_state_message = NULL;
1962
1963     dri_bo_unreference(vme_context->i_qp_cost_table);
1964     vme_context->i_qp_cost_table = NULL;
1965
1966     dri_bo_unreference(vme_context->p_qp_cost_table);
1967     vme_context->p_qp_cost_table = NULL;
1968
1969     dri_bo_unreference(vme_context->b_qp_cost_table);
1970     vme_context->b_qp_cost_table = NULL;
1971
1972     free(vme_context->qp_per_mb);
1973     vme_context->qp_per_mb = NULL;
1974
1975     free(vme_context);
1976 }
1977
1978 Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1979 {
1980     struct gen6_vme_context *vme_context;
1981     struct i965_kernel *vme_kernel_list = NULL;
1982     int i965_kernel_num;
1983
1984     if (encoder_context->low_power_mode || encoder_context->codec == CODEC_JPEG) {
1985         encoder_context->vme_context = NULL;
1986         encoder_context->vme_pipeline = NULL;
1987         encoder_context->vme_context_destroy = NULL;
1988
1989         return True;
1990     } else if (encoder_context->codec == CODEC_VP9) {
1991         return gen9_vp9_vme_context_init(ctx, encoder_context);
1992     }
1993
1994     vme_context = calloc(1, sizeof(struct gen6_vme_context));
1995
1996     switch (encoder_context->codec) {
1997     case CODEC_H264:
1998     case CODEC_H264_MVC:
1999         vme_kernel_list = gen9_vme_kernels;
2000         encoder_context->vme_pipeline = gen9_vme_pipeline;
2001         i965_kernel_num = sizeof(gen9_vme_kernels) / sizeof(struct i965_kernel);
2002         break;
2003
2004     case CODEC_MPEG2:
2005         vme_kernel_list = gen9_vme_mpeg2_kernels;
2006         encoder_context->vme_pipeline = gen9_vme_mpeg2_pipeline;
2007         i965_kernel_num = sizeof(gen9_vme_mpeg2_kernels) / sizeof(struct i965_kernel);
2008         break;
2009
2010     case CODEC_VP8:
2011         vme_kernel_list = gen9_vme_vp8_kernels;
2012         encoder_context->vme_pipeline = gen9_vme_vp8_pipeline;
2013         i965_kernel_num = sizeof(gen9_vme_vp8_kernels) / sizeof(struct i965_kernel);
2014         break;
2015
2016     case CODEC_HEVC:
2017         vme_kernel_list = gen9_vme_hevc_kernels;
2018         encoder_context->vme_pipeline = gen9_vme_hevc_pipeline;
2019         i965_kernel_num = sizeof(gen9_vme_hevc_kernels) / sizeof(struct i965_kernel);
2020         break;
2021
2022     default:
2023         /* never get here */
2024         assert(0);
2025
2026         break;
2027     }
2028
2029     assert(vme_context);
2030     vme_context->vme_kernel_sum = i965_kernel_num;
2031     vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2032
2033     vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
2034     vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
2035     vme_context->gpe_context.sampler_size = 0;
2036
2037
2038     vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2039     vme_context->gpe_context.vfe_state.num_urb_entries = 64;
2040     vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
2041     vme_context->gpe_context.vfe_state.urb_entry_size = 16;
2042     vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
2043
2044     gen7_vme_scoreboard_init(ctx, vme_context);
2045
2046     gen8_gpe_load_kernels(ctx,
2047                           &vme_context->gpe_context,
2048                           vme_kernel_list,
2049                           i965_kernel_num);
2050     vme_context->vme_surface2_setup = gen8_gpe_surface2_setup;
2051     vme_context->vme_media_rw_surface_setup = gen8_gpe_media_rw_surface_setup;
2052     vme_context->vme_buffer_suface_setup = gen8_gpe_buffer_suface_setup;
2053     vme_context->vme_media_chroma_surface_setup = gen8_gpe_media_chroma_surface_setup;
2054
2055     encoder_context->vme_context = vme_context;
2056     encoder_context->vme_context_destroy = gen9_vme_context_destroy;
2057
2058     vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
2059
2060     return True;
2061 }