OSDN Git Service

Update README.md
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_vme.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "intel_driver.h"
37
38 #include "i965_defines.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_api.h"
42 #include "gen6_vme.h"
43 #include "gen6_mfc.h"
44
/* Replace any earlier SURFACE_STATE_PADDED_SIZE with the GEN8 value for this file. */
#ifdef SURFACE_STATE_PADDED_SIZE
#undef SURFACE_STATE_PADDED_SIZE
#endif

#define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
/*
 * Byte offset of surface-state slot `index`, and byte offset of the matching
 * binding-table entry (the table starts after MAX_MEDIA_SURFACES_GEN6
 * surface-state slots).  The argument is parenthesized so that expression
 * arguments such as `i + 1` expand correctly.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

/* Indices of the shaders inside each kernel table of this file. */
#define VME_INTRA_SHADER        0
#define VME_INTER_SHADER        1
#define VME_BINTER_SHADER   2

#define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
#define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */

#define VME_MSG_LENGTH      32
62
63 static const uint32_t gen8_vme_intra_frame[][4] = {
64 #include "shaders/vme/intra_frame_gen8.g8b"
65 };
66
67 static const uint32_t gen8_vme_inter_frame[][4] = {
68 #include "shaders/vme/inter_frame_gen8.g8b"
69 };
70
71 static const uint32_t gen8_vme_inter_bframe[][4] = {
72 #include "shaders/vme/inter_bframe_gen8.g8b"
73 };
74
75 static struct i965_kernel gen8_vme_kernels[] = {
76     {
77         "VME Intra Frame",
78         VME_INTRA_SHADER, /*index*/
79         gen8_vme_intra_frame,
80         sizeof(gen8_vme_intra_frame),
81         NULL
82     },
83     {
84         "VME inter Frame",
85         VME_INTER_SHADER,
86         gen8_vme_inter_frame,
87         sizeof(gen8_vme_inter_frame),
88         NULL
89     },
90     {
91         "VME inter BFrame",
92         VME_BINTER_SHADER,
93         gen8_vme_inter_bframe,
94         sizeof(gen8_vme_inter_bframe),
95         NULL
96     }
97 };
98
99 static const uint32_t gen8_vme_mpeg2_intra_frame[][4] = {
100 #include "shaders/vme/intra_frame_gen8.g8b"
101 };
102
103 static const uint32_t gen8_vme_mpeg2_inter_frame[][4] = {
104 #include "shaders/vme/mpeg2_inter_gen8.g8b"
105 };
106
107 static struct i965_kernel gen8_vme_mpeg2_kernels[] = {
108     {
109         "VME Intra Frame",
110         VME_INTRA_SHADER, /*index*/
111         gen8_vme_mpeg2_intra_frame,
112         sizeof(gen8_vme_mpeg2_intra_frame),
113         NULL
114     },
115     {
116         "VME inter Frame",
117         VME_INTER_SHADER,
118         gen8_vme_mpeg2_inter_frame,
119         sizeof(gen8_vme_mpeg2_inter_frame),
120         NULL
121     },
122 };
123
124 static const uint32_t gen8_vme_vp8_intra_frame[][4] = {
125 #include "shaders/vme/vp8_intra_frame_gen8.g8b"
126 };
127
128 static const uint32_t gen8_vme_vp8_inter_frame[][4] = {
129 #include "shaders/vme/vp8_inter_frame_gen8.g8b"
130 };
131
132 static struct i965_kernel gen8_vme_vp8_kernels[] = {
133     {
134         "VME Intra Frame",
135         VME_INTRA_SHADER, /*index*/
136         gen8_vme_vp8_intra_frame,
137         sizeof(gen8_vme_vp8_intra_frame),
138         NULL
139     },
140     {
141         "VME inter Frame",
142         VME_INTER_SHADER,
143         gen8_vme_vp8_inter_frame,
144         sizeof(gen8_vme_vp8_inter_frame),
145         NULL
146     },
147 };
148
149 /* only used for VME source surface state */
150 static void
151 gen8_vme_source_surface_state(VADriverContextP ctx,
152                               int index,
153                               struct object_surface *obj_surface,
154                               struct intel_encoder_context *encoder_context)
155 {
156     struct gen6_vme_context *vme_context = encoder_context->vme_context;
157
158     vme_context->vme_surface2_setup(ctx,
159                                     &vme_context->gpe_context,
160                                     obj_surface,
161                                     BINDING_TABLE_OFFSET(index),
162                                     SURFACE_STATE_OFFSET(index));
163 }
164
165 static void
166 gen8_vme_media_source_surface_state(VADriverContextP ctx,
167                                     int index,
168                                     struct object_surface *obj_surface,
169                                     struct intel_encoder_context *encoder_context)
170 {
171     struct gen6_vme_context *vme_context = encoder_context->vme_context;
172
173     vme_context->vme_media_rw_surface_setup(ctx,
174                                             &vme_context->gpe_context,
175                                             obj_surface,
176                                             BINDING_TABLE_OFFSET(index),
177                                             SURFACE_STATE_OFFSET(index),
178                                             0);
179 }
180
181 static void
182 gen8_vme_media_chroma_source_surface_state(VADriverContextP ctx,
183                                            int index,
184                                            struct object_surface *obj_surface,
185                                            struct intel_encoder_context *encoder_context)
186 {
187     struct gen6_vme_context *vme_context = encoder_context->vme_context;
188
189     vme_context->vme_media_chroma_surface_setup(ctx,
190                                                 &vme_context->gpe_context,
191                                                 obj_surface,
192                                                 BINDING_TABLE_OFFSET(index),
193                                                 SURFACE_STATE_OFFSET(index),
194                                                 0);
195 }
196
197 static void
198 gen8_vme_output_buffer_setup(VADriverContextP ctx,
199                              struct encode_state *encode_state,
200                              int index,
201                              struct intel_encoder_context *encoder_context,
202                              int is_intra,
203                              int width_in_mbs,
204                              int height_in_mbs)
205
206 {
207     struct i965_driver_data *i965 = i965_driver_data(ctx);
208     struct gen6_vme_context *vme_context = encoder_context->vme_context;
209
210     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
211     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
212
213     if (is_intra)
214         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
215     else
216         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
217     /*
218      * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
219      * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
220      * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
221      */
222
223     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
224                                               "VME output buffer",
225                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
226                                               0x1000);
227     assert(vme_context->vme_output.bo);
228     vme_context->vme_buffer_suface_setup(ctx,
229                                          &vme_context->gpe_context,
230                                          &vme_context->vme_output,
231                                          BINDING_TABLE_OFFSET(index),
232                                          SURFACE_STATE_OFFSET(index));
233 }
234
235 static void
236 gen8_vme_avc_output_buffer_setup(VADriverContextP ctx,
237                                  struct encode_state *encode_state,
238                                  int index,
239                                  struct intel_encoder_context *encoder_context)
240 {
241     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
242     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
243     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
244     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
245     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
246
247     gen8_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
248
249 }
250
251 static void
252 gen8_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
253                                       struct encode_state *encode_state,
254                                       int index,
255                                       struct intel_encoder_context *encoder_context,
256                                       int width_in_mbs,
257                                       int height_in_mbs)
258 {
259     struct i965_driver_data *i965 = i965_driver_data(ctx);
260     struct gen6_vme_context *vme_context = encoder_context->vme_context;
261
262     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
263     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
264     vme_context->vme_batchbuffer.pitch = 16;
265     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
266                                                    "VME batchbuffer",
267                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
268                                                    0x1000);
269     vme_context->vme_buffer_suface_setup(ctx,
270                                          &vme_context->gpe_context,
271                                          &vme_context->vme_batchbuffer,
272                                          BINDING_TABLE_OFFSET(index),
273                                          SURFACE_STATE_OFFSET(index));
274 }
275
276 static void
277 gen8_vme_avc_output_vme_batchbuffer_setup(VADriverContextP ctx,
278                                           struct encode_state *encode_state,
279                                           int index,
280                                           struct intel_encoder_context *encoder_context)
281 {
282     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
283     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
284     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
285
286     gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
287 }
288
289 static VAStatus
290 gen8_vme_surface_setup(VADriverContextP ctx,
291                        struct encode_state *encode_state,
292                        int is_intra,
293                        struct intel_encoder_context *encoder_context)
294 {
295     struct object_surface *obj_surface;
296
297     /*Setup surfaces state*/
298     /* current picture for encoding */
299     obj_surface = encode_state->input_yuv_object;
300     gen8_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
301     gen8_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
302     gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
303
304     if (!is_intra) {
305         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
306         int slice_type;
307
308         slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
309         assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
310
311         intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen8_vme_source_surface_state);
312
313         if (slice_type == SLICE_TYPE_B)
314             intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen8_vme_source_surface_state);
315     }
316
317     /* VME output */
318     gen8_vme_avc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
319     gen8_vme_avc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
320     intel_h264_setup_cost_surface(ctx, encode_state, encoder_context,
321                                   BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET),
322                                   SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET));
323
324     return VA_STATUS_SUCCESS;
325 }
326
327 static VAStatus gen8_vme_interface_setup(VADriverContextP ctx,
328                                          struct encode_state *encode_state,
329                                          struct intel_encoder_context *encoder_context)
330 {
331     struct gen6_vme_context *vme_context = encoder_context->vme_context;
332     struct gen8_interface_descriptor_data *desc;
333     int i;
334     dri_bo *bo;
335     unsigned char *desc_ptr;
336
337     bo = vme_context->gpe_context.idrt.bo;
338     dri_bo_map(bo, 1);
339     assert(bo->virtual);
340     desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt.offset;
341
342     desc = (struct gen8_interface_descriptor_data *)desc_ptr;
343
344     for (i = 0; i < vme_context->vme_kernel_sum; i++) {
345         struct i965_kernel *kernel;
346         kernel = &vme_context->gpe_context.kernels[i];
347         assert(sizeof(*desc) == 32);
348         /*Setup the descritor table*/
349         memset(desc, 0, sizeof(*desc));
350         desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
351         desc->desc3.sampler_count = 0; /* FIXME: */
352         desc->desc3.sampler_state_pointer = 0;
353         desc->desc4.binding_table_entry_count = 1; /* FIXME: */
354         desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
355         desc->desc5.constant_urb_entry_read_offset = 0;
356         desc->desc5.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
357
358
359         desc++;
360     }
361
362     dri_bo_unmap(bo);
363
364     return VA_STATUS_SUCCESS;
365 }
366
367 static VAStatus gen8_vme_constant_setup(VADriverContextP ctx,
368                                         struct encode_state *encode_state,
369                                         struct intel_encoder_context *encoder_context,
370                                         int denom)
371 {
372     struct gen6_vme_context *vme_context = encoder_context->vme_context;
373     unsigned char *constant_buffer;
374     unsigned int *vme_state_message;
375     int mv_num = 32;
376
377     vme_state_message = (unsigned int *)vme_context->vme_state_message;
378
379     if (encoder_context->codec == CODEC_H264 ||
380         encoder_context->codec == CODEC_H264_MVC) {
381         if (vme_context->h264_level >= 30) {
382             mv_num = 16 / denom;
383
384             if (vme_context->h264_level >= 31)
385                 mv_num = 8 / denom;
386         }
387     } else if (encoder_context->codec == CODEC_MPEG2) {
388         mv_num = 2 / denom;
389     }
390
391     vme_state_message[31] = mv_num;
392
393     dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
394     assert(vme_context->gpe_context.curbe.bo->virtual);
395     constant_buffer = (unsigned char *)vme_context->gpe_context.curbe.bo->virtual +
396                       vme_context->gpe_context.curbe.offset;
397
398     /* VME MV/Mb cost table is passed by using const buffer */
399     /* Now it uses the fixed search path. So it is constructed directly
400      * in the GPU shader.
401      */
402     memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
403
404     dri_bo_unmap(vme_context->gpe_context.curbe.bo);
405
406     return VA_STATUS_SUCCESS;
407 }
408
/* Dependency bits placed in the scoreboard mask word of a MEDIA_OBJECT command. */
#define     MB_SCOREBOARD_A     (1 << 0)
#define     MB_SCOREBOARD_B     (1 << 1)
#define     MB_SCOREBOARD_C     (1 << 2)

/*
 * Check whether the macroblock at (x_index, y_index) is out of bound.
 *
 * A macroblock is in bounds when it lies inside the picture
 * (0 <= x < mb_width, 0 <= y < mb_height) and its raster index
 * y * mb_width + x falls inside the slice [first_mb, first_mb + num_mb).
 *
 * Returns 0 when the macroblock is in bounds and -1 otherwise.
 *
 * The upper slice limit is exclusive: index first_mb + num_mb is the first
 * macroblock after the slice and must be rejected.
 */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    int mb_index;
    if (x_index < 0 || x_index >= mb_width)
        return -1;
    if (y_index < 0 || y_index >= mb_height)
        return -1;

    mb_index = y_index * mb_width + x_index;
    if (mb_index < first_mb || mb_index >= (first_mb + num_mb))
        return -1;
    return 0;
}
427
428 static void
429 gen8wa_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
430                                        struct encode_state *encode_state,
431                                        int mb_width, int mb_height,
432                                        int kernel,
433                                        int transform_8x8_mode_flag,
434                                        struct intel_encoder_context *encoder_context)
435 {
436     struct gen6_vme_context *vme_context = encoder_context->vme_context;
437     int mb_row;
438     int s;
439     unsigned int *command_ptr;
440
441 #define     USE_SCOREBOARD      (1 << 21)
442
443     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
444     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
445
446     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
447         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
448         int first_mb = pSliceParameter->macroblock_address;
449         int num_mb = pSliceParameter->num_macroblocks;
450         unsigned int mb_intra_ub, score_dep;
451         int x_outer, y_outer, x_inner, y_inner;
452         int xtemp_outer = 0;
453
454         x_outer = first_mb % mb_width;
455         y_outer = first_mb / mb_width;
456         mb_row = y_outer;
457
458         for (; x_outer < (mb_width - 2) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
459             x_inner = x_outer;
460             y_inner = y_outer;
461             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
462                 mb_intra_ub = 0;
463                 score_dep = 0;
464                 if (x_inner != 0) {
465                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
466                     score_dep |= MB_SCOREBOARD_A;
467                 }
468                 if (y_inner != mb_row) {
469                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
470                     score_dep |= MB_SCOREBOARD_B;
471                     if (x_inner != 0)
472                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
473                     if (x_inner != (mb_width - 1)) {
474                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
475                         score_dep |= MB_SCOREBOARD_C;
476                     }
477                 }
478
479                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
480                 *command_ptr++ = kernel;
481                 *command_ptr++ = USE_SCOREBOARD;
482                 /* Indirect data */
483                 *command_ptr++ = 0;
484                 /* the (X, Y) term of scoreboard */
485                 *command_ptr++ = ((y_inner << 16) | x_inner);
486                 *command_ptr++ = score_dep;
487                 /*inline data */
488                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
489                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
490                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
491                 *command_ptr++ = 0;
492
493                 x_inner -= 2;
494                 y_inner += 1;
495             }
496             x_outer += 1;
497         }
498
499         xtemp_outer = mb_width - 2;
500         if (xtemp_outer < 0)
501             xtemp_outer = 0;
502         x_outer = xtemp_outer;
503         y_outer = first_mb / mb_width;
504         for (; !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
505             y_inner = y_outer;
506             x_inner = x_outer;
507             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
508                 mb_intra_ub = 0;
509                 score_dep = 0;
510                 if (x_inner != 0) {
511                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
512                     score_dep |= MB_SCOREBOARD_A;
513                 }
514                 if (y_inner != mb_row) {
515                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
516                     score_dep |= MB_SCOREBOARD_B;
517                     if (x_inner != 0)
518                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
519
520                     if (x_inner != (mb_width - 1)) {
521                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
522                         score_dep |= MB_SCOREBOARD_C;
523                     }
524                 }
525
526                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
527                 *command_ptr++ = kernel;
528                 *command_ptr++ = USE_SCOREBOARD;
529                 /* Indirect data */
530                 *command_ptr++ = 0;
531                 /* the (X, Y) term of scoreboard */
532                 *command_ptr++ = ((y_inner << 16) | x_inner);
533                 *command_ptr++ = score_dep;
534                 /*inline data */
535                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
536                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
537
538                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
539                 *command_ptr++ = 0;
540                 x_inner -= 2;
541                 y_inner += 1;
542             }
543             x_outer++;
544             if (x_outer >= mb_width) {
545                 y_outer += 1;
546                 x_outer = xtemp_outer;
547             }
548         }
549     }
550
551     *command_ptr++ = MI_BATCH_BUFFER_END;
552     *command_ptr++ = 0;
553
554     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
555 }
556
557 static void
558 gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx,
559                               struct encode_state *encode_state,
560                               int mb_width, int mb_height,
561                               int kernel,
562                               int transform_8x8_mode_flag,
563                               struct intel_encoder_context *encoder_context)
564 {
565     struct gen6_vme_context *vme_context = encoder_context->vme_context;
566     int mb_x = 0, mb_y = 0;
567     int i, s;
568     unsigned int *command_ptr;
569     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
570     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
571     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
572     int qp;
573     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
574     int qp_mb, qp_index;
575
576     if (encoder_context->rate_control_mode == VA_RC_CQP)
577         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
578     else
579         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
580
581     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
582     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
583
584     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
585         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
586         int slice_mb_begin = pSliceParameter->macroblock_address;
587         int slice_mb_number = pSliceParameter->num_macroblocks;
588         unsigned int mb_intra_ub;
589         int slice_mb_x = pSliceParameter->macroblock_address % mb_width;
590         for (i = 0; i < slice_mb_number;) {
591             int mb_count = i + slice_mb_begin;
592             mb_x = mb_count % mb_width;
593             mb_y = mb_count / mb_width;
594             mb_intra_ub = 0;
595             if (mb_x != 0) {
596                 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
597             }
598             if (mb_y != 0) {
599                 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
600                 if (mb_x != 0)
601                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
602                 if (mb_x != (mb_width - 1))
603                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
604             }
605             if (i < mb_width) {
606                 if (i == 0)
607                     mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
608                 mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
609                 if ((i == (mb_width - 1)) && slice_mb_x) {
610                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
611                 }
612             }
613
614             if ((i == mb_width) && slice_mb_x) {
615                 mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
616             }
617             *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
618             *command_ptr++ = kernel;
619             *command_ptr++ = 0;
620             *command_ptr++ = 0;
621             *command_ptr++ = 0;
622             *command_ptr++ = 0;
623
624             /*inline data */
625             *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
626             *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
627             /* qp occupies one byte */
628             if (vme_context->roi_enabled) {
629                 qp_index = mb_y * mb_width + mb_x;
630                 qp_mb = *(vme_context->qp_per_mb + qp_index);
631             } else
632                 qp_mb = qp;
633             *command_ptr++ = qp_mb;
634
635             *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
636             *command_ptr++ = 0;
637             i += 1;
638         }
639     }
640
641     *command_ptr++ = MI_BATCH_BUFFER_END;
642     *command_ptr++ = 0;
643
644     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
645 }
646
647 static void gen8_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
648 {
649     struct gen6_vme_context *vme_context = encoder_context->vme_context;
650
651     gen8_gpe_context_init(ctx, &vme_context->gpe_context);
652
653     /* VME output buffer */
654     dri_bo_unreference(vme_context->vme_output.bo);
655     vme_context->vme_output.bo = NULL;
656
657     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
658     vme_context->vme_batchbuffer.bo = NULL;
659
660     /* VME state */
661     dri_bo_unreference(vme_context->vme_state.bo);
662     vme_context->vme_state.bo = NULL;
663 }
664
665 static void gen8_vme_pipeline_programing(VADriverContextP ctx,
666                                          struct encode_state *encode_state,
667                                          struct intel_encoder_context *encoder_context)
668 {
669     struct gen6_vme_context *vme_context = encoder_context->vme_context;
670     struct intel_batchbuffer *batch = encoder_context->base.batch;
671     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
672     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
673     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
674     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
675     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
676     int kernel_shader;
677     bool allow_hwscore = true;
678     int s;
679     unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
680
681     if (is_low_quality)
682         allow_hwscore = false;
683     else {
684         for (s = 0; s < encode_state->num_slice_params_ext; s++) {
685             pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
686             if ((pSliceParameter->macroblock_address % width_in_mbs)) {
687                 allow_hwscore = false;
688                 break;
689             }
690         }
691     }
692
693     if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
694         (pSliceParameter->slice_type == SLICE_TYPE_SI)) {
695         kernel_shader = VME_INTRA_SHADER;
696     } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
697                (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
698         kernel_shader = VME_INTER_SHADER;
699     } else {
700         kernel_shader = VME_BINTER_SHADER;
701         if (!allow_hwscore)
702             kernel_shader = VME_INTER_SHADER;
703     }
704     if (allow_hwscore)
705         gen8wa_vme_walker_fill_vme_batchbuffer(ctx,
706                                                encode_state,
707                                                width_in_mbs, height_in_mbs,
708                                                kernel_shader,
709                                                pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
710                                                encoder_context);
711     else
712         gen8_vme_fill_vme_batchbuffer(ctx,
713                                       encode_state,
714                                       width_in_mbs, height_in_mbs,
715                                       kernel_shader,
716                                       pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
717                                       encoder_context);
718
719     intel_batchbuffer_start_atomic(batch, 0x1000);
720     gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
721     BEGIN_BATCH(batch, 3);
722     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
723     OUT_RELOC64(batch,
724                 vme_context->vme_batchbuffer.bo,
725                 I915_GEM_DOMAIN_COMMAND, 0,
726                 0);
727     ADVANCE_BATCH(batch);
728
729     intel_batchbuffer_end_atomic(batch);
730 }
731
732 static VAStatus gen8_vme_prepare(VADriverContextP ctx,
733                                  struct encode_state *encode_state,
734                                  struct intel_encoder_context *encoder_context)
735 {
736     VAStatus vaStatus = VA_STATUS_SUCCESS;
737     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
738     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
739     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
740     struct gen6_vme_context *vme_context = encoder_context->vme_context;
741
742     if (!vme_context->h264_level ||
743         (vme_context->h264_level != pSequenceParameter->level_idc)) {
744         vme_context->h264_level = pSequenceParameter->level_idc;
745     }
746
747     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
748     intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context);
749     intel_h264_enc_roi_config(ctx, encode_state, encoder_context);
750
751     /*Setup all the memory object*/
752     gen8_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
753     gen8_vme_interface_setup(ctx, encode_state, encoder_context);
754     //gen8_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
755     gen8_vme_constant_setup(ctx, encode_state, encoder_context, (pSliceParameter->slice_type == SLICE_TYPE_B) ? 2 : 1);
756
757     /*Programing media pipeline*/
758     gen8_vme_pipeline_programing(ctx, encode_state, encoder_context);
759
760     return vaStatus;
761 }
762
763 static VAStatus gen8_vme_run(VADriverContextP ctx,
764                              struct encode_state *encode_state,
765                              struct intel_encoder_context *encoder_context)
766 {
767     struct intel_batchbuffer *batch = encoder_context->base.batch;
768
769     intel_batchbuffer_flush(batch);
770
771     return VA_STATUS_SUCCESS;
772 }
773
774 static VAStatus gen8_vme_stop(VADriverContextP ctx,
775                               struct encode_state *encode_state,
776                               struct intel_encoder_context *encoder_context)
777 {
778     return VA_STATUS_SUCCESS;
779 }
780
781 static VAStatus
782 gen8_vme_pipeline(VADriverContextP ctx,
783                   VAProfile profile,
784                   struct encode_state *encode_state,
785                   struct intel_encoder_context *encoder_context)
786 {
787     gen8_vme_media_init(ctx, encoder_context);
788     gen8_vme_prepare(ctx, encode_state, encoder_context);
789     gen8_vme_run(ctx, encode_state, encoder_context);
790     gen8_vme_stop(ctx, encode_state, encoder_context);
791
792     return VA_STATUS_SUCCESS;
793 }
794
795 static void
796 gen8_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
797                                    struct encode_state *encode_state,
798                                    int index,
799                                    int is_intra,
800                                    struct intel_encoder_context *encoder_context)
801 {
802     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
803     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
804     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
805
806     gen8_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
807 }
808
809 static void
810 gen8_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
811                                             struct encode_state *encode_state,
812                                             int index,
813                                             struct intel_encoder_context *encoder_context)
814 {
815     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
816     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
817     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
818
819     gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
820 }
821
822 static VAStatus
823 gen8_vme_mpeg2_surface_setup(VADriverContextP ctx,
824                              struct encode_state *encode_state,
825                              int is_intra,
826                              struct intel_encoder_context *encoder_context)
827 {
828     struct object_surface *obj_surface;
829
830     /*Setup surfaces state*/
831     /* current picture for encoding */
832     obj_surface = encode_state->input_yuv_object;
833     gen8_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
834     gen8_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
835     gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
836
837     if (!is_intra) {
838         /* reference 0 */
839         obj_surface = encode_state->reference_objects[0];
840
841         if (obj_surface->bo != NULL)
842             gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
843
844         /* reference 1 */
845         obj_surface = encode_state->reference_objects[1];
846
847         if (obj_surface && obj_surface->bo != NULL)
848             gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
849     }
850
851     /* VME output */
852     gen8_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
853     gen8_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
854
855     return VA_STATUS_SUCCESS;
856 }
857
858 static void
859 gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
860                                              struct encode_state *encode_state,
861                                              int mb_width, int mb_height,
862                                              int kernel,
863                                              struct intel_encoder_context *encoder_context)
864 {
865     struct gen6_vme_context *vme_context = encoder_context->vme_context;
866     unsigned int *command_ptr;
867
868 #define     MPEG2_SCOREBOARD        (1 << 21)
869
870     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
871     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
872
873     {
874         unsigned int mb_intra_ub, score_dep;
875         int x_outer, y_outer, x_inner, y_inner;
876         int xtemp_outer = 0;
877         int first_mb = 0;
878         int num_mb = mb_width * mb_height;
879
880         x_outer = 0;
881         y_outer = 0;
882
883
884         for (; x_outer < (mb_width - 2) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
885             x_inner = x_outer;
886             y_inner = y_outer;
887             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
888                 mb_intra_ub = 0;
889                 score_dep = 0;
890                 if (x_inner != 0) {
891                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
892                     score_dep |= MB_SCOREBOARD_A;
893                 }
894                 if (y_inner != 0) {
895                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
896                     score_dep |= MB_SCOREBOARD_B;
897
898                     if (x_inner != 0)
899                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
900
901                     if (x_inner != (mb_width - 1)) {
902                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
903                         score_dep |= MB_SCOREBOARD_C;
904                     }
905                 }
906
907                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
908                 *command_ptr++ = kernel;
909                 *command_ptr++ = MPEG2_SCOREBOARD;
910                 /* Indirect data */
911                 *command_ptr++ = 0;
912                 /* the (X, Y) term of scoreboard */
913                 *command_ptr++ = ((y_inner << 16) | x_inner);
914                 *command_ptr++ = score_dep;
915                 /*inline data */
916                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
917                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
918                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
919                 *command_ptr++ = 0;
920
921                 x_inner -= 2;
922                 y_inner += 1;
923             }
924             x_outer += 1;
925         }
926
927         xtemp_outer = mb_width - 2;
928         if (xtemp_outer < 0)
929             xtemp_outer = 0;
930         x_outer = xtemp_outer;
931         y_outer = 0;
932         for (; !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
933             y_inner = y_outer;
934             x_inner = x_outer;
935             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
936                 mb_intra_ub = 0;
937                 score_dep = 0;
938                 if (x_inner != 0) {
939                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
940                     score_dep |= MB_SCOREBOARD_A;
941                 }
942                 if (y_inner != 0) {
943                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
944                     score_dep |= MB_SCOREBOARD_B;
945
946                     if (x_inner != 0)
947                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
948
949                     if (x_inner != (mb_width - 1)) {
950                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
951                         score_dep |= MB_SCOREBOARD_C;
952                     }
953                 }
954
955                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
956                 *command_ptr++ = kernel;
957                 *command_ptr++ = MPEG2_SCOREBOARD;
958                 /* Indirect data */
959                 *command_ptr++ = 0;
960                 /* the (X, Y) term of scoreboard */
961                 *command_ptr++ = ((y_inner << 16) | x_inner);
962                 *command_ptr++ = score_dep;
963                 /*inline data */
964                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
965                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
966
967                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
968                 *command_ptr++ = 0;
969                 x_inner -= 2;
970                 y_inner += 1;
971             }
972             x_outer++;
973             if (x_outer >= mb_width) {
974                 y_outer += 1;
975                 x_outer = xtemp_outer;
976             }
977         }
978     }
979
980     *command_ptr++ = MI_BATCH_BUFFER_END;
981     *command_ptr++ = 0;
982
983     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
984     return;
985 }
986
987 static void
988 gen8_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
989                                     struct encode_state *encode_state,
990                                     int mb_width, int mb_height,
991                                     int kernel,
992                                     int transform_8x8_mode_flag,
993                                     struct intel_encoder_context *encoder_context)
994 {
995     struct gen6_vme_context *vme_context = encoder_context->vme_context;
996     int mb_x = 0, mb_y = 0;
997     int i, s, j;
998     unsigned int *command_ptr;
999
1000
1001     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1002     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1003
1004     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1005         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
1006
1007         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
1008             int slice_mb_begin = slice_param->macroblock_address;
1009             int slice_mb_number = slice_param->num_macroblocks;
1010             unsigned int mb_intra_ub;
1011
1012             for (i = 0; i < slice_mb_number;) {
1013                 int mb_count = i + slice_mb_begin;
1014
1015                 mb_x = mb_count % mb_width;
1016                 mb_y = mb_count / mb_width;
1017                 mb_intra_ub = 0;
1018
1019                 if (mb_x != 0) {
1020                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1021                 }
1022
1023                 if (mb_y != 0) {
1024                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1025
1026                     if (mb_x != 0)
1027                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1028
1029                     if (mb_x != (mb_width - 1))
1030                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1031                 }
1032
1033                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1034                 *command_ptr++ = kernel;
1035                 *command_ptr++ = 0;
1036                 *command_ptr++ = 0;
1037                 *command_ptr++ = 0;
1038                 *command_ptr++ = 0;
1039
1040                 /*inline data */
1041                 *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
1042                 *command_ptr++ = ((1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1043
1044                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
1045                 *command_ptr++ = 0;
1046                 i += 1;
1047             }
1048
1049             slice_param++;
1050         }
1051     }
1052
1053     *command_ptr++ = MI_BATCH_BUFFER_END;
1054     *command_ptr++ = 0;
1055
1056     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1057 }
1058
1059 static void
1060 gen8_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
1061                                    struct encode_state *encode_state,
1062                                    int is_intra,
1063                                    struct intel_encoder_context *encoder_context)
1064 {
1065     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1066     struct intel_batchbuffer *batch = encoder_context->base.batch;
1067     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1068     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1069     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1070     bool allow_hwscore = true;
1071     int s;
1072     int kernel_shader;
1073     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1074
1075     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1076         int j;
1077         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
1078
1079         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
1080             if (slice_param->macroblock_address % width_in_mbs) {
1081                 allow_hwscore = false;
1082                 break;
1083             }
1084         }
1085     }
1086
1087     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1088     if (pic_param->picture_type == VAEncPictureTypeIntra) {
1089         allow_hwscore = false;
1090         kernel_shader = VME_INTRA_SHADER;
1091     } else {
1092         kernel_shader = VME_INTER_SHADER;
1093     }
1094
1095     if (allow_hwscore)
1096         gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
1097                                                      encode_state,
1098                                                      width_in_mbs, height_in_mbs,
1099                                                      kernel_shader,
1100                                                      encoder_context);
1101     else
1102         gen8_vme_mpeg2_fill_vme_batchbuffer(ctx,
1103                                             encode_state,
1104                                             width_in_mbs, height_in_mbs,
1105                                             is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
1106                                             0,
1107                                             encoder_context);
1108
1109     intel_batchbuffer_start_atomic(batch, 0x1000);
1110     gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1111     BEGIN_BATCH(batch, 4);
1112     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1113     OUT_RELOC64(batch,
1114                 vme_context->vme_batchbuffer.bo,
1115                 I915_GEM_DOMAIN_COMMAND, 0,
1116                 0);
1117     OUT_BATCH(batch, 0);
1118     ADVANCE_BATCH(batch);
1119
1120     intel_batchbuffer_end_atomic(batch);
1121 }
1122
1123 static VAStatus
1124 gen8_vme_mpeg2_prepare(VADriverContextP ctx,
1125                        struct encode_state *encode_state,
1126                        struct intel_encoder_context *encoder_context)
1127 {
1128     VAStatus vaStatus = VA_STATUS_SUCCESS;
1129     VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1130     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1131     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1132
1133     if ((!vme_context->mpeg2_level) ||
1134         (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
1135         vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
1136     }
1137
1138
1139     /*Setup all the memory object*/
1140     gen8_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1141     gen8_vme_interface_setup(ctx, encode_state, encoder_context);
1142     //gen8_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1143     intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
1144     gen8_vme_constant_setup(ctx, encode_state, encoder_context, 1);
1145
1146     /*Programing media pipeline*/
1147     gen8_vme_mpeg2_pipeline_programing(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1148
1149     return vaStatus;
1150 }
1151
1152 static VAStatus
1153 gen8_vme_mpeg2_pipeline(VADriverContextP ctx,
1154                         VAProfile profile,
1155                         struct encode_state *encode_state,
1156                         struct intel_encoder_context *encoder_context)
1157 {
1158     gen8_vme_media_init(ctx, encoder_context);
1159     gen8_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
1160     gen8_vme_run(ctx, encode_state, encoder_context);
1161     gen8_vme_stop(ctx, encode_state, encoder_context);
1162
1163     return VA_STATUS_SUCCESS;
1164 }
1165
1166 static void
1167 gen8_vme_vp8_output_buffer_setup(VADriverContextP ctx,
1168                                  struct encode_state *encode_state,
1169                                  int index,
1170                                  int is_intra,
1171                                  struct intel_encoder_context *encoder_context)
1172 {
1173     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1174     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1175     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1176
1177     gen8_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
1178 }
1179
1180 static void
1181 gen8_vme_vp8_output_vme_batchbuffer_setup(VADriverContextP ctx,
1182                                           struct encode_state *encode_state,
1183                                           int index,
1184                                           struct intel_encoder_context *encoder_context)
1185 {
1186     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1187     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1188     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1189
1190     gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
1191 }
1192
1193 static VAStatus
1194 gen8_vme_vp8_surface_setup(VADriverContextP ctx,
1195                            struct encode_state *encode_state,
1196                            int is_intra,
1197                            struct intel_encoder_context *encoder_context)
1198 {
1199     struct object_surface *obj_surface;
1200
1201     /*Setup surfaces state*/
1202     /* current picture for encoding */
1203     obj_surface = encode_state->input_yuv_object;
1204     gen8_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
1205     gen8_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
1206     gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
1207
1208     if (!is_intra) {
1209         /* reference 0 */
1210         obj_surface = encode_state->reference_objects[0];
1211
1212         if (obj_surface->bo != NULL)
1213             gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
1214
1215         /* reference 1 */
1216         obj_surface = encode_state->reference_objects[1];
1217
1218         if (obj_surface && obj_surface->bo != NULL)
1219             gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
1220     }
1221
1222     /* VME output */
1223     gen8_vme_vp8_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
1224     gen8_vme_vp8_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
1225
1226     return VA_STATUS_SUCCESS;
1227 }
1228
1229 static void
1230 gen8_vme_vp8_pipeline_programing(VADriverContextP ctx,
1231                                  struct encode_state *encode_state,
1232                                  int is_intra,
1233                                  struct intel_encoder_context *encoder_context)
1234 {
1235     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1236     struct intel_batchbuffer *batch = encoder_context->base.batch;
1237     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1238     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1239     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1240     int kernel_shader = (is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER);
1241
1242     gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
1243                                                  encode_state,
1244                                                  width_in_mbs, height_in_mbs,
1245                                                  kernel_shader,
1246                                                  encoder_context);
1247
1248     intel_batchbuffer_start_atomic(batch, 0x1000);
1249     gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1250     BEGIN_BATCH(batch, 4);
1251     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1252     OUT_RELOC64(batch,
1253                 vme_context->vme_batchbuffer.bo,
1254                 I915_GEM_DOMAIN_COMMAND, 0,
1255                 0);
1256     OUT_BATCH(batch, 0);
1257     ADVANCE_BATCH(batch);
1258
1259     intel_batchbuffer_end_atomic(batch);
1260 }
1261
1262 static VAStatus gen8_vme_vp8_prepare(VADriverContextP ctx,
1263                                      struct encode_state *encode_state,
1264                                      struct intel_encoder_context *encoder_context)
1265 {
1266     VAStatus vaStatus = VA_STATUS_SUCCESS;
1267     VAEncPictureParameterBufferVP8 *pPicParameter = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
1268     int is_intra = !pPicParameter->pic_flags.bits.frame_type;
1269
1270     /* update vp8 mbmv cost */
1271     intel_vme_vp8_update_mbmv_cost(ctx, encode_state, encoder_context);
1272
1273     /*Setup all the memory object*/
1274     gen8_vme_vp8_surface_setup(ctx, encode_state, is_intra, encoder_context);
1275     gen8_vme_interface_setup(ctx, encode_state, encoder_context);
1276     gen8_vme_constant_setup(ctx, encode_state, encoder_context, 1);
1277
1278     /*Programing media pipeline*/
1279     gen8_vme_vp8_pipeline_programing(ctx, encode_state, is_intra, encoder_context);
1280
1281     return vaStatus;
1282 }
1283
1284 static VAStatus
1285 gen8_vme_vp8_pipeline(VADriverContextP ctx,
1286                       VAProfile profile,
1287                       struct encode_state *encode_state,
1288                       struct intel_encoder_context *encoder_context)
1289 {
1290     gen8_vme_media_init(ctx, encoder_context);
1291     gen8_vme_vp8_prepare(ctx, encode_state, encoder_context);
1292     gen8_vme_run(ctx, encode_state, encoder_context);
1293     gen8_vme_stop(ctx, encode_state, encoder_context);
1294
1295     return VA_STATUS_SUCCESS;
1296 }
1297
1298 static void
1299 gen8_vme_context_destroy(void *context)
1300 {
1301     struct gen6_vme_context *vme_context = context;
1302
1303     gen8_gpe_context_destroy(&vme_context->gpe_context);
1304
1305     dri_bo_unreference(vme_context->vme_output.bo);
1306     vme_context->vme_output.bo = NULL;
1307
1308     dri_bo_unreference(vme_context->vme_state.bo);
1309     vme_context->vme_state.bo = NULL;
1310
1311     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
1312     vme_context->vme_batchbuffer.bo = NULL;
1313
1314     free(vme_context->vme_state_message);
1315     vme_context->vme_state_message = NULL;
1316
1317     dri_bo_unreference(vme_context->i_qp_cost_table);
1318     vme_context->i_qp_cost_table = NULL;
1319
1320     dri_bo_unreference(vme_context->p_qp_cost_table);
1321     vme_context->p_qp_cost_table = NULL;
1322
1323     dri_bo_unreference(vme_context->b_qp_cost_table);
1324     vme_context->b_qp_cost_table = NULL;
1325
1326     free(vme_context->qp_per_mb);
1327     vme_context->qp_per_mb = NULL;
1328
1329     free(vme_context);
1330 }
1331
1332 extern Bool i965_encoder_vp8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
1333
1334 Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1335 {
1336     struct i965_driver_data *i965 = i965_driver_data(ctx);
1337     struct gen6_vme_context *vme_context = NULL;
1338     struct i965_kernel *vme_kernel_list = NULL;
1339     int i965_kernel_num;
1340
1341     if (IS_CHERRYVIEW(i965->intel.device_info) && encoder_context->codec == CODEC_VP8) {
1342         return i965_encoder_vp8_vme_context_init(ctx, encoder_context);
1343     } else if (IS_GEN8(i965->intel.device_info) && (
1344                    encoder_context->codec == CODEC_H264 ||
1345                    encoder_context->codec == CODEC_H264_MVC)) {
1346         return gen9_avc_vme_context_init(ctx, encoder_context);
1347     }
1348     switch (encoder_context->codec) {
1349     case CODEC_H264:
1350     case CODEC_H264_MVC:
1351         vme_kernel_list = gen8_vme_kernels;
1352         encoder_context->vme_pipeline = gen8_vme_pipeline;
1353         i965_kernel_num = sizeof(gen8_vme_kernels) / sizeof(struct i965_kernel);
1354         break;
1355
1356     case CODEC_MPEG2:
1357         vme_kernel_list = gen8_vme_mpeg2_kernels;
1358         encoder_context->vme_pipeline = gen8_vme_mpeg2_pipeline;
1359         i965_kernel_num = sizeof(gen8_vme_mpeg2_kernels) / sizeof(struct i965_kernel);
1360         break;
1361
1362     case CODEC_JPEG:
1363         //JPEG encode doesnt have vme. So, set the corresponding fields to NULL.
1364         encoder_context->vme_context = NULL;
1365         encoder_context->vme_pipeline = NULL;
1366         encoder_context->vme_context_destroy = NULL;
1367         break;
1368
1369     case CODEC_VP8:
1370         vme_kernel_list = gen8_vme_vp8_kernels;
1371         encoder_context->vme_pipeline = gen8_vme_vp8_pipeline;
1372         i965_kernel_num = sizeof(gen8_vme_vp8_kernels) / sizeof(struct i965_kernel);
1373         break;
1374
1375     default:
1376         /* never get here */
1377         assert(0);
1378
1379         break;
1380     }
1381
1382     //If the codec is JPEG, bypass VME
1383     if (encoder_context->codec != CODEC_JPEG) {
1384         vme_context = calloc(1, sizeof(struct gen6_vme_context));
1385         assert(vme_context);
1386         vme_context->vme_kernel_sum = i965_kernel_num;
1387         vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1388
1389         vme_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
1390         vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
1391
1392         vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
1393         vme_context->gpe_context.sampler.entry_size = 0;
1394         vme_context->gpe_context.sampler.max_entries = 0;
1395
1396         if (i965->intel.eu_total > 0) {
1397             vme_context->gpe_context.vfe_state.max_num_threads = 6 *
1398                                                                  i965->intel.eu_total;
1399         } else
1400             vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1401
1402         vme_context->gpe_context.vfe_state.num_urb_entries = 64;
1403         vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
1404         vme_context->gpe_context.vfe_state.urb_entry_size = 16;
1405         vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
1406
1407         gen7_vme_scoreboard_init(ctx, vme_context);
1408
1409         gen8_gpe_load_kernels(ctx,
1410                               &vme_context->gpe_context,
1411                               vme_kernel_list,
1412                               i965_kernel_num);
1413         vme_context->vme_surface2_setup = gen8_gpe_surface2_setup;
1414         vme_context->vme_media_rw_surface_setup = gen8_gpe_media_rw_surface_setup;
1415         vme_context->vme_buffer_suface_setup = gen8_gpe_buffer_suface_setup;
1416         vme_context->vme_media_chroma_surface_setup = gen8_gpe_media_chroma_surface_setup;
1417
1418         encoder_context->vme_context = vme_context;
1419         encoder_context->vme_context_destroy = gen8_vme_context_destroy;
1420
1421         vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
1422     }
1423
1424     return True;
1425 }