OSDN Git Service

Unify the coding style in the driver
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_vme.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "intel_driver.h"
37
38 #include "i965_defines.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "gen6_vme.h"
42 #include "gen6_mfc.h"
43
#ifdef SURFACE_STATE_PADDED_SIZE
#undef SURFACE_STATE_PADDED_SIZE
#endif

/*
 * Gen8 surface-state layout helpers. The binding table starts right after
 * MAX_MEDIA_SURFACES_GEN6 padded surface states; each binding-table entry is
 * one unsigned int.
 *
 * The macro argument is parenthesized so that an expression argument such as
 * (i + 1) is multiplied as a whole.
 */
#define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

/* Indices of the VME shaders inside a kernel table. */
#define VME_INTRA_SHADER        0
#define VME_INTER_SHADER        1
#define VME_BINTER_SHADER   2

#define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
#define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */

#define VME_MSG_LENGTH      32
61
62 static const uint32_t gen8_vme_intra_frame[][4] = {
63 #include "shaders/vme/intra_frame_gen8.g8b"
64 };
65
66 static const uint32_t gen8_vme_inter_frame[][4] = {
67 #include "shaders/vme/inter_frame_gen8.g8b"
68 };
69
70 static const uint32_t gen8_vme_inter_bframe[][4] = {
71 #include "shaders/vme/inter_bframe_gen8.g8b"
72 };
73
74 static struct i965_kernel gen8_vme_kernels[] = {
75     {
76         "VME Intra Frame",
77         VME_INTRA_SHADER, /*index*/
78         gen8_vme_intra_frame,
79         sizeof(gen8_vme_intra_frame),
80         NULL
81     },
82     {
83         "VME inter Frame",
84         VME_INTER_SHADER,
85         gen8_vme_inter_frame,
86         sizeof(gen8_vme_inter_frame),
87         NULL
88     },
89     {
90         "VME inter BFrame",
91         VME_BINTER_SHADER,
92         gen8_vme_inter_bframe,
93         sizeof(gen8_vme_inter_bframe),
94         NULL
95     }
96 };
97
98 static const uint32_t gen8_vme_mpeg2_intra_frame[][4] = {
99 #include "shaders/vme/intra_frame_gen8.g8b"
100 };
101
102 static const uint32_t gen8_vme_mpeg2_inter_frame[][4] = {
103 #include "shaders/vme/mpeg2_inter_gen8.g8b"
104 };
105
106 static struct i965_kernel gen8_vme_mpeg2_kernels[] = {
107     {
108         "VME Intra Frame",
109         VME_INTRA_SHADER, /*index*/
110         gen8_vme_mpeg2_intra_frame,
111         sizeof(gen8_vme_mpeg2_intra_frame),
112         NULL
113     },
114     {
115         "VME inter Frame",
116         VME_INTER_SHADER,
117         gen8_vme_mpeg2_inter_frame,
118         sizeof(gen8_vme_mpeg2_inter_frame),
119         NULL
120     },
121 };
122
123 static const uint32_t gen8_vme_vp8_intra_frame[][4] = {
124 #include "shaders/vme/vp8_intra_frame_gen8.g8b"
125 };
126
127 static const uint32_t gen8_vme_vp8_inter_frame[][4] = {
128 #include "shaders/vme/vp8_inter_frame_gen8.g8b"
129 };
130
131 static struct i965_kernel gen8_vme_vp8_kernels[] = {
132     {
133         "VME Intra Frame",
134         VME_INTRA_SHADER, /*index*/
135         gen8_vme_vp8_intra_frame,
136         sizeof(gen8_vme_vp8_intra_frame),
137         NULL
138     },
139     {
140         "VME inter Frame",
141         VME_INTER_SHADER,
142         gen8_vme_vp8_inter_frame,
143         sizeof(gen8_vme_vp8_inter_frame),
144         NULL
145     },
146 };
147
148 /* only used for VME source surface state */
149 static void
150 gen8_vme_source_surface_state(VADriverContextP ctx,
151                               int index,
152                               struct object_surface *obj_surface,
153                               struct intel_encoder_context *encoder_context)
154 {
155     struct gen6_vme_context *vme_context = encoder_context->vme_context;
156
157     vme_context->vme_surface2_setup(ctx,
158                                     &vme_context->gpe_context,
159                                     obj_surface,
160                                     BINDING_TABLE_OFFSET(index),
161                                     SURFACE_STATE_OFFSET(index));
162 }
163
164 static void
165 gen8_vme_media_source_surface_state(VADriverContextP ctx,
166                                     int index,
167                                     struct object_surface *obj_surface,
168                                     struct intel_encoder_context *encoder_context)
169 {
170     struct gen6_vme_context *vme_context = encoder_context->vme_context;
171
172     vme_context->vme_media_rw_surface_setup(ctx,
173                                             &vme_context->gpe_context,
174                                             obj_surface,
175                                             BINDING_TABLE_OFFSET(index),
176                                             SURFACE_STATE_OFFSET(index),
177                                             0);
178 }
179
180 static void
181 gen8_vme_media_chroma_source_surface_state(VADriverContextP ctx,
182                                            int index,
183                                            struct object_surface *obj_surface,
184                                            struct intel_encoder_context *encoder_context)
185 {
186     struct gen6_vme_context *vme_context = encoder_context->vme_context;
187
188     vme_context->vme_media_chroma_surface_setup(ctx,
189                                                 &vme_context->gpe_context,
190                                                 obj_surface,
191                                                 BINDING_TABLE_OFFSET(index),
192                                                 SURFACE_STATE_OFFSET(index),
193                                                 0);
194 }
195
196 static void
197 gen8_vme_output_buffer_setup(VADriverContextP ctx,
198                              struct encode_state *encode_state,
199                              int index,
200                              struct intel_encoder_context *encoder_context,
201                              int is_intra,
202                              int width_in_mbs,
203                              int height_in_mbs)
204
205 {
206     struct i965_driver_data *i965 = i965_driver_data(ctx);
207     struct gen6_vme_context *vme_context = encoder_context->vme_context;
208
209     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
210     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
211
212     if (is_intra)
213         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
214     else
215         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
216     /*
217      * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
218      * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
219      * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
220      */
221
222     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
223                                               "VME output buffer",
224                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
225                                               0x1000);
226     assert(vme_context->vme_output.bo);
227     vme_context->vme_buffer_suface_setup(ctx,
228                                          &vme_context->gpe_context,
229                                          &vme_context->vme_output,
230                                          BINDING_TABLE_OFFSET(index),
231                                          SURFACE_STATE_OFFSET(index));
232 }
233
234 static void
235 gen8_vme_avc_output_buffer_setup(VADriverContextP ctx,
236                                  struct encode_state *encode_state,
237                                  int index,
238                                  struct intel_encoder_context *encoder_context)
239 {
240     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
241     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
242     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
243     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
244     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
245
246     gen8_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
247
248 }
249
250 static void
251 gen8_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
252                                       struct encode_state *encode_state,
253                                       int index,
254                                       struct intel_encoder_context *encoder_context,
255                                       int width_in_mbs,
256                                       int height_in_mbs)
257 {
258     struct i965_driver_data *i965 = i965_driver_data(ctx);
259     struct gen6_vme_context *vme_context = encoder_context->vme_context;
260
261     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
262     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
263     vme_context->vme_batchbuffer.pitch = 16;
264     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
265                                                    "VME batchbuffer",
266                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
267                                                    0x1000);
268     vme_context->vme_buffer_suface_setup(ctx,
269                                          &vme_context->gpe_context,
270                                          &vme_context->vme_batchbuffer,
271                                          BINDING_TABLE_OFFSET(index),
272                                          SURFACE_STATE_OFFSET(index));
273 }
274
275 static void
276 gen8_vme_avc_output_vme_batchbuffer_setup(VADriverContextP ctx,
277                                           struct encode_state *encode_state,
278                                           int index,
279                                           struct intel_encoder_context *encoder_context)
280 {
281     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
282     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
283     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
284
285     gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
286 }
287
288 static VAStatus
289 gen8_vme_surface_setup(VADriverContextP ctx,
290                        struct encode_state *encode_state,
291                        int is_intra,
292                        struct intel_encoder_context *encoder_context)
293 {
294     struct object_surface *obj_surface;
295
296     /*Setup surfaces state*/
297     /* current picture for encoding */
298     obj_surface = encode_state->input_yuv_object;
299     gen8_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
300     gen8_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
301     gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
302
303     if (!is_intra) {
304         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
305         int slice_type;
306
307         slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
308         assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
309
310         intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen8_vme_source_surface_state);
311
312         if (slice_type == SLICE_TYPE_B)
313             intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen8_vme_source_surface_state);
314     }
315
316     /* VME output */
317     gen8_vme_avc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
318     gen8_vme_avc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
319     intel_h264_setup_cost_surface(ctx, encode_state, encoder_context,
320                                   BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET),
321                                   SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET));
322
323     return VA_STATUS_SUCCESS;
324 }
325
326 static VAStatus gen8_vme_interface_setup(VADriverContextP ctx,
327                                          struct encode_state *encode_state,
328                                          struct intel_encoder_context *encoder_context)
329 {
330     struct gen6_vme_context *vme_context = encoder_context->vme_context;
331     struct gen8_interface_descriptor_data *desc;
332     int i;
333     dri_bo *bo;
334     unsigned char *desc_ptr;
335
336     bo = vme_context->gpe_context.idrt.bo;
337     dri_bo_map(bo, 1);
338     assert(bo->virtual);
339     desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt.offset;
340
341     desc = (struct gen8_interface_descriptor_data *)desc_ptr;
342
343     for (i = 0; i < vme_context->vme_kernel_sum; i++) {
344         struct i965_kernel *kernel;
345         kernel = &vme_context->gpe_context.kernels[i];
346         assert(sizeof(*desc) == 32);
347         /*Setup the descritor table*/
348         memset(desc, 0, sizeof(*desc));
349         desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
350         desc->desc3.sampler_count = 0; /* FIXME: */
351         desc->desc3.sampler_state_pointer = 0;
352         desc->desc4.binding_table_entry_count = 1; /* FIXME: */
353         desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
354         desc->desc5.constant_urb_entry_read_offset = 0;
355         desc->desc5.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
356
357
358         desc++;
359     }
360
361     dri_bo_unmap(bo);
362
363     return VA_STATUS_SUCCESS;
364 }
365
366 static VAStatus gen8_vme_constant_setup(VADriverContextP ctx,
367                                         struct encode_state *encode_state,
368                                         struct intel_encoder_context *encoder_context,
369                                         int denom)
370 {
371     struct gen6_vme_context *vme_context = encoder_context->vme_context;
372     unsigned char *constant_buffer;
373     unsigned int *vme_state_message;
374     int mv_num = 32;
375
376     vme_state_message = (unsigned int *)vme_context->vme_state_message;
377
378     if (encoder_context->codec == CODEC_H264 ||
379         encoder_context->codec == CODEC_H264_MVC) {
380         if (vme_context->h264_level >= 30) {
381             mv_num = 16 / denom;
382
383             if (vme_context->h264_level >= 31)
384                 mv_num = 8 / denom;
385         }
386     } else if (encoder_context->codec == CODEC_MPEG2) {
387         mv_num = 2 / denom;
388     }
389
390     vme_state_message[31] = mv_num;
391
392     dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
393     assert(vme_context->gpe_context.curbe.bo->virtual);
394     constant_buffer = (unsigned char *)vme_context->gpe_context.curbe.bo->virtual +
395                       vme_context->gpe_context.curbe.offset;
396
397     /* VME MV/Mb cost table is passed by using const buffer */
398     /* Now it uses the fixed search path. So it is constructed directly
399      * in the GPU shader.
400      */
401     memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
402
403     dri_bo_unmap(vme_context->gpe_context.curbe.bo);
404
405     return VA_STATUS_SUCCESS;
406 }
407
/* Scoreboard dependency bits, OR-ed together into a MEDIA_OBJECT's score_dep dword. */
#define MB_SCOREBOARD_A (1 << 0)
#define MB_SCOREBOARD_B (1 << 1)
#define MB_SCOREBOARD_C (1 << 2)
411
/*
 * Check whether the macroblock at (x_index, y_index) is out of bounds.
 *
 * A macroblock is in bounds when it lies inside the mb_width x mb_height
 * picture and its raster index falls in the slice range
 * [first_mb, first_mb + num_mb).
 *
 * Returns 0 when the macroblock is in bounds, -1 otherwise.
 */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    int mb_index;

    if (x_index < 0 || x_index >= mb_width)
        return -1;
    if (y_index < 0 || y_index >= mb_height)
        return -1;

    mb_index = y_index * mb_width + x_index;
    /*
     * The slice's last macroblock is first_mb + num_mb - 1; index
     * first_mb + num_mb already belongs to the next slice, hence ">=".
     */
    if (mb_index < first_mb || mb_index >= (first_mb + num_mb))
        return -1;
    return 0;
}
426
427 static void
428 gen8wa_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
429                                        struct encode_state *encode_state,
430                                        int mb_width, int mb_height,
431                                        int kernel,
432                                        int transform_8x8_mode_flag,
433                                        struct intel_encoder_context *encoder_context)
434 {
435     struct gen6_vme_context *vme_context = encoder_context->vme_context;
436     int mb_row;
437     int s;
438     unsigned int *command_ptr;
439
440 #define     USE_SCOREBOARD      (1 << 21)
441
442     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
443     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
444
445     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
446         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
447         int first_mb = pSliceParameter->macroblock_address;
448         int num_mb = pSliceParameter->num_macroblocks;
449         unsigned int mb_intra_ub, score_dep;
450         int x_outer, y_outer, x_inner, y_inner;
451         int xtemp_outer = 0;
452
453         x_outer = first_mb % mb_width;
454         y_outer = first_mb / mb_width;
455         mb_row = y_outer;
456
457         for (; x_outer < (mb_width - 2) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
458             x_inner = x_outer;
459             y_inner = y_outer;
460             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
461                 mb_intra_ub = 0;
462                 score_dep = 0;
463                 if (x_inner != 0) {
464                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
465                     score_dep |= MB_SCOREBOARD_A;
466                 }
467                 if (y_inner != mb_row) {
468                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
469                     score_dep |= MB_SCOREBOARD_B;
470                     if (x_inner != 0)
471                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
472                     if (x_inner != (mb_width - 1)) {
473                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
474                         score_dep |= MB_SCOREBOARD_C;
475                     }
476                 }
477
478                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
479                 *command_ptr++ = kernel;
480                 *command_ptr++ = USE_SCOREBOARD;
481                 /* Indirect data */
482                 *command_ptr++ = 0;
483                 /* the (X, Y) term of scoreboard */
484                 *command_ptr++ = ((y_inner << 16) | x_inner);
485                 *command_ptr++ = score_dep;
486                 /*inline data */
487                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
488                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
489                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
490                 *command_ptr++ = 0;
491
492                 x_inner -= 2;
493                 y_inner += 1;
494             }
495             x_outer += 1;
496         }
497
498         xtemp_outer = mb_width - 2;
499         if (xtemp_outer < 0)
500             xtemp_outer = 0;
501         x_outer = xtemp_outer;
502         y_outer = first_mb / mb_width;
503         for (; !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
504             y_inner = y_outer;
505             x_inner = x_outer;
506             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
507                 mb_intra_ub = 0;
508                 score_dep = 0;
509                 if (x_inner != 0) {
510                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
511                     score_dep |= MB_SCOREBOARD_A;
512                 }
513                 if (y_inner != mb_row) {
514                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
515                     score_dep |= MB_SCOREBOARD_B;
516                     if (x_inner != 0)
517                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
518
519                     if (x_inner != (mb_width - 1)) {
520                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
521                         score_dep |= MB_SCOREBOARD_C;
522                     }
523                 }
524
525                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
526                 *command_ptr++ = kernel;
527                 *command_ptr++ = USE_SCOREBOARD;
528                 /* Indirect data */
529                 *command_ptr++ = 0;
530                 /* the (X, Y) term of scoreboard */
531                 *command_ptr++ = ((y_inner << 16) | x_inner);
532                 *command_ptr++ = score_dep;
533                 /*inline data */
534                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
535                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
536
537                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
538                 *command_ptr++ = 0;
539                 x_inner -= 2;
540                 y_inner += 1;
541             }
542             x_outer++;
543             if (x_outer >= mb_width) {
544                 y_outer += 1;
545                 x_outer = xtemp_outer;
546             }
547         }
548     }
549
550     *command_ptr++ = MI_BATCH_BUFFER_END;
551     *command_ptr++ = 0;
552
553     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
554 }
555
556 static void
557 gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx,
558                               struct encode_state *encode_state,
559                               int mb_width, int mb_height,
560                               int kernel,
561                               int transform_8x8_mode_flag,
562                               struct intel_encoder_context *encoder_context)
563 {
564     struct gen6_vme_context *vme_context = encoder_context->vme_context;
565     int mb_x = 0, mb_y = 0;
566     int i, s;
567     unsigned int *command_ptr;
568     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
569     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
570     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
571     int qp;
572     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
573     int qp_mb, qp_index;
574
575     if (encoder_context->rate_control_mode == VA_RC_CQP)
576         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
577     else
578         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
579
580     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
581     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
582
583     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
584         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
585         int slice_mb_begin = pSliceParameter->macroblock_address;
586         int slice_mb_number = pSliceParameter->num_macroblocks;
587         unsigned int mb_intra_ub;
588         int slice_mb_x = pSliceParameter->macroblock_address % mb_width;
589         for (i = 0; i < slice_mb_number;) {
590             int mb_count = i + slice_mb_begin;
591             mb_x = mb_count % mb_width;
592             mb_y = mb_count / mb_width;
593             mb_intra_ub = 0;
594             if (mb_x != 0) {
595                 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
596             }
597             if (mb_y != 0) {
598                 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
599                 if (mb_x != 0)
600                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
601                 if (mb_x != (mb_width - 1))
602                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
603             }
604             if (i < mb_width) {
605                 if (i == 0)
606                     mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
607                 mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
608                 if ((i == (mb_width - 1)) && slice_mb_x) {
609                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
610                 }
611             }
612
613             if ((i == mb_width) && slice_mb_x) {
614                 mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
615             }
616             *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
617             *command_ptr++ = kernel;
618             *command_ptr++ = 0;
619             *command_ptr++ = 0;
620             *command_ptr++ = 0;
621             *command_ptr++ = 0;
622
623             /*inline data */
624             *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
625             *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
626             /* qp occupies one byte */
627             if (vme_context->roi_enabled) {
628                 qp_index = mb_y * mb_width + mb_x;
629                 qp_mb = *(vme_context->qp_per_mb + qp_index);
630             } else
631                 qp_mb = qp;
632             *command_ptr++ = qp_mb;
633
634             *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
635             *command_ptr++ = 0;
636             i += 1;
637         }
638     }
639
640     *command_ptr++ = MI_BATCH_BUFFER_END;
641     *command_ptr++ = 0;
642
643     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
644 }
645
646 static void gen8_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
647 {
648     struct gen6_vme_context *vme_context = encoder_context->vme_context;
649
650     gen8_gpe_context_init(ctx, &vme_context->gpe_context);
651
652     /* VME output buffer */
653     dri_bo_unreference(vme_context->vme_output.bo);
654     vme_context->vme_output.bo = NULL;
655
656     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
657     vme_context->vme_batchbuffer.bo = NULL;
658
659     /* VME state */
660     dri_bo_unreference(vme_context->vme_state.bo);
661     vme_context->vme_state.bo = NULL;
662 }
663
664 static void gen8_vme_pipeline_programing(VADriverContextP ctx,
665                                          struct encode_state *encode_state,
666                                          struct intel_encoder_context *encoder_context)
667 {
668     struct gen6_vme_context *vme_context = encoder_context->vme_context;
669     struct intel_batchbuffer *batch = encoder_context->base.batch;
670     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
671     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
672     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
673     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
674     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
675     int kernel_shader;
676     bool allow_hwscore = true;
677     int s;
678     unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
679
680     if (is_low_quality)
681         allow_hwscore = false;
682     else {
683         for (s = 0; s < encode_state->num_slice_params_ext; s++) {
684             pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
685             if ((pSliceParameter->macroblock_address % width_in_mbs)) {
686                 allow_hwscore = false;
687                 break;
688             }
689         }
690     }
691
692     if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
693         (pSliceParameter->slice_type == SLICE_TYPE_SI)) {
694         kernel_shader = VME_INTRA_SHADER;
695     } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
696                (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
697         kernel_shader = VME_INTER_SHADER;
698     } else {
699         kernel_shader = VME_BINTER_SHADER;
700         if (!allow_hwscore)
701             kernel_shader = VME_INTER_SHADER;
702     }
703     if (allow_hwscore)
704         gen8wa_vme_walker_fill_vme_batchbuffer(ctx,
705                                                encode_state,
706                                                width_in_mbs, height_in_mbs,
707                                                kernel_shader,
708                                                pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
709                                                encoder_context);
710     else
711         gen8_vme_fill_vme_batchbuffer(ctx,
712                                       encode_state,
713                                       width_in_mbs, height_in_mbs,
714                                       kernel_shader,
715                                       pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
716                                       encoder_context);
717
718     intel_batchbuffer_start_atomic(batch, 0x1000);
719     gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
720     BEGIN_BATCH(batch, 3);
721     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
722     OUT_RELOC64(batch,
723                 vme_context->vme_batchbuffer.bo,
724                 I915_GEM_DOMAIN_COMMAND, 0,
725                 0);
726     ADVANCE_BATCH(batch);
727
728     intel_batchbuffer_end_atomic(batch);
729 }
730
731 static VAStatus gen8_vme_prepare(VADriverContextP ctx,
732                                  struct encode_state *encode_state,
733                                  struct intel_encoder_context *encoder_context)
734 {
735     VAStatus vaStatus = VA_STATUS_SUCCESS;
736     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
737     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
738     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
739     struct gen6_vme_context *vme_context = encoder_context->vme_context;
740
741     if (!vme_context->h264_level ||
742         (vme_context->h264_level != pSequenceParameter->level_idc)) {
743         vme_context->h264_level = pSequenceParameter->level_idc;
744     }
745
746     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
747     intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context);
748     intel_h264_enc_roi_config(ctx, encode_state, encoder_context);
749
750     /*Setup all the memory object*/
751     gen8_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
752     gen8_vme_interface_setup(ctx, encode_state, encoder_context);
753     //gen8_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
754     gen8_vme_constant_setup(ctx, encode_state, encoder_context, (pSliceParameter->slice_type == SLICE_TYPE_B) ? 2 : 1);
755
756     /*Programing media pipeline*/
757     gen8_vme_pipeline_programing(ctx, encode_state, encoder_context);
758
759     return vaStatus;
760 }
761
762 static VAStatus gen8_vme_run(VADriverContextP ctx,
763                              struct encode_state *encode_state,
764                              struct intel_encoder_context *encoder_context)
765 {
766     struct intel_batchbuffer *batch = encoder_context->base.batch;
767
768     intel_batchbuffer_flush(batch);
769
770     return VA_STATUS_SUCCESS;
771 }
772
773 static VAStatus gen8_vme_stop(VADriverContextP ctx,
774                               struct encode_state *encode_state,
775                               struct intel_encoder_context *encoder_context)
776 {
777     return VA_STATUS_SUCCESS;
778 }
779
780 static VAStatus
781 gen8_vme_pipeline(VADriverContextP ctx,
782                   VAProfile profile,
783                   struct encode_state *encode_state,
784                   struct intel_encoder_context *encoder_context)
785 {
786     gen8_vme_media_init(ctx, encoder_context);
787     gen8_vme_prepare(ctx, encode_state, encoder_context);
788     gen8_vme_run(ctx, encode_state, encoder_context);
789     gen8_vme_stop(ctx, encode_state, encoder_context);
790
791     return VA_STATUS_SUCCESS;
792 }
793
794 static void
795 gen8_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
796                                    struct encode_state *encode_state,
797                                    int index,
798                                    int is_intra,
799                                    struct intel_encoder_context *encoder_context)
800 {
801     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
802     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
803     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
804
805     gen8_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
806 }
807
808 static void
809 gen8_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
810                                             struct encode_state *encode_state,
811                                             int index,
812                                             struct intel_encoder_context *encoder_context)
813 {
814     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
815     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
816     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
817
818     gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
819 }
820
821 static VAStatus
822 gen8_vme_mpeg2_surface_setup(VADriverContextP ctx,
823                              struct encode_state *encode_state,
824                              int is_intra,
825                              struct intel_encoder_context *encoder_context)
826 {
827     struct object_surface *obj_surface;
828
829     /*Setup surfaces state*/
830     /* current picture for encoding */
831     obj_surface = encode_state->input_yuv_object;
832     gen8_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
833     gen8_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
834     gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
835
836     if (!is_intra) {
837         /* reference 0 */
838         obj_surface = encode_state->reference_objects[0];
839
840         if (obj_surface->bo != NULL)
841             gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
842
843         /* reference 1 */
844         obj_surface = encode_state->reference_objects[1];
845
846         if (obj_surface && obj_surface->bo != NULL)
847             gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
848     }
849
850     /* VME output */
851     gen8_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
852     gen8_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
853
854     return VA_STATUS_SUCCESS;
855 }
856
857 static void
858 gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
859                                              struct encode_state *encode_state,
860                                              int mb_width, int mb_height,
861                                              int kernel,
862                                              struct intel_encoder_context *encoder_context)
863 {
864     struct gen6_vme_context *vme_context = encoder_context->vme_context;
865     unsigned int *command_ptr;
866
867 #define     MPEG2_SCOREBOARD        (1 << 21)
868
869     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
870     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
871
872     {
873         unsigned int mb_intra_ub, score_dep;
874         int x_outer, y_outer, x_inner, y_inner;
875         int xtemp_outer = 0;
876         int first_mb = 0;
877         int num_mb = mb_width * mb_height;
878
879         x_outer = 0;
880         y_outer = 0;
881
882
883         for (; x_outer < (mb_width - 2) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
884             x_inner = x_outer;
885             y_inner = y_outer;
886             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
887                 mb_intra_ub = 0;
888                 score_dep = 0;
889                 if (x_inner != 0) {
890                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
891                     score_dep |= MB_SCOREBOARD_A;
892                 }
893                 if (y_inner != 0) {
894                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
895                     score_dep |= MB_SCOREBOARD_B;
896
897                     if (x_inner != 0)
898                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
899
900                     if (x_inner != (mb_width - 1)) {
901                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
902                         score_dep |= MB_SCOREBOARD_C;
903                     }
904                 }
905
906                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
907                 *command_ptr++ = kernel;
908                 *command_ptr++ = MPEG2_SCOREBOARD;
909                 /* Indirect data */
910                 *command_ptr++ = 0;
911                 /* the (X, Y) term of scoreboard */
912                 *command_ptr++ = ((y_inner << 16) | x_inner);
913                 *command_ptr++ = score_dep;
914                 /*inline data */
915                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
916                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
917                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
918                 *command_ptr++ = 0;
919
920                 x_inner -= 2;
921                 y_inner += 1;
922             }
923             x_outer += 1;
924         }
925
926         xtemp_outer = mb_width - 2;
927         if (xtemp_outer < 0)
928             xtemp_outer = 0;
929         x_outer = xtemp_outer;
930         y_outer = 0;
931         for (; !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
932             y_inner = y_outer;
933             x_inner = x_outer;
934             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
935                 mb_intra_ub = 0;
936                 score_dep = 0;
937                 if (x_inner != 0) {
938                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
939                     score_dep |= MB_SCOREBOARD_A;
940                 }
941                 if (y_inner != 0) {
942                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
943                     score_dep |= MB_SCOREBOARD_B;
944
945                     if (x_inner != 0)
946                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
947
948                     if (x_inner != (mb_width - 1)) {
949                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
950                         score_dep |= MB_SCOREBOARD_C;
951                     }
952                 }
953
954                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
955                 *command_ptr++ = kernel;
956                 *command_ptr++ = MPEG2_SCOREBOARD;
957                 /* Indirect data */
958                 *command_ptr++ = 0;
959                 /* the (X, Y) term of scoreboard */
960                 *command_ptr++ = ((y_inner << 16) | x_inner);
961                 *command_ptr++ = score_dep;
962                 /*inline data */
963                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
964                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
965
966                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
967                 *command_ptr++ = 0;
968                 x_inner -= 2;
969                 y_inner += 1;
970             }
971             x_outer++;
972             if (x_outer >= mb_width) {
973                 y_outer += 1;
974                 x_outer = xtemp_outer;
975             }
976         }
977     }
978
979     *command_ptr++ = MI_BATCH_BUFFER_END;
980     *command_ptr++ = 0;
981
982     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
983     return;
984 }
985
986 static void
987 gen8_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
988                                     struct encode_state *encode_state,
989                                     int mb_width, int mb_height,
990                                     int kernel,
991                                     int transform_8x8_mode_flag,
992                                     struct intel_encoder_context *encoder_context)
993 {
994     struct gen6_vme_context *vme_context = encoder_context->vme_context;
995     int mb_x = 0, mb_y = 0;
996     int i, s, j;
997     unsigned int *command_ptr;
998
999
1000     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1001     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1002
1003     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1004         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
1005
1006         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
1007             int slice_mb_begin = slice_param->macroblock_address;
1008             int slice_mb_number = slice_param->num_macroblocks;
1009             unsigned int mb_intra_ub;
1010
1011             for (i = 0; i < slice_mb_number;) {
1012                 int mb_count = i + slice_mb_begin;
1013
1014                 mb_x = mb_count % mb_width;
1015                 mb_y = mb_count / mb_width;
1016                 mb_intra_ub = 0;
1017
1018                 if (mb_x != 0) {
1019                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1020                 }
1021
1022                 if (mb_y != 0) {
1023                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1024
1025                     if (mb_x != 0)
1026                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1027
1028                     if (mb_x != (mb_width - 1))
1029                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1030                 }
1031
1032                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1033                 *command_ptr++ = kernel;
1034                 *command_ptr++ = 0;
1035                 *command_ptr++ = 0;
1036                 *command_ptr++ = 0;
1037                 *command_ptr++ = 0;
1038
1039                 /*inline data */
1040                 *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
1041                 *command_ptr++ = ((1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1042
1043                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
1044                 *command_ptr++ = 0;
1045                 i += 1;
1046             }
1047
1048             slice_param++;
1049         }
1050     }
1051
1052     *command_ptr++ = MI_BATCH_BUFFER_END;
1053     *command_ptr++ = 0;
1054
1055     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1056 }
1057
1058 static void
1059 gen8_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
1060                                    struct encode_state *encode_state,
1061                                    int is_intra,
1062                                    struct intel_encoder_context *encoder_context)
1063 {
1064     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1065     struct intel_batchbuffer *batch = encoder_context->base.batch;
1066     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1067     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1068     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1069     bool allow_hwscore = true;
1070     int s;
1071     int kernel_shader;
1072     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1073
1074     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1075         int j;
1076         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
1077
1078         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
1079             if (slice_param->macroblock_address % width_in_mbs) {
1080                 allow_hwscore = false;
1081                 break;
1082             }
1083         }
1084     }
1085
1086     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1087     if (pic_param->picture_type == VAEncPictureTypeIntra) {
1088         allow_hwscore = false;
1089         kernel_shader = VME_INTRA_SHADER;
1090     } else {
1091         kernel_shader = VME_INTER_SHADER;
1092     }
1093
1094     if (allow_hwscore)
1095         gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
1096                                                      encode_state,
1097                                                      width_in_mbs, height_in_mbs,
1098                                                      kernel_shader,
1099                                                      encoder_context);
1100     else
1101         gen8_vme_mpeg2_fill_vme_batchbuffer(ctx,
1102                                             encode_state,
1103                                             width_in_mbs, height_in_mbs,
1104                                             is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
1105                                             0,
1106                                             encoder_context);
1107
1108     intel_batchbuffer_start_atomic(batch, 0x1000);
1109     gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1110     BEGIN_BATCH(batch, 4);
1111     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1112     OUT_RELOC64(batch,
1113                 vme_context->vme_batchbuffer.bo,
1114                 I915_GEM_DOMAIN_COMMAND, 0,
1115                 0);
1116     OUT_BATCH(batch, 0);
1117     ADVANCE_BATCH(batch);
1118
1119     intel_batchbuffer_end_atomic(batch);
1120 }
1121
1122 static VAStatus
1123 gen8_vme_mpeg2_prepare(VADriverContextP ctx,
1124                        struct encode_state *encode_state,
1125                        struct intel_encoder_context *encoder_context)
1126 {
1127     VAStatus vaStatus = VA_STATUS_SUCCESS;
1128     VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1129     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1130     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1131
1132     if ((!vme_context->mpeg2_level) ||
1133         (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
1134         vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
1135     }
1136
1137
1138     /*Setup all the memory object*/
1139     gen8_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1140     gen8_vme_interface_setup(ctx, encode_state, encoder_context);
1141     //gen8_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1142     intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
1143     gen8_vme_constant_setup(ctx, encode_state, encoder_context, 1);
1144
1145     /*Programing media pipeline*/
1146     gen8_vme_mpeg2_pipeline_programing(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1147
1148     return vaStatus;
1149 }
1150
1151 static VAStatus
1152 gen8_vme_mpeg2_pipeline(VADriverContextP ctx,
1153                         VAProfile profile,
1154                         struct encode_state *encode_state,
1155                         struct intel_encoder_context *encoder_context)
1156 {
1157     gen8_vme_media_init(ctx, encoder_context);
1158     gen8_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
1159     gen8_vme_run(ctx, encode_state, encoder_context);
1160     gen8_vme_stop(ctx, encode_state, encoder_context);
1161
1162     return VA_STATUS_SUCCESS;
1163 }
1164
1165 static void
1166 gen8_vme_vp8_output_buffer_setup(VADriverContextP ctx,
1167                                  struct encode_state *encode_state,
1168                                  int index,
1169                                  int is_intra,
1170                                  struct intel_encoder_context *encoder_context)
1171 {
1172     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1173     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1174     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1175
1176     gen8_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
1177 }
1178
1179 static void
1180 gen8_vme_vp8_output_vme_batchbuffer_setup(VADriverContextP ctx,
1181                                           struct encode_state *encode_state,
1182                                           int index,
1183                                           struct intel_encoder_context *encoder_context)
1184 {
1185     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1186     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1187     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1188
1189     gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
1190 }
1191
1192 static VAStatus
1193 gen8_vme_vp8_surface_setup(VADriverContextP ctx,
1194                            struct encode_state *encode_state,
1195                            int is_intra,
1196                            struct intel_encoder_context *encoder_context)
1197 {
1198     struct object_surface *obj_surface;
1199
1200     /*Setup surfaces state*/
1201     /* current picture for encoding */
1202     obj_surface = encode_state->input_yuv_object;
1203     gen8_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
1204     gen8_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
1205     gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
1206
1207     if (!is_intra) {
1208         /* reference 0 */
1209         obj_surface = encode_state->reference_objects[0];
1210
1211         if (obj_surface->bo != NULL)
1212             gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
1213
1214         /* reference 1 */
1215         obj_surface = encode_state->reference_objects[1];
1216
1217         if (obj_surface && obj_surface->bo != NULL)
1218             gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
1219     }
1220
1221     /* VME output */
1222     gen8_vme_vp8_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
1223     gen8_vme_vp8_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
1224
1225     return VA_STATUS_SUCCESS;
1226 }
1227
1228 static void
1229 gen8_vme_vp8_pipeline_programing(VADriverContextP ctx,
1230                                  struct encode_state *encode_state,
1231                                  int is_intra,
1232                                  struct intel_encoder_context *encoder_context)
1233 {
1234     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1235     struct intel_batchbuffer *batch = encoder_context->base.batch;
1236     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1237     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1238     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1239     int kernel_shader = (is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER);
1240
1241     gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
1242                                                  encode_state,
1243                                                  width_in_mbs, height_in_mbs,
1244                                                  kernel_shader,
1245                                                  encoder_context);
1246
1247     intel_batchbuffer_start_atomic(batch, 0x1000);
1248     gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1249     BEGIN_BATCH(batch, 4);
1250     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1251     OUT_RELOC64(batch,
1252                 vme_context->vme_batchbuffer.bo,
1253                 I915_GEM_DOMAIN_COMMAND, 0,
1254                 0);
1255     OUT_BATCH(batch, 0);
1256     ADVANCE_BATCH(batch);
1257
1258     intel_batchbuffer_end_atomic(batch);
1259 }
1260
1261 static VAStatus gen8_vme_vp8_prepare(VADriverContextP ctx,
1262                                      struct encode_state *encode_state,
1263                                      struct intel_encoder_context *encoder_context)
1264 {
1265     VAStatus vaStatus = VA_STATUS_SUCCESS;
1266     VAEncPictureParameterBufferVP8 *pPicParameter = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
1267     int is_intra = !pPicParameter->pic_flags.bits.frame_type;
1268
1269     /* update vp8 mbmv cost */
1270     intel_vme_vp8_update_mbmv_cost(ctx, encode_state, encoder_context);
1271
1272     /*Setup all the memory object*/
1273     gen8_vme_vp8_surface_setup(ctx, encode_state, is_intra, encoder_context);
1274     gen8_vme_interface_setup(ctx, encode_state, encoder_context);
1275     gen8_vme_constant_setup(ctx, encode_state, encoder_context, 1);
1276
1277     /*Programing media pipeline*/
1278     gen8_vme_vp8_pipeline_programing(ctx, encode_state, is_intra, encoder_context);
1279
1280     return vaStatus;
1281 }
1282
1283 static VAStatus
1284 gen8_vme_vp8_pipeline(VADriverContextP ctx,
1285                       VAProfile profile,
1286                       struct encode_state *encode_state,
1287                       struct intel_encoder_context *encoder_context)
1288 {
1289     gen8_vme_media_init(ctx, encoder_context);
1290     gen8_vme_vp8_prepare(ctx, encode_state, encoder_context);
1291     gen8_vme_run(ctx, encode_state, encoder_context);
1292     gen8_vme_stop(ctx, encode_state, encoder_context);
1293
1294     return VA_STATUS_SUCCESS;
1295 }
1296
1297 static void
1298 gen8_vme_context_destroy(void *context)
1299 {
1300     struct gen6_vme_context *vme_context = context;
1301
1302     gen8_gpe_context_destroy(&vme_context->gpe_context);
1303
1304     dri_bo_unreference(vme_context->vme_output.bo);
1305     vme_context->vme_output.bo = NULL;
1306
1307     dri_bo_unreference(vme_context->vme_state.bo);
1308     vme_context->vme_state.bo = NULL;
1309
1310     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
1311     vme_context->vme_batchbuffer.bo = NULL;
1312
1313     free(vme_context->vme_state_message);
1314     vme_context->vme_state_message = NULL;
1315
1316     dri_bo_unreference(vme_context->i_qp_cost_table);
1317     vme_context->i_qp_cost_table = NULL;
1318
1319     dri_bo_unreference(vme_context->p_qp_cost_table);
1320     vme_context->p_qp_cost_table = NULL;
1321
1322     dri_bo_unreference(vme_context->b_qp_cost_table);
1323     vme_context->b_qp_cost_table = NULL;
1324
1325     free(vme_context->qp_per_mb);
1326     vme_context->qp_per_mb = NULL;
1327
1328     free(vme_context);
1329 }
1330
1331 extern Bool i965_encoder_vp8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
1332
1333 Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1334 {
1335     struct i965_driver_data *i965 = i965_driver_data(ctx);
1336     struct gen6_vme_context *vme_context = NULL;
1337     struct i965_kernel *vme_kernel_list = NULL;
1338     int i965_kernel_num;
1339
1340     if (IS_CHERRYVIEW(i965->intel.device_info) && encoder_context->codec == CODEC_VP8)
1341         return i965_encoder_vp8_vme_context_init(ctx, encoder_context);
1342
1343     switch (encoder_context->codec) {
1344     case CODEC_H264:
1345     case CODEC_H264_MVC:
1346         vme_kernel_list = gen8_vme_kernels;
1347         encoder_context->vme_pipeline = gen8_vme_pipeline;
1348         i965_kernel_num = sizeof(gen8_vme_kernels) / sizeof(struct i965_kernel);
1349         break;
1350
1351     case CODEC_MPEG2:
1352         vme_kernel_list = gen8_vme_mpeg2_kernels;
1353         encoder_context->vme_pipeline = gen8_vme_mpeg2_pipeline;
1354         i965_kernel_num = sizeof(gen8_vme_mpeg2_kernels) / sizeof(struct i965_kernel);
1355         break;
1356
1357     case CODEC_JPEG:
1358         //JPEG encode doesnt have vme. So, set the corresponding fields to NULL.
1359         encoder_context->vme_context = NULL;
1360         encoder_context->vme_pipeline = NULL;
1361         encoder_context->vme_context_destroy = NULL;
1362         break;
1363
1364     case CODEC_VP8:
1365         vme_kernel_list = gen8_vme_vp8_kernels;
1366         encoder_context->vme_pipeline = gen8_vme_vp8_pipeline;
1367         i965_kernel_num = sizeof(gen8_vme_vp8_kernels) / sizeof(struct i965_kernel);
1368         break;
1369
1370     default:
1371         /* never get here */
1372         assert(0);
1373
1374         break;
1375     }
1376
1377     //If the codec is JPEG, bypass VME
1378     if (encoder_context->codec != CODEC_JPEG) {
1379         vme_context = calloc(1, sizeof(struct gen6_vme_context));
1380         assert(vme_context);
1381         vme_context->vme_kernel_sum = i965_kernel_num;
1382         vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1383
1384         vme_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
1385         vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
1386
1387         vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
1388         vme_context->gpe_context.sampler.entry_size = 0;
1389         vme_context->gpe_context.sampler.max_entries = 0;
1390
1391         if (i965->intel.eu_total > 0) {
1392             vme_context->gpe_context.vfe_state.max_num_threads = 6 *
1393                                                                  i965->intel.eu_total;
1394         } else
1395             vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1396
1397         vme_context->gpe_context.vfe_state.num_urb_entries = 64;
1398         vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
1399         vme_context->gpe_context.vfe_state.urb_entry_size = 16;
1400         vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
1401
1402         gen7_vme_scoreboard_init(ctx, vme_context);
1403
1404         gen8_gpe_load_kernels(ctx,
1405                               &vme_context->gpe_context,
1406                               vme_kernel_list,
1407                               i965_kernel_num);
1408         vme_context->vme_surface2_setup = gen8_gpe_surface2_setup;
1409         vme_context->vme_media_rw_surface_setup = gen8_gpe_media_rw_surface_setup;
1410         vme_context->vme_buffer_suface_setup = gen8_gpe_buffer_suface_setup;
1411         vme_context->vme_media_chroma_surface_setup = gen8_gpe_media_chroma_surface_setup;
1412
1413         encoder_context->vme_context = vme_context;
1414         encoder_context->vme_context_destroy = gen8_vme_context_destroy;
1415
1416         vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
1417     }
1418
1419     return True;
1420 }