OSDN Git Service

ROI:enable on gen8 and gen9
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_vme.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "intel_driver.h"
37
38 #include "i965_defines.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "gen6_vme.h"
42 #include "gen6_mfc.h"
43
#ifdef SURFACE_STATE_PADDED_SIZE
#undef SURFACE_STATE_PADDED_SIZE
#endif

/*
 * Offsets inside the combined surface-state / binding-table area.
 * Surface state N lives at N padded slots from the start; the binding table
 * begins after MAX_MEDIA_SURFACES_GEN6 such slots and holds one
 * unsigned int per entry.  The macro argument is parenthesized so that an
 * expression such as (base + 1) is scaled as a whole.
 */
#define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

/* Indices of the VME kernels inside a kernel table. */
#define VME_INTRA_SHADER        0
#define VME_INTER_SHADER        1
#define VME_BINTER_SHADER       2

#define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
#define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */

/* Length of the VME state message; the code indexes it as unsigned ints. */
#define VME_MSG_LENGTH          32
61   
/*
 * Pre-built kernel binaries for the H.264 VME path.  Each .g8b file is
 * textually included as the initializer rows of a uint32_t[][4] array.
 */
static const uint32_t gen8_vme_intra_frame[][4] = {
#include "shaders/vme/intra_frame_gen8.g8b"
};

static const uint32_t gen8_vme_inter_frame[][4] = {
#include "shaders/vme/inter_frame_gen8.g8b"
};

static const uint32_t gen8_vme_inter_bframe[][4] = {
#include "shaders/vme/inter_bframe_gen8.g8b"
};

/*
 * Kernel table for the H.264 VME path.  Entries are positional: display
 * name, VME_*_SHADER index, binary, binary size in bytes, and a trailing
 * NULL (the field it initializes is declared outside this file).
 * The entry order matches the VME_*_SHADER index values.
 */
static struct i965_kernel gen8_vme_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen8_vme_intra_frame,                   
        sizeof(gen8_vme_intra_frame),           
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen8_vme_inter_frame,
        sizeof(gen8_vme_inter_frame),
        NULL
    },
    {
        "VME inter BFrame",
        VME_BINTER_SHADER,
        gen8_vme_inter_bframe,
        sizeof(gen8_vme_inter_bframe),
        NULL
    }
};
97
/*
 * Pre-built kernel binaries for the MPEG-2 VME path.  The intra kernel is
 * built from the same intra_frame_gen8.g8b file that the H.264 intra
 * kernel includes; only the inter kernel is MPEG-2 specific.
 */
static const uint32_t gen8_vme_mpeg2_intra_frame[][4] = {
#include "shaders/vme/intra_frame_gen8.g8b"
};

static const uint32_t gen8_vme_mpeg2_inter_frame[][4] = {
#include "shaders/vme/mpeg2_inter_gen8.g8b"
};

/*
 * Kernel table for the MPEG-2 VME path (intra and inter only; there is no
 * VME_BINTER_SHADER entry).  Entries are positional: display name,
 * VME_*_SHADER index, binary, binary size in bytes, trailing NULL.
 */
static struct i965_kernel gen8_vme_mpeg2_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen8_vme_mpeg2_intra_frame,                     
        sizeof(gen8_vme_mpeg2_intra_frame),             
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen8_vme_mpeg2_inter_frame,
        sizeof(gen8_vme_mpeg2_inter_frame),
        NULL
    },
};
122
/*
 * Pre-built kernel binaries for the VP8 VME path, included as the
 * initializer rows of uint32_t[][4] arrays.
 */
static const uint32_t gen8_vme_vp8_intra_frame[][4] = {
#include "shaders/vme/vp8_intra_frame_gen8.g8b"
};

static const uint32_t gen8_vme_vp8_inter_frame[][4] = {
#include "shaders/vme/vp8_inter_frame_gen8.g8b"
};

/*
 * Kernel table for the VP8 VME path (intra and inter only).  Entries are
 * positional: display name, VME_*_SHADER index, binary, binary size in
 * bytes, trailing NULL.
 */
static struct i965_kernel gen8_vme_vp8_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen8_vme_vp8_intra_frame,
        sizeof(gen8_vme_vp8_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen8_vme_vp8_inter_frame,
        sizeof(gen8_vme_vp8_inter_frame),
        NULL
    },
};
147
148 /* only used for VME source surface state */
149 static void 
150 gen8_vme_source_surface_state(VADriverContextP ctx,
151                               int index,
152                               struct object_surface *obj_surface,
153                               struct intel_encoder_context *encoder_context)
154 {
155     struct gen6_vme_context *vme_context = encoder_context->vme_context;
156
157     vme_context->vme_surface2_setup(ctx,
158                                     &vme_context->gpe_context,
159                                     obj_surface,
160                                     BINDING_TABLE_OFFSET(index),
161                                     SURFACE_STATE_OFFSET(index));
162 }
163
164 static void
165 gen8_vme_media_source_surface_state(VADriverContextP ctx,
166                                     int index,
167                                     struct object_surface *obj_surface,
168                                     struct intel_encoder_context *encoder_context)
169 {
170     struct gen6_vme_context *vme_context = encoder_context->vme_context;
171
172     vme_context->vme_media_rw_surface_setup(ctx,
173                                             &vme_context->gpe_context,
174                                             obj_surface,
175                                             BINDING_TABLE_OFFSET(index),
176                                             SURFACE_STATE_OFFSET(index));
177 }
178
179 static void
180 gen8_vme_media_chroma_source_surface_state(VADriverContextP ctx,
181                                            int index,
182                                            struct object_surface *obj_surface,
183                                            struct intel_encoder_context *encoder_context)
184 {
185     struct gen6_vme_context *vme_context = encoder_context->vme_context;
186
187     vme_context->vme_media_chroma_surface_setup(ctx,
188                                                 &vme_context->gpe_context,
189                                                 obj_surface,
190                                                 BINDING_TABLE_OFFSET(index),
191                                                 SURFACE_STATE_OFFSET(index));
192 }
193
194 static void
195 gen8_vme_output_buffer_setup(VADriverContextP ctx,
196                              struct encode_state *encode_state,
197                              int index,
198                              struct intel_encoder_context *encoder_context,
199                              int is_intra,
200                              int width_in_mbs,
201                              int height_in_mbs)
202
203 {
204     struct i965_driver_data *i965 = i965_driver_data(ctx);
205     struct gen6_vme_context *vme_context = encoder_context->vme_context;
206
207     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
208     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
209
210     if (is_intra)
211         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
212     else
213         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
214     /*
215      * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
216      * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
217      * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
218      */
219
220     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
221                                               "VME output buffer",
222                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
223                                               0x1000);
224     assert(vme_context->vme_output.bo);
225     vme_context->vme_buffer_suface_setup(ctx,
226                                          &vme_context->gpe_context,
227                                          &vme_context->vme_output,
228                                          BINDING_TABLE_OFFSET(index),
229                                          SURFACE_STATE_OFFSET(index));
230 }
231
232 static void
233 gen8_vme_avc_output_buffer_setup(VADriverContextP ctx,
234                              struct encode_state *encode_state,
235                              int index,
236                              struct intel_encoder_context *encoder_context)
237 {
238     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
239     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
240     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
241     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
242     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
243
244     gen8_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
245
246 }
247
248 static void
249 gen8_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
250                                       struct encode_state *encode_state,
251                                       int index,
252                                       struct intel_encoder_context *encoder_context,
253                                       int width_in_mbs,
254                                       int height_in_mbs)
255 {
256     struct i965_driver_data *i965 = i965_driver_data(ctx);
257     struct gen6_vme_context *vme_context = encoder_context->vme_context;
258
259     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
260     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
261     vme_context->vme_batchbuffer.pitch = 16;
262     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
263                                                    "VME batchbuffer",
264                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
265                                                    0x1000);
266     vme_context->vme_buffer_suface_setup(ctx,
267                                          &vme_context->gpe_context,
268                                          &vme_context->vme_batchbuffer,
269                                          BINDING_TABLE_OFFSET(index),
270                                          SURFACE_STATE_OFFSET(index));
271 }
272
273 static void
274 gen8_vme_avc_output_vme_batchbuffer_setup(VADriverContextP ctx,
275                                       struct encode_state *encode_state,
276                                       int index,
277                                       struct intel_encoder_context *encoder_context)
278 {
279     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
280     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
281     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
282
283     gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
284 }
285
286 static VAStatus
287 gen8_vme_surface_setup(VADriverContextP ctx, 
288                        struct encode_state *encode_state,
289                        int is_intra,
290                        struct intel_encoder_context *encoder_context)
291 {
292     struct object_surface *obj_surface;
293
294     /*Setup surfaces state*/
295     /* current picture for encoding */
296     obj_surface = encode_state->input_yuv_object;
297     gen8_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
298     gen8_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
299     gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
300
301     if (!is_intra) {
302         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
303         int slice_type;
304
305         slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
306         assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
307
308         intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen8_vme_source_surface_state);
309
310         if (slice_type == SLICE_TYPE_B)
311             intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen8_vme_source_surface_state);
312     }
313
314     /* VME output */
315     gen8_vme_avc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
316     gen8_vme_avc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
317     intel_h264_setup_cost_surface(ctx, encode_state, encoder_context,
318                                   BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET),
319                                   SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET));
320
321     return VA_STATUS_SUCCESS;
322 }
323
324 static VAStatus gen8_vme_interface_setup(VADriverContextP ctx, 
325                                          struct encode_state *encode_state,
326                                          struct intel_encoder_context *encoder_context)
327 {
328     struct gen6_vme_context *vme_context = encoder_context->vme_context;
329     struct gen8_interface_descriptor_data *desc;   
330     int i;
331     dri_bo *bo;
332     unsigned char *desc_ptr;
333
334     bo = vme_context->gpe_context.dynamic_state.bo;
335     dri_bo_map(bo, 1);
336     assert(bo->virtual);
337     desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt_offset;
338
339     desc = (struct gen8_interface_descriptor_data *)desc_ptr;
340
341     for (i = 0; i < vme_context->vme_kernel_sum; i++) {
342         struct i965_kernel *kernel;
343         kernel = &vme_context->gpe_context.kernels[i];
344         assert(sizeof(*desc) == 32);
345         /*Setup the descritor table*/
346         memset(desc, 0, sizeof(*desc));
347         desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
348         desc->desc3.sampler_count = 0; /* FIXME: */
349         desc->desc3.sampler_state_pointer = 0;
350         desc->desc4.binding_table_entry_count = 1; /* FIXME: */
351         desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
352         desc->desc5.constant_urb_entry_read_offset = 0;
353         desc->desc5.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
354
355                 
356         desc++;
357     }
358
359     dri_bo_unmap(bo);
360
361     return VA_STATUS_SUCCESS;
362 }
363
364 static VAStatus gen8_vme_constant_setup(VADriverContextP ctx, 
365                                         struct encode_state *encode_state,
366                                         struct intel_encoder_context *encoder_context)
367 {
368     struct gen6_vme_context *vme_context = encoder_context->vme_context;
369     unsigned char *constant_buffer;
370     unsigned int *vme_state_message;
371     int mv_num = 32;
372
373     vme_state_message = (unsigned int *)vme_context->vme_state_message;
374
375     if (encoder_context->codec == CODEC_H264 ||
376         encoder_context->codec == CODEC_H264_MVC) {
377         if (vme_context->h264_level >= 30) {
378             mv_num = 16;
379         
380             if (vme_context->h264_level >= 31)
381                 mv_num = 8;
382         } 
383     } else if (encoder_context->codec == CODEC_MPEG2) {
384         mv_num = 2;
385     }
386
387     vme_state_message[31] = mv_num;
388
389     dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
390     assert(vme_context->gpe_context.dynamic_state.bo->virtual);
391     constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual +
392                                          vme_context->gpe_context.curbe_offset;
393
394     /* VME MV/Mb cost table is passed by using const buffer */
395     /* Now it uses the fixed search path. So it is constructed directly
396      * in the GPU shader.
397      */
398     memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
399         
400     dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
401
402     return VA_STATUS_SUCCESS;
403 }
404
/*
 * Dependency bits OR-ed into the scoreboard mask of each walker
 * MEDIA_OBJECT.  Presumably A/B/C name the left, top and top-right
 * neighbours -- TODO confirm against the hardware documentation.
 */
#define         MB_SCOREBOARD_A         (1 << 0)
#define         MB_SCOREBOARD_B         (1 << 1)
#define         MB_SCOREBOARD_C         (1 << 2)

/*
 * Check whether the macroblock at (x_index, y_index) belongs to the slice
 * covering raster indices [first_mb, first_mb + num_mb) of an
 * mb_width x mb_height picture.
 *
 * Returns 0 when the macroblock is inside both the picture and the slice,
 * -1 otherwise.  The upper bound is exclusive: index first_mb + num_mb is
 * the first macroblock of the *next* slice and must not be walked as part
 * of this one.
 */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    int mb_index;

    if (x_index < 0 || x_index >= mb_width)
        return -1;
    if (y_index < 0 || y_index >= mb_height)
        return -1;

    mb_index = y_index * mb_width + x_index;
    if (mb_index < first_mb || mb_index >= (first_mb + num_mb))
        return -1;
    return 0;
}
423
424 static void
425 gen8wa_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
426                                      struct encode_state *encode_state,
427                                      int mb_width, int mb_height,
428                                      int kernel,
429                                      int transform_8x8_mode_flag,
430                                      struct intel_encoder_context *encoder_context)
431 {
432     struct gen6_vme_context *vme_context = encoder_context->vme_context;
433     int mb_row;
434     int s;
435     unsigned int *command_ptr;
436
437 #define         USE_SCOREBOARD          (1 << 21)
438  
439     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
440     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
441
442     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
443         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
444         int first_mb = pSliceParameter->macroblock_address;
445         int num_mb = pSliceParameter->num_macroblocks;
446         unsigned int mb_intra_ub, score_dep;
447         int x_outer, y_outer, x_inner, y_inner;
448         int xtemp_outer = 0;
449
450         x_outer = first_mb % mb_width;
451         y_outer = first_mb / mb_width;
452         mb_row = y_outer;
453                                  
454         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
455             x_inner = x_outer;
456             y_inner = y_outer;
457             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
458                 mb_intra_ub = 0;
459                 score_dep = 0;
460                 if (x_inner != 0) {
461                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
462                     score_dep |= MB_SCOREBOARD_A; 
463                 }
464                 if (y_inner != mb_row) {
465                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
466                     score_dep |= MB_SCOREBOARD_B;
467                     if (x_inner != 0)
468                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
469                     if (x_inner != (mb_width -1)) {
470                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
471                         score_dep |= MB_SCOREBOARD_C;
472                     }
473                 }
474                                                         
475                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
476                 *command_ptr++ = kernel;
477                 *command_ptr++ = USE_SCOREBOARD;
478                 /* Indirect data */
479                 *command_ptr++ = 0;
480                 /* the (X, Y) term of scoreboard */
481                 *command_ptr++ = ((y_inner << 16) | x_inner);
482                 *command_ptr++ = score_dep;
483                 /*inline data */
484                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
485                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
486                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
487                 *command_ptr++ = 0;
488
489                 x_inner -= 2;
490                 y_inner += 1;
491             }
492             x_outer += 1;
493         }
494
495         xtemp_outer = mb_width - 2;
496         if (xtemp_outer < 0)
497             xtemp_outer = 0;
498         x_outer = xtemp_outer;
499         y_outer = first_mb / mb_width;
500         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
501             y_inner = y_outer;
502             x_inner = x_outer;
503             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
504                 mb_intra_ub = 0;
505                 score_dep = 0;
506                 if (x_inner != 0) {
507                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
508                     score_dep |= MB_SCOREBOARD_A; 
509                 }
510                 if (y_inner != mb_row) {
511                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
512                     score_dep |= MB_SCOREBOARD_B;
513                     if (x_inner != 0)
514                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
515
516                     if (x_inner != (mb_width -1)) {
517                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
518                         score_dep |= MB_SCOREBOARD_C;
519                     }
520                 }
521
522                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
523                 *command_ptr++ = kernel;
524                 *command_ptr++ = USE_SCOREBOARD;
525                 /* Indirect data */
526                 *command_ptr++ = 0;
527                 /* the (X, Y) term of scoreboard */
528                 *command_ptr++ = ((y_inner << 16) | x_inner);
529                 *command_ptr++ = score_dep;
530                 /*inline data */
531                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
532                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
533
534                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
535                 *command_ptr++ = 0;
536                 x_inner -= 2;
537                 y_inner += 1;
538             }
539             x_outer++;
540             if (x_outer >= mb_width) {
541                 y_outer += 1;
542                 x_outer = xtemp_outer;
543             }           
544         }
545     }
546
547     *command_ptr++ = MI_BATCH_BUFFER_END;
548     *command_ptr++ = 0;
549
550     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
551 }
552
553 static void
554 gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx, 
555                               struct encode_state *encode_state,
556                               int mb_width, int mb_height,
557                               int kernel,
558                               int transform_8x8_mode_flag,
559                               struct intel_encoder_context *encoder_context)
560 {
561     struct gen6_vme_context *vme_context = encoder_context->vme_context;
562     int mb_x = 0, mb_y = 0;
563     int i, s;
564     unsigned int *command_ptr;
565     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
566     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
567     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
568     int qp;
569     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
570     int qp_mb, qp_index;
571
572     if (encoder_context->rate_control_mode == VA_RC_CQP)
573         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
574     else
575         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
576
577     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
578     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
579
580     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
581         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
582         int slice_mb_begin = pSliceParameter->macroblock_address;
583         int slice_mb_number = pSliceParameter->num_macroblocks;
584         unsigned int mb_intra_ub;
585         int slice_mb_x = pSliceParameter->macroblock_address % mb_width; 
586         for (i = 0; i < slice_mb_number;  ) {
587             int mb_count = i + slice_mb_begin;    
588             mb_x = mb_count % mb_width;
589             mb_y = mb_count / mb_width;
590             mb_intra_ub = 0;
591             if (mb_x != 0) {
592                 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
593             }
594             if (mb_y != 0) {
595                 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
596                 if (mb_x != 0)
597                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
598                 if (mb_x != (mb_width -1))
599                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
600             }
601             if (i < mb_width) {
602                 if (i == 0)
603                     mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
604                 mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
605                 if ((i == (mb_width - 1)) && slice_mb_x) {
606                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
607                 }
608             }
609                 
610             if ((i == mb_width) && slice_mb_x) {
611                 mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
612             }
613             *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
614             *command_ptr++ = kernel;
615             *command_ptr++ = 0;
616             *command_ptr++ = 0;
617             *command_ptr++ = 0;
618             *command_ptr++ = 0;
619    
620             /*inline data */
621             *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
622             *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
623             /* qp occupies one byte */
624             if (vme_context->roi_enabled) {
625                 qp_index = mb_y * mb_width + mb_x;
626                 qp_mb = *(vme_context->qp_per_mb + qp_index);
627             } else
628                 qp_mb = qp;
629             *command_ptr++ = qp_mb;
630
631             *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
632             *command_ptr++ = 0;
633             i += 1;
634         } 
635     }
636
637     *command_ptr++ = MI_BATCH_BUFFER_END;
638     *command_ptr++ = 0;
639
640     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
641 }
642
643 static void gen8_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
644 {
645     struct gen6_vme_context *vme_context = encoder_context->vme_context;
646
647     gen8_gpe_context_init(ctx, &vme_context->gpe_context);
648
649     /* VME output buffer */
650     dri_bo_unreference(vme_context->vme_output.bo);
651     vme_context->vme_output.bo = NULL;
652
653     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
654     vme_context->vme_batchbuffer.bo = NULL;
655
656     /* VME state */
657     dri_bo_unreference(vme_context->vme_state.bo);
658     vme_context->vme_state.bo = NULL;
659 }
660
661 static void gen8_vme_pipeline_programing(VADriverContextP ctx, 
662                                          struct encode_state *encode_state,
663                                          struct intel_encoder_context *encoder_context)
664 {
665     struct gen6_vme_context *vme_context = encoder_context->vme_context;
666     struct intel_batchbuffer *batch = encoder_context->base.batch;
667     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
668     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
669     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
670     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
671     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
672     int kernel_shader;
673     bool allow_hwscore = true;
674     int s;
675     unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
676
677     if (is_low_quality)
678         allow_hwscore = false;
679     else {
680         for (s = 0; s < encode_state->num_slice_params_ext; s++) {
681             pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
682             if ((pSliceParameter->macroblock_address % width_in_mbs)) {
683                 allow_hwscore = false;
684                 break;
685             }
686         }
687     }
688
689     if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
690         (pSliceParameter->slice_type == SLICE_TYPE_SI)) {
691         kernel_shader = VME_INTRA_SHADER;
692     } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
693                (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
694         kernel_shader = VME_INTER_SHADER;
695     } else {
696         kernel_shader = VME_BINTER_SHADER;
697         if (!allow_hwscore)
698             kernel_shader = VME_INTER_SHADER;
699     }
700     if (allow_hwscore)
701         gen8wa_vme_walker_fill_vme_batchbuffer(ctx, 
702                                              encode_state,
703                                              width_in_mbs, height_in_mbs,
704                                              kernel_shader,
705                                              pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
706                                              encoder_context);
707     else
708         gen8_vme_fill_vme_batchbuffer(ctx, 
709                                       encode_state,
710                                       width_in_mbs, height_in_mbs,
711                                       kernel_shader,
712                                       pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
713                                       encoder_context);
714
715     intel_batchbuffer_start_atomic(batch, 0x1000);
716     gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
717     BEGIN_BATCH(batch, 3);
718     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
719     OUT_RELOC(batch,
720               vme_context->vme_batchbuffer.bo,
721               I915_GEM_DOMAIN_COMMAND, 0, 
722               0);
723     OUT_BATCH(batch, 0);
724     ADVANCE_BATCH(batch);
725
726     intel_batchbuffer_end_atomic(batch);        
727 }
728
729 static VAStatus gen8_vme_prepare(VADriverContextP ctx, 
730                                  struct encode_state *encode_state,
731                                  struct intel_encoder_context *encoder_context)
732 {
733     VAStatus vaStatus = VA_STATUS_SUCCESS;
734     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
735     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
736     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
737     struct gen6_vme_context *vme_context = encoder_context->vme_context;
738
739     if (!vme_context->h264_level ||
740         (vme_context->h264_level != pSequenceParameter->level_idc)) {
741         vme_context->h264_level = pSequenceParameter->level_idc;        
742     }   
743
744     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
745     intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context);
746     intel_h264_enc_roi_config(ctx, encode_state, encoder_context);
747
748     /*Setup all the memory object*/
749     gen8_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
750     gen8_vme_interface_setup(ctx, encode_state, encoder_context);
751     //gen8_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
752     gen8_vme_constant_setup(ctx, encode_state, encoder_context);
753
754     /*Programing media pipeline*/
755     gen8_vme_pipeline_programing(ctx, encode_state, encoder_context);
756
757     return vaStatus;
758 }
759
760 static VAStatus gen8_vme_run(VADriverContextP ctx, 
761                              struct encode_state *encode_state,
762                              struct intel_encoder_context *encoder_context)
763 {
764     struct intel_batchbuffer *batch = encoder_context->base.batch;
765
766     intel_batchbuffer_flush(batch);
767
768     return VA_STATUS_SUCCESS;
769 }
770
771 static VAStatus gen8_vme_stop(VADriverContextP ctx, 
772                               struct encode_state *encode_state,
773                               struct intel_encoder_context *encoder_context)
774 {
775     return VA_STATUS_SUCCESS;
776 }
777
778 static VAStatus
779 gen8_vme_pipeline(VADriverContextP ctx,
780                   VAProfile profile,
781                   struct encode_state *encode_state,
782                   struct intel_encoder_context *encoder_context)
783 {
784     gen8_vme_media_init(ctx, encoder_context);
785     gen8_vme_prepare(ctx, encode_state, encoder_context);
786     gen8_vme_run(ctx, encode_state, encoder_context);
787     gen8_vme_stop(ctx, encode_state, encoder_context);
788
789     return VA_STATUS_SUCCESS;
790 }
791
792 static void
793 gen8_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
794                                    struct encode_state *encode_state,
795                                    int index,
796                                    int is_intra,
797                                    struct intel_encoder_context *encoder_context)
798 {
799     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
800     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
801     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
802
803     gen8_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
804 }
805
806 static void
807 gen8_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
808                                             struct encode_state *encode_state,
809                                             int index,
810                                             struct intel_encoder_context *encoder_context)
811 {
812     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
813     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
814     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
815
816     gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
817 }
818
819 static VAStatus
820 gen8_vme_mpeg2_surface_setup(VADriverContextP ctx, 
821                              struct encode_state *encode_state,
822                              int is_intra,
823                              struct intel_encoder_context *encoder_context)
824 {
825     struct object_surface *obj_surface;
826
827     /*Setup surfaces state*/
828     /* current picture for encoding */
829     obj_surface = encode_state->input_yuv_object;
830     gen8_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
831     gen8_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
832     gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
833
834     if (!is_intra) {
835         /* reference 0 */
836         obj_surface = encode_state->reference_objects[0];
837
838         if (obj_surface->bo != NULL)
839             gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
840
841         /* reference 1 */
842         obj_surface = encode_state->reference_objects[1];
843
844         if (obj_surface && obj_surface->bo != NULL) 
845             gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
846     }
847
848     /* VME output */
849     gen8_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
850     gen8_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
851
852     return VA_STATUS_SUCCESS;
853 }
854
855 static void
856 gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
857                                            struct encode_state *encode_state,
858                                            int mb_width, int mb_height,
859                                            int kernel,
860                                            struct intel_encoder_context *encoder_context)
861 {
862     struct gen6_vme_context *vme_context = encoder_context->vme_context;
863     unsigned int *command_ptr;
864
865 #define         MPEG2_SCOREBOARD                (1 << 21)
866
867     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
868     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
869
870     {
871         unsigned int mb_intra_ub, score_dep;
872         int x_outer, y_outer, x_inner, y_inner;
873         int xtemp_outer = 0;
874         int first_mb = 0;
875         int num_mb = mb_width * mb_height;
876
877         x_outer = 0;
878         y_outer = 0;
879         
880                                  
881         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
882             x_inner = x_outer;
883             y_inner = y_outer;
884             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
885                 mb_intra_ub = 0;
886                 score_dep = 0;
887                 if (x_inner != 0) {
888                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
889                     score_dep |= MB_SCOREBOARD_A; 
890                 }
891                 if (y_inner != 0) {
892                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
893                     score_dep |= MB_SCOREBOARD_B;
894
895                     if (x_inner != 0)
896                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
897
898                     if (x_inner != (mb_width -1)) {
899                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
900                         score_dep |= MB_SCOREBOARD_C;
901                     }
902                 }
903                                                         
904                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
905                 *command_ptr++ = kernel;
906                 *command_ptr++ = MPEG2_SCOREBOARD;
907                 /* Indirect data */
908                 *command_ptr++ = 0;
909                 /* the (X, Y) term of scoreboard */
910                 *command_ptr++ = ((y_inner << 16) | x_inner);
911                 *command_ptr++ = score_dep;
912                 /*inline data */
913                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
914                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
915                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
916                 *command_ptr++ = 0;
917
918                 x_inner -= 2;
919                 y_inner += 1;
920             }
921             x_outer += 1;
922         }
923
924         xtemp_outer = mb_width - 2;
925         if (xtemp_outer < 0)
926             xtemp_outer = 0;
927         x_outer = xtemp_outer;
928         y_outer = 0;
929         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
930             y_inner = y_outer;
931             x_inner = x_outer;
932             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
933                 mb_intra_ub = 0;
934                 score_dep = 0;
935                 if (x_inner != 0) {
936                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
937                     score_dep |= MB_SCOREBOARD_A; 
938                 }
939                 if (y_inner != 0) {
940                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
941                     score_dep |= MB_SCOREBOARD_B;
942
943                     if (x_inner != 0)
944                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
945
946                     if (x_inner != (mb_width -1)) {
947                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
948                         score_dep |= MB_SCOREBOARD_C;
949                     }
950                 }
951
952                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
953                 *command_ptr++ = kernel;
954                 *command_ptr++ = MPEG2_SCOREBOARD;
955                 /* Indirect data */
956                 *command_ptr++ = 0;
957                 /* the (X, Y) term of scoreboard */
958                 *command_ptr++ = ((y_inner << 16) | x_inner);
959                 *command_ptr++ = score_dep;
960                 /*inline data */
961                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
962                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
963
964                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
965                 *command_ptr++ = 0;
966                 x_inner -= 2;
967                 y_inner += 1;
968             }
969             x_outer++;
970             if (x_outer >= mb_width) {
971                 y_outer += 1;
972                 x_outer = xtemp_outer;
973             }           
974         }
975     }
976
977     *command_ptr++ = MI_BATCH_BUFFER_END;
978     *command_ptr++ = 0;
979
980     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
981     return;
982 }
983
984 static void
985 gen8_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, 
986                                     struct encode_state *encode_state,
987                                     int mb_width, int mb_height,
988                                     int kernel,
989                                     int transform_8x8_mode_flag,
990                                     struct intel_encoder_context *encoder_context)
991 {
992     struct gen6_vme_context *vme_context = encoder_context->vme_context;
993     int mb_x = 0, mb_y = 0;
994     int i, s, j;
995     unsigned int *command_ptr;
996
997
998     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
999     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1000
1001     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1002         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
1003
1004         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
1005             int slice_mb_begin = slice_param->macroblock_address;
1006             int slice_mb_number = slice_param->num_macroblocks;
1007             unsigned int mb_intra_ub;
1008
1009             for (i = 0; i < slice_mb_number;) {
1010                 int mb_count = i + slice_mb_begin;    
1011
1012                 mb_x = mb_count % mb_width;
1013                 mb_y = mb_count / mb_width;
1014                 mb_intra_ub = 0;
1015
1016                 if (mb_x != 0) {
1017                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1018                 }
1019
1020                 if (mb_y != 0) {
1021                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1022
1023                     if (mb_x != 0)
1024                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1025
1026                     if (mb_x != (mb_width -1))
1027                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1028                 }
1029
1030                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1031                 *command_ptr++ = kernel;
1032                 *command_ptr++ = 0;
1033                 *command_ptr++ = 0;
1034                 *command_ptr++ = 0;
1035                 *command_ptr++ = 0;
1036    
1037                 /*inline data */
1038                 *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
1039                 *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1040
1041                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
1042                 *command_ptr++ = 0;
1043                 i += 1;
1044             }
1045
1046             slice_param++;
1047         }
1048     }
1049
1050     *command_ptr++ = MI_BATCH_BUFFER_END;
1051     *command_ptr++ = 0;
1052
1053     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1054 }
1055
1056 static void
1057 gen8_vme_mpeg2_pipeline_programing(VADriverContextP ctx, 
1058                                    struct encode_state *encode_state,
1059                                    int is_intra,
1060                                    struct intel_encoder_context *encoder_context)
1061 {
1062     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1063     struct intel_batchbuffer *batch = encoder_context->base.batch;
1064     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1065     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1066     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1067     bool allow_hwscore = true;
1068     int s;
1069     int kernel_shader;
1070     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1071
1072     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1073         int j;
1074         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
1075
1076         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
1077             if (slice_param->macroblock_address % width_in_mbs) {
1078                 allow_hwscore = false;
1079                 break;
1080             }
1081         }
1082     }
1083
1084     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1085     if (pic_param->picture_type == VAEncPictureTypeIntra) {
1086         allow_hwscore = false;
1087         kernel_shader = VME_INTRA_SHADER;
1088     } else {
1089         kernel_shader = VME_INTER_SHADER;
1090     }
1091
1092     if (allow_hwscore) 
1093         gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
1094                                                    encode_state,
1095                                                    width_in_mbs, height_in_mbs,
1096                                                    kernel_shader,
1097                                                    encoder_context);
1098     else
1099         gen8_vme_mpeg2_fill_vme_batchbuffer(ctx, 
1100                                             encode_state,
1101                                             width_in_mbs, height_in_mbs,
1102                                             is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
1103                                             0,
1104                                             encoder_context);
1105
1106     intel_batchbuffer_start_atomic(batch, 0x1000);
1107     gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1108     BEGIN_BATCH(batch, 4);
1109     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1110     OUT_RELOC(batch,
1111               vme_context->vme_batchbuffer.bo,
1112               I915_GEM_DOMAIN_COMMAND, 0, 
1113               0);
1114     OUT_BATCH(batch, 0);
1115     OUT_BATCH(batch, 0);
1116     ADVANCE_BATCH(batch);
1117
1118     intel_batchbuffer_end_atomic(batch);        
1119 }
1120
1121 static VAStatus 
1122 gen8_vme_mpeg2_prepare(VADriverContextP ctx, 
1123                        struct encode_state *encode_state,
1124                        struct intel_encoder_context *encoder_context)
1125 {
1126     VAStatus vaStatus = VA_STATUS_SUCCESS;
1127     VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1128     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1129     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1130
1131     if ((!vme_context->mpeg2_level) ||
1132         (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
1133         vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
1134     }
1135
1136         
1137     /*Setup all the memory object*/
1138     gen8_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1139     gen8_vme_interface_setup(ctx, encode_state, encoder_context);
1140     //gen8_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1141     intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
1142     gen8_vme_constant_setup(ctx, encode_state, encoder_context);
1143
1144     /*Programing media pipeline*/
1145     gen8_vme_mpeg2_pipeline_programing(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1146
1147     return vaStatus;
1148 }
1149
1150 static VAStatus
1151 gen8_vme_mpeg2_pipeline(VADriverContextP ctx,
1152                         VAProfile profile,
1153                         struct encode_state *encode_state,
1154                         struct intel_encoder_context *encoder_context)
1155 {
1156     gen8_vme_media_init(ctx, encoder_context);
1157     gen8_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
1158     gen8_vme_run(ctx, encode_state, encoder_context);
1159     gen8_vme_stop(ctx, encode_state, encoder_context);
1160
1161     return VA_STATUS_SUCCESS;
1162 }
1163
1164 static void
1165 gen8_vme_vp8_output_buffer_setup(VADriverContextP ctx,
1166                                    struct encode_state *encode_state,
1167                                    int index,
1168                                    int is_intra,
1169                                    struct intel_encoder_context *encoder_context)
1170 {
1171     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1172     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1173     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1174
1175     gen8_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
1176 }
1177
1178 static void
1179 gen8_vme_vp8_output_vme_batchbuffer_setup(VADriverContextP ctx,
1180                                             struct encode_state *encode_state,
1181                                             int index,
1182                                             struct intel_encoder_context *encoder_context)
1183 {
1184     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1185     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1186     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1187
1188     gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
1189 }
1190
1191 static VAStatus
1192 gen8_vme_vp8_surface_setup(VADriverContextP ctx,
1193                              struct encode_state *encode_state,
1194                              int is_intra,
1195                              struct intel_encoder_context *encoder_context)
1196 {
1197     struct object_surface *obj_surface;
1198
1199     /*Setup surfaces state*/
1200     /* current picture for encoding */
1201     obj_surface = encode_state->input_yuv_object;
1202     gen8_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
1203     gen8_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
1204     gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
1205
1206     if (!is_intra) {
1207         /* reference 0 */
1208         obj_surface = encode_state->reference_objects[0];
1209
1210         if (obj_surface->bo != NULL)
1211             gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
1212
1213         /* reference 1 */
1214         obj_surface = encode_state->reference_objects[1];
1215
1216         if (obj_surface && obj_surface->bo != NULL)
1217             gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
1218     }
1219
1220     /* VME output */
1221     gen8_vme_vp8_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
1222     gen8_vme_vp8_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
1223
1224     return VA_STATUS_SUCCESS;
1225 }
1226
1227 static void
1228 gen8_vme_vp8_pipeline_programing(VADriverContextP ctx,
1229                                    struct encode_state *encode_state,
1230                                    int is_intra,
1231                                    struct intel_encoder_context *encoder_context)
1232 {
1233     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1234     struct intel_batchbuffer *batch = encoder_context->base.batch;
1235     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1236     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1237     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1238     int kernel_shader = (is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER);
1239
1240     gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
1241                                                  encode_state,
1242                                                  width_in_mbs, height_in_mbs,
1243                                                  kernel_shader,
1244                                                  encoder_context);
1245
1246     intel_batchbuffer_start_atomic(batch, 0x1000);
1247     gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1248     BEGIN_BATCH(batch, 4);
1249     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1250     OUT_RELOC(batch,
1251               vme_context->vme_batchbuffer.bo,
1252               I915_GEM_DOMAIN_COMMAND, 0,
1253               0);
1254     OUT_BATCH(batch, 0);
1255     OUT_BATCH(batch, 0);
1256     ADVANCE_BATCH(batch);
1257
1258     intel_batchbuffer_end_atomic(batch);
1259 }
1260
1261 static VAStatus gen8_vme_vp8_prepare(VADriverContextP ctx,
1262                                  struct encode_state *encode_state,
1263                                  struct intel_encoder_context *encoder_context)
1264 {
1265     VAStatus vaStatus = VA_STATUS_SUCCESS;
1266     VAEncPictureParameterBufferVP8 *pPicParameter = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
1267     int is_intra = !pPicParameter->pic_flags.bits.frame_type;
1268
1269     /* update vp8 mbmv cost */
1270     intel_vme_vp8_update_mbmv_cost(ctx, encode_state, encoder_context);
1271
1272     /*Setup all the memory object*/
1273     gen8_vme_vp8_surface_setup(ctx, encode_state, is_intra, encoder_context);
1274     gen8_vme_interface_setup(ctx, encode_state, encoder_context);
1275     gen8_vme_constant_setup(ctx, encode_state, encoder_context);
1276
1277     /*Programing media pipeline*/
1278     gen8_vme_vp8_pipeline_programing(ctx, encode_state, is_intra, encoder_context);
1279
1280     return vaStatus;
1281 }
1282
1283 static VAStatus
1284 gen8_vme_vp8_pipeline(VADriverContextP ctx,
1285                         VAProfile profile,
1286                         struct encode_state *encode_state,
1287                         struct intel_encoder_context *encoder_context)
1288 {
1289     gen8_vme_media_init(ctx, encoder_context);
1290     gen8_vme_vp8_prepare(ctx, encode_state, encoder_context);
1291     gen8_vme_run(ctx, encode_state, encoder_context);
1292     gen8_vme_stop(ctx, encode_state, encoder_context);
1293
1294     return VA_STATUS_SUCCESS;
1295 }
1296
1297 static void
1298 gen8_vme_context_destroy(void *context)
1299 {
1300     struct gen6_vme_context *vme_context = context;
1301
1302     gen8_gpe_context_destroy(&vme_context->gpe_context);
1303
1304     dri_bo_unreference(vme_context->vme_output.bo);
1305     vme_context->vme_output.bo = NULL;
1306
1307     dri_bo_unreference(vme_context->vme_state.bo);
1308     vme_context->vme_state.bo = NULL;
1309
1310     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
1311     vme_context->vme_batchbuffer.bo = NULL;
1312
1313     free(vme_context->vme_state_message);
1314     vme_context->vme_state_message = NULL;
1315
1316     dri_bo_unreference(vme_context->i_qp_cost_table);
1317     vme_context->i_qp_cost_table = NULL;
1318
1319     dri_bo_unreference(vme_context->p_qp_cost_table);
1320     vme_context->p_qp_cost_table = NULL;
1321
1322     dri_bo_unreference(vme_context->b_qp_cost_table);
1323     vme_context->b_qp_cost_table = NULL;
1324
1325     free(vme_context->qp_per_mb);
1326     vme_context->qp_per_mb = NULL;
1327
1328     free(vme_context);
1329 }
1330
1331 Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1332 {
1333     struct gen6_vme_context *vme_context = NULL;
1334     struct i965_kernel *vme_kernel_list = NULL;
1335     int i965_kernel_num;
1336
1337     switch (encoder_context->codec) {
1338     case CODEC_H264:
1339     case CODEC_H264_MVC:
1340         vme_kernel_list = gen8_vme_kernels;
1341         encoder_context->vme_pipeline = gen8_vme_pipeline;
1342         i965_kernel_num = sizeof(gen8_vme_kernels) / sizeof(struct i965_kernel); 
1343         break;
1344
1345     case CODEC_MPEG2:
1346         vme_kernel_list = gen8_vme_mpeg2_kernels;
1347         encoder_context->vme_pipeline = gen8_vme_mpeg2_pipeline;
1348         i965_kernel_num = sizeof(gen8_vme_mpeg2_kernels) / sizeof(struct i965_kernel); 
1349         break;
1350
1351    case CODEC_JPEG:
1352         //JPEG encode doesnt have vme. So, set the corresponding fields to NULL.
1353         encoder_context->vme_context = NULL;
1354         encoder_context->vme_pipeline = NULL;
1355         encoder_context->vme_context_destroy = NULL;
1356         break;
1357
1358     case CODEC_VP8:
1359         vme_kernel_list = gen8_vme_vp8_kernels;
1360         encoder_context->vme_pipeline = gen8_vme_vp8_pipeline;
1361         i965_kernel_num = sizeof(gen8_vme_vp8_kernels) / sizeof(struct i965_kernel);
1362         break;
1363
1364     default:
1365         /* never get here */
1366         assert(0);
1367
1368         break;
1369     }
1370
1371     //If the codec is JPEG, bypass VME
1372     if(encoder_context->codec != CODEC_JPEG) {
1373         vme_context = calloc(1, sizeof(struct gen6_vme_context));
1374         assert(vme_context);
1375         vme_context->vme_kernel_sum = i965_kernel_num;
1376         vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1377
1378         vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
1379         vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
1380         vme_context->gpe_context.sampler_size = 0;
1381
1382
1383         vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1384         vme_context->gpe_context.vfe_state.num_urb_entries = 64;
1385         vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
1386         vme_context->gpe_context.vfe_state.urb_entry_size = 16;
1387         vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
1388
1389         gen7_vme_scoreboard_init(ctx, vme_context);
1390
1391         gen8_gpe_load_kernels(ctx,
1392                           &vme_context->gpe_context,
1393                           vme_kernel_list,
1394                           i965_kernel_num);
1395         vme_context->vme_surface2_setup = gen8_gpe_surface2_setup;
1396         vme_context->vme_media_rw_surface_setup = gen8_gpe_media_rw_surface_setup;
1397         vme_context->vme_buffer_suface_setup = gen8_gpe_buffer_suface_setup;
1398         vme_context->vme_media_chroma_surface_setup = gen8_gpe_media_chroma_surface_setup;
1399
1400         encoder_context->vme_context = vme_context;
1401         encoder_context->vme_context_destroy = gen8_vme_context_destroy;
1402
1403         vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
1404     }
1405
1406     return True;
1407 }