Use obtained eu_counts to configure GPU media pipeline on Gen8/Gen9
android-x86/hardware-intel-common-vaapi.git: src/gen8_vme.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "intel_driver.h"
37
38 #include "i965_defines.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "gen6_vme.h"
42 #include "gen6_mfc.h"
43
44 #ifdef SURFACE_STATE_PADDED_SIZE
45 #undef SURFACE_STATE_PADDED_SIZE
46 #endif
47
48 #define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
49 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
50 #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
51
52 #define VME_INTRA_SHADER        0
53 #define VME_INTER_SHADER        1
54 #define VME_BINTER_SHADER       2
55
56 #define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
57 #define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
58 #define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
59
60 #define VME_MSG_LENGTH          32
61   
62 static const uint32_t gen8_vme_intra_frame[][4] = {
63 #include "shaders/vme/intra_frame_gen8.g8b"
64 };
65
66 static const uint32_t gen8_vme_inter_frame[][4] = {
67 #include "shaders/vme/inter_frame_gen8.g8b"
68 };
69
70 static const uint32_t gen8_vme_inter_bframe[][4] = {
71 #include "shaders/vme/inter_bframe_gen8.g8b"
72 };
73
74 static struct i965_kernel gen8_vme_kernels[] = {
75     {
76         "VME Intra Frame",
77         VME_INTRA_SHADER, /*index*/
78         gen8_vme_intra_frame,                   
79         sizeof(gen8_vme_intra_frame),           
80         NULL
81     },
82     {
83         "VME inter Frame",
84         VME_INTER_SHADER,
85         gen8_vme_inter_frame,
86         sizeof(gen8_vme_inter_frame),
87         NULL
88     },
89     {
90         "VME inter BFrame",
91         VME_BINTER_SHADER,
92         gen8_vme_inter_bframe,
93         sizeof(gen8_vme_inter_bframe),
94         NULL
95     }
96 };
97
98 static const uint32_t gen8_vme_mpeg2_intra_frame[][4] = {
99 #include "shaders/vme/intra_frame_gen8.g8b"
100 };
101
102 static const uint32_t gen8_vme_mpeg2_inter_frame[][4] = {
103 #include "shaders/vme/mpeg2_inter_gen8.g8b"
104 };
105
106 static struct i965_kernel gen8_vme_mpeg2_kernels[] = {
107     {
108         "VME Intra Frame",
109         VME_INTRA_SHADER, /*index*/
110         gen8_vme_mpeg2_intra_frame,                     
111         sizeof(gen8_vme_mpeg2_intra_frame),             
112         NULL
113     },
114     {
115         "VME inter Frame",
116         VME_INTER_SHADER,
117         gen8_vme_mpeg2_inter_frame,
118         sizeof(gen8_vme_mpeg2_inter_frame),
119         NULL
120     },
121 };
122
123 static const uint32_t gen8_vme_vp8_intra_frame[][4] = {
124 #include "shaders/vme/vp8_intra_frame_gen8.g8b"
125 };
126
127 static const uint32_t gen8_vme_vp8_inter_frame[][4] = {
128 #include "shaders/vme/vp8_inter_frame_gen8.g8b"
129 };
130
131 static struct i965_kernel gen8_vme_vp8_kernels[] = {
132     {
133         "VME Intra Frame",
134         VME_INTRA_SHADER, /*index*/
135         gen8_vme_vp8_intra_frame,
136         sizeof(gen8_vme_vp8_intra_frame),
137         NULL
138     },
139     {
140         "VME inter Frame",
141         VME_INTER_SHADER,
142         gen8_vme_vp8_inter_frame,
143         sizeof(gen8_vme_vp8_inter_frame),
144         NULL
145     },
146 };
147
148 /* only used for VME source surface state */
149 static void 
150 gen8_vme_source_surface_state(VADriverContextP ctx,
151                               int index,
152                               struct object_surface *obj_surface,
153                               struct intel_encoder_context *encoder_context)
154 {
155     struct gen6_vme_context *vme_context = encoder_context->vme_context;
156
157     vme_context->vme_surface2_setup(ctx,
158                                     &vme_context->gpe_context,
159                                     obj_surface,
160                                     BINDING_TABLE_OFFSET(index),
161                                     SURFACE_STATE_OFFSET(index));
162 }
163
164 static void
165 gen8_vme_media_source_surface_state(VADriverContextP ctx,
166                                     int index,
167                                     struct object_surface *obj_surface,
168                                     struct intel_encoder_context *encoder_context)
169 {
170     struct gen6_vme_context *vme_context = encoder_context->vme_context;
171
172     vme_context->vme_media_rw_surface_setup(ctx,
173                                             &vme_context->gpe_context,
174                                             obj_surface,
175                                             BINDING_TABLE_OFFSET(index),
176                                             SURFACE_STATE_OFFSET(index),
177                                             0);
178 }
179
180 static void
181 gen8_vme_media_chroma_source_surface_state(VADriverContextP ctx,
182                                            int index,
183                                            struct object_surface *obj_surface,
184                                            struct intel_encoder_context *encoder_context)
185 {
186     struct gen6_vme_context *vme_context = encoder_context->vme_context;
187
188     vme_context->vme_media_chroma_surface_setup(ctx,
189                                                 &vme_context->gpe_context,
190                                                 obj_surface,
191                                                 BINDING_TABLE_OFFSET(index),
192                                                 SURFACE_STATE_OFFSET(index),
193                                                 0);
194 }
195
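/*
 * Allocate the per-MB VME output buffer (pitch 16 bytes, one block per MB) and
 * bind it as a GPE buffer surface at the given binding-table index.  Intra-only
 * frames need 2 x INTRA_VME_OUTPUT_IN_BYTES per MB, inter frames 24 x.
 */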
196 static void
197 gen8_vme_output_buffer_setup(VADriverContextP ctx,
198                              struct encode_state *encode_state,
199                              int index,
200                              struct intel_encoder_context *encoder_context,
201                              int is_intra,
202                              int width_in_mbs,
203                              int height_in_mbs)
204
205 {
206     struct i965_driver_data *i965 = i965_driver_data(ctx);
207     struct gen6_vme_context *vme_context = encoder_context->vme_context;
208
209     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
210     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
211
212     if (is_intra)
213         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
214     else
215         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
216     /*
217      * Inter MB output: 32-byte intra search result + 16-byte IME info + 128-byte IME MVs
218      * + 32-byte IME refs + 16-byte FBR info + 128-byte FBR MVs + 32-byte FBR refs,
219      * i.e. 16 * (2 + 2 * (1 + 8 + 2)) = 16 * 24 bytes per MB.
220      */
221
222     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
223                                               "VME output buffer",
224                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
225                                               0x1000);
226     assert(vme_context->vme_output.bo);
227     vme_context->vme_buffer_suface_setup(ctx,
228                                          &vme_context->gpe_context,
229                                          &vme_context->vme_output,
230                                          BINDING_TABLE_OFFSET(index),
231                                          SURFACE_STATE_OFFSET(index));
232 }
233
234 static void
235 gen8_vme_avc_output_buffer_setup(VADriverContextP ctx,
236                              struct encode_state *encode_state,
237                              int index,
238                              struct intel_encoder_context *encoder_context)
239 {
240     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
241     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
242     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
243     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
244     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
245
246     gen8_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
247
248 }
249
250 static void
251 gen8_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
252                                       struct encode_state *encode_state,
253                                       int index,
254                                       struct intel_encoder_context *encoder_context,
255                                       int width_in_mbs,
256                                       int height_in_mbs)
257 {
258     struct i965_driver_data *i965 = i965_driver_data(ctx);
259     struct gen6_vme_context *vme_context = encoder_context->vme_context;
260
261     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
262     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
263     vme_context->vme_batchbuffer.pitch = 16;
264     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
265                                                    "VME batchbuffer",
266                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
267                                                    0x1000);
268     vme_context->vme_buffer_suface_setup(ctx,
269                                          &vme_context->gpe_context,
270                                          &vme_context->vme_batchbuffer,
271                                          BINDING_TABLE_OFFSET(index),
272                                          SURFACE_STATE_OFFSET(index));
273 }
274
275 static void
276 gen8_vme_avc_output_vme_batchbuffer_setup(VADriverContextP ctx,
277                                       struct encode_state *encode_state,
278                                       int index,
279                                       struct intel_encoder_context *encoder_context)
280 {
281     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
282     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
283     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
284
285     gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
286 }
287
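/*
 * Bind every surface the AVC VME kernels use: the current picture as the VME
 * source (slot 0) and as media RW luma/chroma surfaces (slots 4/6), the L0/L1
 * reference pictures (slots 1/2, via intel_avc_vme_reference_state), the VME
 * output buffer (slot 3), the second-level batch buffer (slot 5) and the
 * MV/MB cost table surface.
 */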
288 static VAStatus
289 gen8_vme_surface_setup(VADriverContextP ctx, 
290                        struct encode_state *encode_state,
291                        int is_intra,
292                        struct intel_encoder_context *encoder_context)
293 {
294     struct object_surface *obj_surface;
295
296     /* Set up surface states */
297     /* current picture for encoding */
298     obj_surface = encode_state->input_yuv_object;
299     gen8_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
300     gen8_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
301     gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
302
303     if (!is_intra) {
304         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
305         int slice_type;
306
307         slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
308         assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
309
310         intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen8_vme_source_surface_state);
311
312         if (slice_type == SLICE_TYPE_B)
313             intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen8_vme_source_surface_state);
314     }
315
316     /* VME output */
317     gen8_vme_avc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
318     gen8_vme_avc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
319     intel_h264_setup_cost_surface(ctx, encode_state, encoder_context,
320                                   BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET),
321                                   SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET));
322
323     return VA_STATUS_SUCCESS;
324 }
325
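/*
 * Fill one GEN8 interface descriptor per loaded VME kernel in the IDRT,
 * pointing it at the kernel offset, the shared binding table and the CURBE
 * data the kernel reads.
 */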
326 static VAStatus gen8_vme_interface_setup(VADriverContextP ctx, 
327                                          struct encode_state *encode_state,
328                                          struct intel_encoder_context *encoder_context)
329 {
330     struct gen6_vme_context *vme_context = encoder_context->vme_context;
331     struct gen8_interface_descriptor_data *desc;   
332     int i;
333     dri_bo *bo;
334     unsigned char *desc_ptr;
335
336     bo = vme_context->gpe_context.idrt.bo;
337     dri_bo_map(bo, 1);
338     assert(bo->virtual);
339     desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt.offset;
340
341     desc = (struct gen8_interface_descriptor_data *)desc_ptr;
342
343     for (i = 0; i < vme_context->vme_kernel_sum; i++) {
344         struct i965_kernel *kernel;
345         kernel = &vme_context->gpe_context.kernels[i];
346         assert(sizeof(*desc) == 32);
347         /* Set up the descriptor table */
348         memset(desc, 0, sizeof(*desc));
349         desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
350         desc->desc3.sampler_count = 0; /* FIXME: */
351         desc->desc3.sampler_state_pointer = 0;
352         desc->desc4.binding_table_entry_count = 1; /* FIXME: */
353         desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
354         desc->desc5.constant_urb_entry_read_offset = 0;
355         desc->desc5.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
356
357                 
358         desc++;
359     }
360
361     dri_bo_unmap(bo);
362
363     return VA_STATUS_SUCCESS;
364 }
365
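/*
 * Copy the 128-byte VME state message into the CURBE constant buffer.
 * Element 31 carries the per-MB MV count limit, which is reduced for
 * H.264 level 3.0/3.1 and above and divided by 'denom' (2 for B slices).
 */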
366 static VAStatus gen8_vme_constant_setup(VADriverContextP ctx, 
367                                         struct encode_state *encode_state,
368                                         struct intel_encoder_context *encoder_context,
369                                         int denom)
370 {
371     struct gen6_vme_context *vme_context = encoder_context->vme_context;
372     unsigned char *constant_buffer;
373     unsigned int *vme_state_message;
374     int mv_num = 32;
375
376     vme_state_message = (unsigned int *)vme_context->vme_state_message;
377
378     if (encoder_context->codec == CODEC_H264 ||
379         encoder_context->codec == CODEC_H264_MVC) {
380         if (vme_context->h264_level >= 30) {
381             mv_num = 16 / denom;
382         
383             if (vme_context->h264_level >= 31)
384                 mv_num = 8 / denom;
385         } 
386     } else if (encoder_context->codec == CODEC_MPEG2) {
387         mv_num = 2 / denom;
388     }
389
390     vme_state_message[31] = mv_num;
391
392     dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
393     assert(vme_context->gpe_context.curbe.bo->virtual);
394     constant_buffer = (unsigned char *)vme_context->gpe_context.curbe.bo->virtual +
395                                          vme_context->gpe_context.curbe.offset;
396
397     /* The VME MV/MB cost table is passed through the constant (CURBE) buffer. */
398     /* A fixed search path is used, so it is constructed directly
399      * in the GPU shader.
400      */
401     memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
402         
403     dri_bo_unmap(vme_context->gpe_context.curbe.bo);
404
405     return VA_STATUS_SUCCESS;
406 }
407
408 #define         MB_SCOREBOARD_A         (1 << 0)
409 #define         MB_SCOREBOARD_B         (1 << 1)
410 #define         MB_SCOREBOARD_C         (1 << 2)
411
412 /* Check whether the MB at (x_index, y_index) is inside the slice; returns 0 if so, -1 if out of bounds */
413 static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
414 {
415     int mb_index;
416     if (x_index < 0 || x_index >= mb_width)
417         return -1;
418     if (y_index < 0 || y_index >= mb_height)
419         return -1;
420         
421     mb_index = y_index * mb_width + x_index;
422     if (mb_index < first_mb || mb_index > (first_mb + num_mb))
423         return -1;
424     return 0;
425 }
426
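/*
 * Fill the second-level batch with a wavefront walk (each inner step moves two
 * MBs left and one row down) so the hardware scoreboard can resolve the A/B/C
 * neighbour dependencies.  One MEDIA_OBJECT plus MEDIA_STATE_FLUSH is emitted
 * per MB, carrying the scoreboard (X, Y) and dependency mask plus the MB
 * position, intra availability flags and transform_8x8 flag as inline data.
 */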
427 static void
428 gen8wa_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
429                                      struct encode_state *encode_state,
430                                      int mb_width, int mb_height,
431                                      int kernel,
432                                      int transform_8x8_mode_flag,
433                                      struct intel_encoder_context *encoder_context)
434 {
435     struct gen6_vme_context *vme_context = encoder_context->vme_context;
436     int mb_row;
437     int s;
438     unsigned int *command_ptr;
439
440 #define         USE_SCOREBOARD          (1 << 21)
441  
442     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
443     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
444
445     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
446         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
447         int first_mb = pSliceParameter->macroblock_address;
448         int num_mb = pSliceParameter->num_macroblocks;
449         unsigned int mb_intra_ub, score_dep;
450         int x_outer, y_outer, x_inner, y_inner;
451         int xtemp_outer = 0;
452
453         x_outer = first_mb % mb_width;
454         y_outer = first_mb / mb_width;
455         mb_row = y_outer;
456                                  
457         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
458             x_inner = x_outer;
459             y_inner = y_outer;
460             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
461                 mb_intra_ub = 0;
462                 score_dep = 0;
463                 if (x_inner != 0) {
464                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
465                     score_dep |= MB_SCOREBOARD_A; 
466                 }
467                 if (y_inner != mb_row) {
468                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
469                     score_dep |= MB_SCOREBOARD_B;
470                     if (x_inner != 0)
471                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
472                     if (x_inner != (mb_width -1)) {
473                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
474                         score_dep |= MB_SCOREBOARD_C;
475                     }
476                 }
477                                                         
478                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
479                 *command_ptr++ = kernel;
480                 *command_ptr++ = USE_SCOREBOARD;
481                 /* Indirect data */
482                 *command_ptr++ = 0;
483                 /* the (X, Y) term of scoreboard */
484                 *command_ptr++ = ((y_inner << 16) | x_inner);
485                 *command_ptr++ = score_dep;
486                 /*inline data */
487                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
488                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
489                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
490                 *command_ptr++ = 0;
491
492                 x_inner -= 2;
493                 y_inner += 1;
494             }
495             x_outer += 1;
496         }
497
498         xtemp_outer = mb_width - 2;
499         if (xtemp_outer < 0)
500             xtemp_outer = 0;
501         x_outer = xtemp_outer;
502         y_outer = first_mb / mb_width;
503         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
504             y_inner = y_outer;
505             x_inner = x_outer;
506             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
507                 mb_intra_ub = 0;
508                 score_dep = 0;
509                 if (x_inner != 0) {
510                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
511                     score_dep |= MB_SCOREBOARD_A; 
512                 }
513                 if (y_inner != mb_row) {
514                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
515                     score_dep |= MB_SCOREBOARD_B;
516                     if (x_inner != 0)
517                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
518
519                     if (x_inner != (mb_width -1)) {
520                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
521                         score_dep |= MB_SCOREBOARD_C;
522                     }
523                 }
524
525                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
526                 *command_ptr++ = kernel;
527                 *command_ptr++ = USE_SCOREBOARD;
528                 /* Indirect data */
529                 *command_ptr++ = 0;
530                 /* the (X, Y) term of scoreboard */
531                 *command_ptr++ = ((y_inner << 16) | x_inner);
532                 *command_ptr++ = score_dep;
533                 /*inline data */
534                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
535                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
536
537                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
538                 *command_ptr++ = 0;
539                 x_inner -= 2;
540                 y_inner += 1;
541             }
542             x_outer++;
543             if (x_outer >= mb_width) {
544                 y_outer += 1;
545                 x_outer = xtemp_outer;
546             }           
547         }
548     }
549
550     *command_ptr++ = MI_BATCH_BUFFER_END;
551     *command_ptr++ = 0;
552
553     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
554 }
555
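/*
 * Raster-order batch fill used when hardware scoreboarding is not allowed:
 * one MEDIA_OBJECT per MB in slice order, with the MB position, intra
 * availability flags, quality level, transform_8x8 flag and per-MB QP
 * (from the ROI map when ROI is enabled, otherwise the slice/BRC QP) inline.
 */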
556 static void
557 gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx, 
558                               struct encode_state *encode_state,
559                               int mb_width, int mb_height,
560                               int kernel,
561                               int transform_8x8_mode_flag,
562                               struct intel_encoder_context *encoder_context)
563 {
564     struct gen6_vme_context *vme_context = encoder_context->vme_context;
565     int mb_x = 0, mb_y = 0;
566     int i, s;
567     unsigned int *command_ptr;
568     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
569     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
570     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
571     int qp;
572     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
573     int qp_mb, qp_index;
574
575     if (encoder_context->rate_control_mode == VA_RC_CQP)
576         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
577     else
578         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
579
580     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
581     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
582
583     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
584         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
585         int slice_mb_begin = pSliceParameter->macroblock_address;
586         int slice_mb_number = pSliceParameter->num_macroblocks;
587         unsigned int mb_intra_ub;
588         int slice_mb_x = pSliceParameter->macroblock_address % mb_width; 
589         for (i = 0; i < slice_mb_number;  ) {
590             int mb_count = i + slice_mb_begin;    
591             mb_x = mb_count % mb_width;
592             mb_y = mb_count / mb_width;
593             mb_intra_ub = 0;
594             if (mb_x != 0) {
595                 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
596             }
597             if (mb_y != 0) {
598                 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
599                 if (mb_x != 0)
600                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
601                 if (mb_x != (mb_width -1))
602                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
603             }
604             if (i < mb_width) {
605                 if (i == 0)
606                     mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
607                 mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
608                 if ((i == (mb_width - 1)) && slice_mb_x) {
609                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
610                 }
611             }
612                 
613             if ((i == mb_width) && slice_mb_x) {
614                 mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
615             }
616             *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
617             *command_ptr++ = kernel;
618             *command_ptr++ = 0;
619             *command_ptr++ = 0;
620             *command_ptr++ = 0;
621             *command_ptr++ = 0;
622    
623             /*inline data */
624             *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
625             *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
626             /* qp occupies one byte */
627             if (vme_context->roi_enabled) {
628                 qp_index = mb_y * mb_width + mb_x;
629                 qp_mb = *(vme_context->qp_per_mb + qp_index);
630             } else
631                 qp_mb = qp;
632             *command_ptr++ = qp_mb;
633
634             *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
635             *command_ptr++ = 0;
636             i += 1;
637         } 
638     }
639
640     *command_ptr++ = MI_BATCH_BUFFER_END;
641     *command_ptr++ = 0;
642
643     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
644 }
645
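/* Reset the GPE context and drop any VME buffers left over from a previous frame. */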
646 static void gen8_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
647 {
648     struct gen6_vme_context *vme_context = encoder_context->vme_context;
649
650     gen8_gpe_context_init(ctx, &vme_context->gpe_context);
651
652     /* VME output buffer */
653     dri_bo_unreference(vme_context->vme_output.bo);
654     vme_context->vme_output.bo = NULL;
655
656     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
657     vme_context->vme_batchbuffer.bo = NULL;
658
659     /* VME state */
660     dri_bo_unreference(vme_context->vme_state.bo);
661     vme_context->vme_state.bo = NULL;
662 }
663
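/*
 * Pick the VME kernel from the slice type (intra / inter / B-inter), decide
 * whether the scoreboard walker may be used (quality level must not be low and
 * every slice must start at a row boundary), fill the second-level batch
 * accordingly and chain to it with MI_BATCH_BUFFER_START.
 */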
664 static void gen8_vme_pipeline_programing(VADriverContextP ctx, 
665                                          struct encode_state *encode_state,
666                                          struct intel_encoder_context *encoder_context)
667 {
668     struct gen6_vme_context *vme_context = encoder_context->vme_context;
669     struct intel_batchbuffer *batch = encoder_context->base.batch;
670     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
671     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
672     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
673     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
674     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
675     int kernel_shader;
676     bool allow_hwscore = true;
677     int s;
678     unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
679
680     if (is_low_quality)
681         allow_hwscore = false;
682     else {
683         for (s = 0; s < encode_state->num_slice_params_ext; s++) {
684             pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
685             if ((pSliceParameter->macroblock_address % width_in_mbs)) {
686                 allow_hwscore = false;
687                 break;
688             }
689         }
690     }
691
692     if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
693         (pSliceParameter->slice_type == SLICE_TYPE_SI)) {
694         kernel_shader = VME_INTRA_SHADER;
695     } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
696                (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
697         kernel_shader = VME_INTER_SHADER;
698     } else {
699         kernel_shader = VME_BINTER_SHADER;
700         if (!allow_hwscore)
701             kernel_shader = VME_INTER_SHADER;
702     }
703     if (allow_hwscore)
704         gen8wa_vme_walker_fill_vme_batchbuffer(ctx, 
705                                              encode_state,
706                                              width_in_mbs, height_in_mbs,
707                                              kernel_shader,
708                                              pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
709                                              encoder_context);
710     else
711         gen8_vme_fill_vme_batchbuffer(ctx, 
712                                       encode_state,
713                                       width_in_mbs, height_in_mbs,
714                                       kernel_shader,
715                                       pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
716                                       encoder_context);
717
718     intel_batchbuffer_start_atomic(batch, 0x1000);
719     gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
720     BEGIN_BATCH(batch, 3);
721     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
722     OUT_RELOC(batch,
723               vme_context->vme_batchbuffer.bo,
724               I915_GEM_DOMAIN_COMMAND, 0, 
725               0);
726     OUT_BATCH(batch, 0);
727     ADVANCE_BATCH(batch);
728
729     intel_batchbuffer_end_atomic(batch);        
730 }
731
732 static VAStatus gen8_vme_prepare(VADriverContextP ctx, 
733                                  struct encode_state *encode_state,
734                                  struct intel_encoder_context *encoder_context)
735 {
736     VAStatus vaStatus = VA_STATUS_SUCCESS;
737     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
738     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
739     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
740     struct gen6_vme_context *vme_context = encoder_context->vme_context;
741
742     if (!vme_context->h264_level ||
743         (vme_context->h264_level != pSequenceParameter->level_idc)) {
744         vme_context->h264_level = pSequenceParameter->level_idc;        
745     }   
746
747     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
748     intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context);
749     intel_h264_enc_roi_config(ctx, encode_state, encoder_context);
750
751     /* Set up all the memory objects */
752     gen8_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
753     gen8_vme_interface_setup(ctx, encode_state, encoder_context);
754     //gen8_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
755     gen8_vme_constant_setup(ctx, encode_state, encoder_context, (pSliceParameter->slice_type == SLICE_TYPE_B) ? 2 : 1);
756
757     /* Program the media pipeline */
758     gen8_vme_pipeline_programing(ctx, encode_state, encoder_context);
759
760     return vaStatus;
761 }
762
763 static VAStatus gen8_vme_run(VADriverContextP ctx, 
764                              struct encode_state *encode_state,
765                              struct intel_encoder_context *encoder_context)
766 {
767     struct intel_batchbuffer *batch = encoder_context->base.batch;
768
769     intel_batchbuffer_flush(batch);
770
771     return VA_STATUS_SUCCESS;
772 }
773
774 static VAStatus gen8_vme_stop(VADriverContextP ctx, 
775                               struct encode_state *encode_state,
776                               struct intel_encoder_context *encoder_context)
777 {
778     return VA_STATUS_SUCCESS;
779 }
780
781 static VAStatus
782 gen8_vme_pipeline(VADriverContextP ctx,
783                   VAProfile profile,
784                   struct encode_state *encode_state,
785                   struct intel_encoder_context *encoder_context)
786 {
787     gen8_vme_media_init(ctx, encoder_context);
788     gen8_vme_prepare(ctx, encode_state, encoder_context);
789     gen8_vme_run(ctx, encode_state, encoder_context);
790     gen8_vme_stop(ctx, encode_state, encoder_context);
791
792     return VA_STATUS_SUCCESS;
793 }
794
795 static void
796 gen8_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
797                                    struct encode_state *encode_state,
798                                    int index,
799                                    int is_intra,
800                                    struct intel_encoder_context *encoder_context)
801 {
802     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
803     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
804     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
805
806     gen8_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
807 }
808
809 static void
810 gen8_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
811                                             struct encode_state *encode_state,
812                                             int index,
813                                             struct intel_encoder_context *encoder_context)
814 {
815     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
816     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
817     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
818
819     gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
820 }
821
822 static VAStatus
823 gen8_vme_mpeg2_surface_setup(VADriverContextP ctx, 
824                              struct encode_state *encode_state,
825                              int is_intra,
826                              struct intel_encoder_context *encoder_context)
827 {
828     struct object_surface *obj_surface;
829
830     /* Set up surface states */
831     /* current picture for encoding */
832     obj_surface = encode_state->input_yuv_object;
833     gen8_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
834     gen8_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
835     gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
836
837     if (!is_intra) {
838         /* reference 0 */
839         obj_surface = encode_state->reference_objects[0];
840
841         if (obj_surface->bo != NULL)
842             gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
843
844         /* reference 1 */
845         obj_surface = encode_state->reference_objects[1];
846
847         if (obj_surface && obj_surface->bo != NULL) 
848             gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
849     }
850
851     /* VME output */
852     gen8_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
853     gen8_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
854
855     return VA_STATUS_SUCCESS;
856 }
857
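/*
 * MPEG-2 variant of the scoreboard wavefront walker above: it always covers
 * the whole frame (first_mb = 0) and passes no transform_8x8 flag inline.
 */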
858 static void
859 gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
860                                            struct encode_state *encode_state,
861                                            int mb_width, int mb_height,
862                                            int kernel,
863                                            struct intel_encoder_context *encoder_context)
864 {
865     struct gen6_vme_context *vme_context = encoder_context->vme_context;
866     unsigned int *command_ptr;
867
868 #define         MPEG2_SCOREBOARD                (1 << 21)
869
870     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
871     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
872
873     {
874         unsigned int mb_intra_ub, score_dep;
875         int x_outer, y_outer, x_inner, y_inner;
876         int xtemp_outer = 0;
877         int first_mb = 0;
878         int num_mb = mb_width * mb_height;
879
880         x_outer = 0;
881         y_outer = 0;
882         
883                                  
884         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
885             x_inner = x_outer;
886             y_inner = y_outer;
887             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
888                 mb_intra_ub = 0;
889                 score_dep = 0;
890                 if (x_inner != 0) {
891                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
892                     score_dep |= MB_SCOREBOARD_A; 
893                 }
894                 if (y_inner != 0) {
895                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
896                     score_dep |= MB_SCOREBOARD_B;
897
898                     if (x_inner != 0)
899                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
900
901                     if (x_inner != (mb_width -1)) {
902                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
903                         score_dep |= MB_SCOREBOARD_C;
904                     }
905                 }
906                                                         
907                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
908                 *command_ptr++ = kernel;
909                 *command_ptr++ = MPEG2_SCOREBOARD;
910                 /* Indirect data */
911                 *command_ptr++ = 0;
912                 /* the (X, Y) term of scoreboard */
913                 *command_ptr++ = ((y_inner << 16) | x_inner);
914                 *command_ptr++ = score_dep;
915                 /*inline data */
916                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
917                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
918                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
919                 *command_ptr++ = 0;
920
921                 x_inner -= 2;
922                 y_inner += 1;
923             }
924             x_outer += 1;
925         }
926
927         xtemp_outer = mb_width - 2;
928         if (xtemp_outer < 0)
929             xtemp_outer = 0;
930         x_outer = xtemp_outer;
931         y_outer = 0;
932         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
933             y_inner = y_outer;
934             x_inner = x_outer;
935             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
936                 mb_intra_ub = 0;
937                 score_dep = 0;
938                 if (x_inner != 0) {
939                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
940                     score_dep |= MB_SCOREBOARD_A; 
941                 }
942                 if (y_inner != 0) {
943                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
944                     score_dep |= MB_SCOREBOARD_B;
945
946                     if (x_inner != 0)
947                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
948
949                     if (x_inner != (mb_width -1)) {
950                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
951                         score_dep |= MB_SCOREBOARD_C;
952                     }
953                 }
954
955                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
956                 *command_ptr++ = kernel;
957                 *command_ptr++ = MPEG2_SCOREBOARD;
958                 /* Indirect data */
959                 *command_ptr++ = 0;
960                 /* the (X, Y) term of scoreboard */
961                 *command_ptr++ = ((y_inner << 16) | x_inner);
962                 *command_ptr++ = score_dep;
963                 /*inline data */
964                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
965                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
966
967                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
968                 *command_ptr++ = 0;
969                 x_inner -= 2;
970                 y_inner += 1;
971             }
972             x_outer++;
973             if (x_outer >= mb_width) {
974                 y_outer += 1;
975                 x_outer = xtemp_outer;
976             }           
977         }
978     }
979
980     *command_ptr++ = MI_BATCH_BUFFER_END;
981     *command_ptr++ = 0;
982
983     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
984     return;
985 }
986
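/* Raster-order MPEG-2 batch fill used when the scoreboard walker is not allowed. */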
987 static void
988 gen8_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx, 
989                                     struct encode_state *encode_state,
990                                     int mb_width, int mb_height,
991                                     int kernel,
992                                     int transform_8x8_mode_flag,
993                                     struct intel_encoder_context *encoder_context)
994 {
995     struct gen6_vme_context *vme_context = encoder_context->vme_context;
996     int mb_x = 0, mb_y = 0;
997     int i, s, j;
998     unsigned int *command_ptr;
999
1000
1001     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1002     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1003
1004     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1005         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
1006
1007         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
1008             int slice_mb_begin = slice_param->macroblock_address;
1009             int slice_mb_number = slice_param->num_macroblocks;
1010             unsigned int mb_intra_ub;
1011
1012             for (i = 0; i < slice_mb_number;) {
1013                 int mb_count = i + slice_mb_begin;    
1014
1015                 mb_x = mb_count % mb_width;
1016                 mb_y = mb_count / mb_width;
1017                 mb_intra_ub = 0;
1018
1019                 if (mb_x != 0) {
1020                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1021                 }
1022
1023                 if (mb_y != 0) {
1024                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1025
1026                     if (mb_x != 0)
1027                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1028
1029                     if (mb_x != (mb_width -1))
1030                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1031                 }
1032
1033                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1034                 *command_ptr++ = kernel;
1035                 *command_ptr++ = 0;
1036                 *command_ptr++ = 0;
1037                 *command_ptr++ = 0;
1038                 *command_ptr++ = 0;
1039    
1040                 /*inline data */
1041                 *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
1042                 *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1043
1044                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
1045                 *command_ptr++ = 0;
1046                 i += 1;
1047             }
1048
1049             slice_param++;
1050         }
1051     }
1052
1053     *command_ptr++ = MI_BATCH_BUFFER_END;
1054     *command_ptr++ = 0;
1055
1056     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1057 }
1058
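/*
 * Choose the intra or inter kernel from the picture type and allow the
 * scoreboard walker only for non-intra pictures whose slices all start at a
 * row boundary, then fill and chain to the second-level batch.
 */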
1059 static void
1060 gen8_vme_mpeg2_pipeline_programing(VADriverContextP ctx, 
1061                                    struct encode_state *encode_state,
1062                                    int is_intra,
1063                                    struct intel_encoder_context *encoder_context)
1064 {
1065     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1066     struct intel_batchbuffer *batch = encoder_context->base.batch;
1067     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1068     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1069     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1070     bool allow_hwscore = true;
1071     int s;
1072     int kernel_shader;
1073     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1074
1075     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1076         int j;
1077         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
1078
1079         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
1080             if (slice_param->macroblock_address % width_in_mbs) {
1081                 allow_hwscore = false;
1082                 break;
1083             }
1084         }
1085     }
1086
1087     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1088     if (pic_param->picture_type == VAEncPictureTypeIntra) {
1089         allow_hwscore = false;
1090         kernel_shader = VME_INTRA_SHADER;
1091     } else {
1092         kernel_shader = VME_INTER_SHADER;
1093     }
1094
1095     if (allow_hwscore) 
1096         gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
1097                                                    encode_state,
1098                                                    width_in_mbs, height_in_mbs,
1099                                                    kernel_shader,
1100                                                    encoder_context);
1101     else
1102         gen8_vme_mpeg2_fill_vme_batchbuffer(ctx, 
1103                                             encode_state,
1104                                             width_in_mbs, height_in_mbs,
1105                                             is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
1106                                             0,
1107                                             encoder_context);
1108
1109     intel_batchbuffer_start_atomic(batch, 0x1000);
1110     gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1111     BEGIN_BATCH(batch, 4);
1112     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1113     OUT_RELOC(batch,
1114               vme_context->vme_batchbuffer.bo,
1115               I915_GEM_DOMAIN_COMMAND, 0, 
1116               0);
1117     OUT_BATCH(batch, 0);
1118     OUT_BATCH(batch, 0);
1119     ADVANCE_BATCH(batch);
1120
1121     intel_batchbuffer_end_atomic(batch);        
1122 }
1123
1124 static VAStatus 
1125 gen8_vme_mpeg2_prepare(VADriverContextP ctx, 
1126                        struct encode_state *encode_state,
1127                        struct intel_encoder_context *encoder_context)
1128 {
1129     VAStatus vaStatus = VA_STATUS_SUCCESS;
1130     VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1131     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1132     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1133
1134     if ((!vme_context->mpeg2_level) ||
1135         (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
1136         vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
1137     }
1138
1139         
1140     /* Set up all the memory objects */
1141     gen8_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1142     gen8_vme_interface_setup(ctx, encode_state, encoder_context);
1143     //gen8_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1144     intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
1145     gen8_vme_constant_setup(ctx, encode_state, encoder_context, 1);
1146
1147     /* Program the media pipeline */
1148     gen8_vme_mpeg2_pipeline_programing(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1149
1150     return vaStatus;
1151 }
1152
1153 static VAStatus
1154 gen8_vme_mpeg2_pipeline(VADriverContextP ctx,
1155                         VAProfile profile,
1156                         struct encode_state *encode_state,
1157                         struct intel_encoder_context *encoder_context)
1158 {
1159     gen8_vme_media_init(ctx, encoder_context);
1160     gen8_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
1161     gen8_vme_run(ctx, encode_state, encoder_context);
1162     gen8_vme_stop(ctx, encode_state, encoder_context);
1163
1164     return VA_STATUS_SUCCESS;
1165 }
1166
1167 static void
1168 gen8_vme_vp8_output_buffer_setup(VADriverContextP ctx,
1169                                    struct encode_state *encode_state,
1170                                    int index,
1171                                    int is_intra,
1172                                    struct intel_encoder_context *encoder_context)
1173 {
1174     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1175     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1176     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1177
1178     gen8_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
1179 }
1180
1181 static void
1182 gen8_vme_vp8_output_vme_batchbuffer_setup(VADriverContextP ctx,
1183                                             struct encode_state *encode_state,
1184                                             int index,
1185                                             struct intel_encoder_context *encoder_context)
1186 {
1187     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1188     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1189     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1190
1191     gen8_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
1192 }
1193
1194 static VAStatus
1195 gen8_vme_vp8_surface_setup(VADriverContextP ctx,
1196                              struct encode_state *encode_state,
1197                              int is_intra,
1198                              struct intel_encoder_context *encoder_context)
1199 {
1200     struct object_surface *obj_surface;
1201
1202     /* Set up surface states */
1203     /* current picture for encoding */
1204     obj_surface = encode_state->input_yuv_object;
1205     gen8_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
1206     gen8_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
1207     gen8_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
1208
1209     if (!is_intra) {
1210         /* reference 0 */
1211         obj_surface = encode_state->reference_objects[0];
1212
1213         if (obj_surface->bo != NULL)
1214             gen8_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
1215
1216         /* reference 1 */
1217         obj_surface = encode_state->reference_objects[1];
1218
1219         if (obj_surface && obj_surface->bo != NULL)
1220             gen8_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
1221     }
1222
1223     /* VME output */
1224     gen8_vme_vp8_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
1225     gen8_vme_vp8_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
1226
1227     return VA_STATUS_SUCCESS;
1228 }
1229
1230 static void
1231 gen8_vme_vp8_pipeline_programing(VADriverContextP ctx,
1232                                    struct encode_state *encode_state,
1233                                    int is_intra,
1234                                    struct intel_encoder_context *encoder_context)
1235 {
1236     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1237     struct intel_batchbuffer *batch = encoder_context->base.batch;
1238     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1239     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1240     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1241     int kernel_shader = (is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER);
1242
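    /* VP8 reuses the MPEG-2 scoreboard walker to fill the second-level batch. */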
1243     gen8wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
1244                                                  encode_state,
1245                                                  width_in_mbs, height_in_mbs,
1246                                                  kernel_shader,
1247                                                  encoder_context);
1248
1249     intel_batchbuffer_start_atomic(batch, 0x1000);
1250     gen8_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1251     BEGIN_BATCH(batch, 4);
1252     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1253     OUT_RELOC(batch,
1254               vme_context->vme_batchbuffer.bo,
1255               I915_GEM_DOMAIN_COMMAND, 0,
1256               0);
1257     OUT_BATCH(batch, 0);
1258     OUT_BATCH(batch, 0);
1259     ADVANCE_BATCH(batch);
1260
1261     intel_batchbuffer_end_atomic(batch);
1262 }
1263
1264 static VAStatus gen8_vme_vp8_prepare(VADriverContextP ctx,
1265                                  struct encode_state *encode_state,
1266                                  struct intel_encoder_context *encoder_context)
1267 {
1268     VAStatus vaStatus = VA_STATUS_SUCCESS;
1269     VAEncPictureParameterBufferVP8 *pPicParameter = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
1270     int is_intra = !pPicParameter->pic_flags.bits.frame_type;
1271
1272     /* update vp8 mbmv cost */
1273     intel_vme_vp8_update_mbmv_cost(ctx, encode_state, encoder_context);
1274
1275     /* Set up all the memory objects */
1276     gen8_vme_vp8_surface_setup(ctx, encode_state, is_intra, encoder_context);
1277     gen8_vme_interface_setup(ctx, encode_state, encoder_context);
1278     gen8_vme_constant_setup(ctx, encode_state, encoder_context, 1);
1279
1280     /* Program the media pipeline */
1281     gen8_vme_vp8_pipeline_programing(ctx, encode_state, is_intra, encoder_context);
1282
1283     return vaStatus;
1284 }
1285
1286 static VAStatus
1287 gen8_vme_vp8_pipeline(VADriverContextP ctx,
1288                         VAProfile profile,
1289                         struct encode_state *encode_state,
1290                         struct intel_encoder_context *encoder_context)
1291 {
1292     gen8_vme_media_init(ctx, encoder_context);
1293     gen8_vme_vp8_prepare(ctx, encode_state, encoder_context);
1294     gen8_vme_run(ctx, encode_state, encoder_context);
1295     gen8_vme_stop(ctx, encode_state, encoder_context);
1296
1297     return VA_STATUS_SUCCESS;
1298 }
1299
1300 static void
1301 gen8_vme_context_destroy(void *context)
1302 {
1303     struct gen6_vme_context *vme_context = context;
1304
1305     gen8_gpe_context_destroy(&vme_context->gpe_context);
1306
1307     dri_bo_unreference(vme_context->vme_output.bo);
1308     vme_context->vme_output.bo = NULL;
1309
1310     dri_bo_unreference(vme_context->vme_state.bo);
1311     vme_context->vme_state.bo = NULL;
1312
1313     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
1314     vme_context->vme_batchbuffer.bo = NULL;
1315
1316     free(vme_context->vme_state_message);
1317     vme_context->vme_state_message = NULL;
1318
1319     dri_bo_unreference(vme_context->i_qp_cost_table);
1320     vme_context->i_qp_cost_table = NULL;
1321
1322     dri_bo_unreference(vme_context->p_qp_cost_table);
1323     vme_context->p_qp_cost_table = NULL;
1324
1325     dri_bo_unreference(vme_context->b_qp_cost_table);
1326     vme_context->b_qp_cost_table = NULL;
1327
1328     free(vme_context->qp_per_mb);
1329     vme_context->qp_per_mb = NULL;
1330
1331     free(vme_context);
1332 }
1333
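/*
 * Create and configure the VME context for the codec being encoded: select the
 * kernel list and pipeline entry point, size the binding table, IDRT, CURBE and
 * VFE state, load the kernels and hook up the surface-setup callbacks.  JPEG
 * has no VME stage, so its VME fields are left NULL.
 */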
1334 Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1335 {
1336     struct i965_driver_data *i965 = i965_driver_data(ctx);
1337     struct gen6_vme_context *vme_context = NULL;
1338     struct i965_kernel *vme_kernel_list = NULL;
1339     int i965_kernel_num;
1340
1341     switch (encoder_context->codec) {
1342     case CODEC_H264:
1343     case CODEC_H264_MVC:
1344         vme_kernel_list = gen8_vme_kernels;
1345         encoder_context->vme_pipeline = gen8_vme_pipeline;
1346         i965_kernel_num = sizeof(gen8_vme_kernels) / sizeof(struct i965_kernel); 
1347         break;
1348
1349     case CODEC_MPEG2:
1350         vme_kernel_list = gen8_vme_mpeg2_kernels;
1351         encoder_context->vme_pipeline = gen8_vme_mpeg2_pipeline;
1352         i965_kernel_num = sizeof(gen8_vme_mpeg2_kernels) / sizeof(struct i965_kernel); 
1353         break;
1354
1355     case CODEC_JPEG:
1356         // JPEG encoding doesn't use VME, so set the corresponding fields to NULL.
1357         encoder_context->vme_context = NULL;
1358         encoder_context->vme_pipeline = NULL;
1359         encoder_context->vme_context_destroy = NULL;
1360         break;
1361
1362     case CODEC_VP8:
1363         vme_kernel_list = gen8_vme_vp8_kernels;
1364         encoder_context->vme_pipeline = gen8_vme_vp8_pipeline;
1365         i965_kernel_num = sizeof(gen8_vme_vp8_kernels) / sizeof(struct i965_kernel);
1366         break;
1367
1368     default:
1369         /* never get here */
1370         assert(0);
1371
1372         break;
1373     }
1374
1375     // If the codec is JPEG, bypass VME
1376     if (encoder_context->codec != CODEC_JPEG) {
1377         vme_context = calloc(1, sizeof(struct gen6_vme_context));
1378         assert(vme_context);
1379         vme_context->vme_kernel_sum = i965_kernel_num;
1380         vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1381
1382         vme_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
1383         vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
1384
1385         vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
1386         vme_context->gpe_context.sampler.entry_size = 0;
1387         vme_context->gpe_context.sampler.max_entries = 0;
1388
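        /*
         * Scale the VFE thread count with the EU total queried from the kernel
         * (6 threads per EU); fall back to 59 when no EU count is available.
         */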
1389         if (i965->intel.eu_total > 0)
1390             vme_context->gpe_context.vfe_state.max_num_threads =
1391                 6 * i965->intel.eu_total;
1392         else
1393             vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1394
1395         vme_context->gpe_context.vfe_state.num_urb_entries = 64;
1396         vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
1397         vme_context->gpe_context.vfe_state.urb_entry_size = 16;
1398         vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
1399
1400         gen7_vme_scoreboard_init(ctx, vme_context);
1401
1402         gen8_gpe_load_kernels(ctx,
1403                           &vme_context->gpe_context,
1404                           vme_kernel_list,
1405                           i965_kernel_num);
1406         vme_context->vme_surface2_setup = gen8_gpe_surface2_setup;
1407         vme_context->vme_media_rw_surface_setup = gen8_gpe_media_rw_surface_setup;
1408         vme_context->vme_buffer_suface_setup = gen8_gpe_buffer_suface_setup;
1409         vme_context->vme_media_chroma_surface_setup = gen8_gpe_media_chroma_surface_setup;
1410
1411         encoder_context->vme_context = vme_context;
1412         encoder_context->vme_context_destroy = gen8_vme_context_destroy;
1413
1414         vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
1415     }
1416
1417     return True;
1418 }