OSDN Git Service

Keep the old indent style in the gen7_vme/gen75_vme files
[android-x86/hardware-intel-common-vaapi.git] / src / gen7_vme.c
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "intel_driver.h"
37
38 #include "i965_defines.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "gen6_vme.h"
42 #include "gen6_mfc.h"
43 #ifdef SURFACE_STATE_PADDED_SIZE
44 #undef SURFACE_STATE_PADDED_SIZE
45 #endif
46
#define VME_MSG_LENGTH          32
#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)

#define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN7
/* Parenthesize the macro argument so expression arguments expand correctly
 * (CERT PRE01-C); behavior is unchanged for the simple-int callers in this
 * file. */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
#define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
59
/* Video coding standards handled by this VME module. */
enum VIDEO_CODING_TYPE{
    VIDEO_CODING_AVC = 0,
    VIDEO_CODING_MPEG2,
    VIDEO_CODING_SUM
};

/* Indices into gen7_vme_kernels[] for the AVC shaders. */
enum AVC_VME_KERNEL_TYPE{
    AVC_VME_INTRA_SHADER = 0,   /* intra-frame motion estimation */
    AVC_VME_INTER_SHADER,       /* inter-frame (P) motion estimation */
    AVC_VME_BATCHBUFFER,        /* builds the second-level batchbuffer */
    AVC_VME_BINTER_SHADER,      /* inter-frame (B) motion estimation */
    AVC_VME_KERNEL_SUM
};

/* Indices into gen7_vme_mpeg2_kernels[] for the MPEG2 shaders. */
enum MPEG2_VME_KERNEL_TYPE{
    MPEG2_VME_INTER_SHADER = 0,
    MPEG2_VME_BATCHBUFFER,
    MPEG2_VME_KERNEL_SUM
};

/* Scoreboard dependency bits used by the media-object walker:
 * A = left, B = above, C = above-right neighbour macroblock
 * (see gen7_vme_walker_fill_vme_batchbuffer). */
#define         MB_SCOREBOARD_A         (1 << 0)
#define         MB_SCOREBOARD_B         (1 << 1)
#define         MB_SCOREBOARD_C         (1 << 2)
83
/* Pre-compiled Ivybridge (Gen7) VME shader binaries; each entry is one
 * 128-bit EU instruction, generated from the sources under shaders/vme/. */
static const uint32_t gen7_vme_intra_frame[][4] = {
#include "shaders/vme/intra_frame_ivb.g7b"
};

static const uint32_t gen7_vme_inter_frame[][4] = {
#include "shaders/vme/inter_frame_ivb.g7b"
};

static const uint32_t gen7_vme_batchbuffer[][4] = {
#include "shaders/vme/batchbuffer.g7b"
};

static const uint32_t gen7_vme_binter_frame[][4] = {
#include "shaders/vme/inter_bframe_ivb.g7b"
};
99
/* AVC kernel descriptors, ordered to match enum AVC_VME_KERNEL_TYPE.
 * Each entry: name, index, shader binary, binary size, bo (filled at init). */
static struct i965_kernel gen7_vme_kernels[] = {
    {
        "AVC VME Intra Frame",
        AVC_VME_INTRA_SHADER,                   /*index*/
        gen7_vme_intra_frame,
        sizeof(gen7_vme_intra_frame),
        NULL
    },
    {
        "AVC VME inter Frame",
        AVC_VME_INTER_SHADER,
        gen7_vme_inter_frame,
        sizeof(gen7_vme_inter_frame),
        NULL
    },
    {
        "AVC VME BATCHBUFFER",
        AVC_VME_BATCHBUFFER,
        gen7_vme_batchbuffer,
        sizeof(gen7_vme_batchbuffer),
        NULL
    },
    {
        "AVC VME binter Frame",
        AVC_VME_BINTER_SHADER,
        gen7_vme_binter_frame,
        sizeof(gen7_vme_binter_frame),
        NULL
    }
};
130
/* Pre-compiled Gen7 MPEG2 VME shader binaries (see shaders/vme/). */
static const uint32_t gen7_vme_mpeg2_inter_frame[][4] = {
#include "shaders/vme/mpeg2_inter_frame.g7b"
};

static const uint32_t gen7_vme_mpeg2_batchbuffer[][4] = {
#include "shaders/vme/batchbuffer.g7b"
};
138
/* MPEG2 kernel descriptors, ordered to match enum MPEG2_VME_KERNEL_TYPE. */
static struct i965_kernel gen7_vme_mpeg2_kernels[] = {
    {
        "MPEG2 VME inter Frame",
        MPEG2_VME_INTER_SHADER,
        gen7_vme_mpeg2_inter_frame,
        sizeof(gen7_vme_mpeg2_inter_frame),
        NULL
    },
    {
        "MPEG2 VME BATCHBUFFER",
        MPEG2_VME_BATCHBUFFER,
        gen7_vme_mpeg2_batchbuffer,
        sizeof(gen7_vme_mpeg2_batchbuffer),
        NULL
    },
};
155
156 /* only used for VME source surface state */
157 static void 
158 gen7_vme_source_surface_state(VADriverContextP ctx,
159                               int index,
160                               struct object_surface *obj_surface,
161                               struct intel_encoder_context *encoder_context)
162 {
163     struct gen6_vme_context *vme_context = encoder_context->vme_context;
164
165     vme_context->vme_surface2_setup(ctx,
166                                     &vme_context->gpe_context,
167                                     obj_surface,
168                                     BINDING_TABLE_OFFSET(index),
169                                     SURFACE_STATE_OFFSET(index));
170 }
171
172 static void
173 gen7_vme_media_source_surface_state(VADriverContextP ctx,
174                                     int index,
175                                     struct object_surface *obj_surface,
176                                     struct intel_encoder_context *encoder_context)
177 {
178     struct gen6_vme_context *vme_context = encoder_context->vme_context;
179
180     vme_context->vme_media_rw_surface_setup(ctx,
181                                             &vme_context->gpe_context,
182                                             obj_surface,
183                                             BINDING_TABLE_OFFSET(index),
184                                             SURFACE_STATE_OFFSET(index));
185 }
186
187 static void
188 gen7_vme_output_buffer_setup(VADriverContextP ctx,
189                              struct encode_state *encode_state,
190                              int index,
191                              struct intel_encoder_context *encoder_context)
192
193 {
194     struct i965_driver_data *i965 = i965_driver_data(ctx);
195     struct gen6_vme_context *vme_context = encoder_context->vme_context;
196     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
197     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
198     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
199     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
200     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
201
202     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
203     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
204
205     if (is_intra)
206         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES;
207     else
208         vme_context->vme_output.size_block = INTER_VME_OUTPUT_IN_BYTES;
209
210     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, 
211                                               "VME output buffer",
212                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
213                                               0x1000);
214     assert(vme_context->vme_output.bo);
215     vme_context->vme_buffer_suface_setup(ctx,
216                                          &vme_context->gpe_context,
217                                          &vme_context->vme_output,
218                                          BINDING_TABLE_OFFSET(index),
219                                          SURFACE_STATE_OFFSET(index));
220 }
221
222 static void
223 gen7_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
224                                       struct encode_state *encode_state,
225                                       int index,
226                                       struct intel_encoder_context *encoder_context)
227
228 {
229     struct i965_driver_data *i965 = i965_driver_data(ctx);
230     struct gen6_vme_context *vme_context = encoder_context->vme_context;
231     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
232     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
233     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
234
235     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
236     vme_context->vme_batchbuffer.size_block = 32; /* 2 OWORDs */
237     vme_context->vme_batchbuffer.pitch = 16;
238     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, 
239                                                    "VME batchbuffer",
240                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
241                                                    0x1000);
242     vme_context->vme_buffer_suface_setup(ctx,
243                                          &vme_context->gpe_context,
244                                          &vme_context->vme_batchbuffer,
245                                          BINDING_TABLE_OFFSET(index),
246                                          SURFACE_STATE_OFFSET(index));
247 }
248
249 static VAStatus
250 gen7_vme_surface_setup(VADriverContextP ctx, 
251                        struct encode_state *encode_state,
252                        int is_intra,
253                        struct intel_encoder_context *encoder_context)
254 {
255     struct i965_driver_data *i965 = i965_driver_data(ctx);
256     struct object_surface *obj_surface;
257     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
258
259     /*Setup surfaces state*/
260     /* current picture for encoding */
261     obj_surface = SURFACE(encoder_context->input_yuv_surface);
262     assert(obj_surface);
263     gen7_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
264     gen7_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
265
266     if (!is_intra) {
267         /* reference 0 */
268         obj_surface = SURFACE(pPicParameter->ReferenceFrames[0].picture_id);
269         assert(obj_surface);
270         if ( obj_surface->bo != NULL)
271             gen7_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
272
273         /* reference 1 */
274         obj_surface = SURFACE(pPicParameter->ReferenceFrames[1].picture_id);
275         assert(obj_surface);
276         if ( obj_surface->bo != NULL ) 
277             gen7_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
278     }
279
280     /* VME output */
281     gen7_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context);
282     gen7_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
283
284     return VA_STATUS_SUCCESS;
285 }
286
/* Fill the interface descriptor remap table: one 32-byte descriptor per
 * VME kernel, each pointing at its kernel binary, the shared VME state
 * (used as sampler state) and the common binding table.  Relocations are
 * emitted so the GPU addresses are patched at exec time.
 * Always returns VA_STATUS_SUCCESS. */
static VAStatus gen7_vme_interface_setup(VADriverContextP ctx, 
                                         struct encode_state *encode_state,
                                         struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct gen6_interface_descriptor_data *desc;   
    int i;
    dri_bo *bo;

    bo = vme_context->gpe_context.idrt.bo;
    dri_bo_map(bo, 1);          /* map writable */
    assert(bo->virtual);
    desc = bo->virtual;

    for (i = 0; i < vme_context->vme_kernel_sum; i++) {
        struct i965_kernel *kernel;
        kernel = &vme_context->gpe_context.kernels[i];
        assert(sizeof(*desc) == 32);
        /*Setup the descriptor table*/
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 1; /* FIXME: */
        desc->desc2.sampler_state_pointer = (vme_context->vme_state.bo->offset >> 5);
        desc->desc3.binding_table_entry_count = 1; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
                
        /*kernel start — relocation so desc0 gets the real GPU address*/
        dri_bo_emit_reloc(bo,   
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        /*Sampler State(VME state pointer) — reloc for desc2*/
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          (1 << 2),                                                                     /* delta matching the low bits of desc2 */
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc2),
                          vme_context->vme_state.bo);
        desc++;
    }
    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}
333
334 static VAStatus gen7_vme_constant_setup(VADriverContextP ctx, 
335                                         struct encode_state *encode_state,
336                                         struct intel_encoder_context *encoder_context)
337 {
338     struct gen6_vme_context *vme_context = encoder_context->vme_context;
339     // unsigned char *constant_buffer;
340     unsigned int *vme_state_message;
341     int mv_num = 32;
342     if (vme_context->h264_level >= 30) {
343         mv_num = 16;
344         if (vme_context->h264_level >= 31)
345                 mv_num = 8;
346     } 
347
348     dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
349     assert(vme_context->gpe_context.curbe.bo->virtual);
350     // constant_buffer = vme_context->curbe.bo->virtual;
351     vme_state_message = (unsigned int *)vme_context->gpe_context.curbe.bo->virtual;
352     vme_state_message[31] = mv_num;
353         
354     /*TODO copy buffer into CURB*/
355
356     dri_bo_unmap( vme_context->gpe_context.curbe.bo);
357
358     return VA_STATUS_SUCCESS;
359 }
360
/* Per-QP (0..51) intra MB mode cost lookup table, loaded into dword 16 of
 * the VME state message by gen7_vme_state_setup_fixup().
 * NOTE(review): each word appears to pack several cost fields — exact field
 * layout comes from the hardware VME interface, not visible here; confirm
 * against the Gen7 PRM before editing values. */
static const unsigned int intra_mb_mode_cost_table[] = {
    0x31110001, // for qp0
    0x09110001, // for qp1
    0x15030001, // for qp2
    0x0b030001, // for qp3
    0x0d030011, // for qp4
    0x17210011, // for qp5
    0x41210011, // for qp6
    0x19210011, // for qp7
    0x25050003, // for qp8
    0x1b130003, // for qp9
    0x1d130003, // for qp10
    0x27070021, // for qp11
    0x51310021, // for qp12
    0x29090021, // for qp13
    0x35150005, // for qp14
    0x2b0b0013, // for qp15
    0x2d0d0013, // for qp16
    0x37170007, // for qp17
    0x61410031, // for qp18
    0x39190009, // for qp19
    0x45250015, // for qp20
    0x3b1b000b, // for qp21
    0x3d1d000d, // for qp22
    0x47270017, // for qp23
    0x71510041, // for qp24 ! center for qp=0..30
    0x49290019, // for qp25
    0x55350025, // for qp26
    0x4b2b001b, // for qp27
    0x4d2d001d, // for qp28
    0x57370027, // for qp29
    0x81610051, // for qp30
    0x57270017, // for qp31
    0x81510041, // for qp32 ! center for qp=31..51
    0x59290019, // for qp33
    0x65350025, // for qp34
    0x5b2b001b, // for qp35
    0x5d2d001d, // for qp36
    0x67370027, // for qp37
    0x91610051, // for qp38
    0x69390029, // for qp39
    0x75450035, // for qp40
    0x6b3b002b, // for qp41
    0x6d3d002d, // for qp42
    0x77470037, // for qp43
    0xa1710061, // for qp44
    0x79490039, // for qp45
    0x85550045, // for qp46
    0x7b4b003b, // for qp47
    0x7d4d003d, // for qp48
    0x87570047, // for qp49
    0xb1810071, // for qp50
    0x89590049  // for qp51
};
415
416 static void gen7_vme_state_setup_fixup(VADriverContextP ctx,
417                                        struct encode_state *encode_state,
418                                        struct intel_encoder_context *encoder_context,
419                                        unsigned int *vme_state_message)
420 {
421     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
422     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
423     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
424
425     if (slice_param->slice_type != SLICE_TYPE_I &&
426         slice_param->slice_type != SLICE_TYPE_SI)
427         return;
428     if (encoder_context->rate_control_mode == VA_RC_CQP)
429         vme_state_message[16] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta];
430     else
431         vme_state_message[16] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[slice_param->slice_type].QpPrimeY];
432 }
433
434 static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx,
435                                          struct encode_state *encode_state,
436                                          int is_intra,
437                                          struct intel_encoder_context *encoder_context)
438 {
439     struct gen6_vme_context *vme_context = encoder_context->vme_context;
440     unsigned int *vme_state_message;
441         unsigned int *mb_cost_table;
442     int i;
443     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
444
445         mb_cost_table = (unsigned int *)vme_context->vme_state_message;
446     //building VME state message
447     dri_bo_map(vme_context->vme_state.bo, 1);
448     assert(vme_context->vme_state.bo->virtual);
449     vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;
450
451     if ((slice_param->slice_type == SLICE_TYPE_P) ||
452         (slice_param->slice_type == SLICE_TYPE_SP)) {
453             vme_state_message[0] = 0x01010101;
454             vme_state_message[1] = 0x10010101;
455             vme_state_message[2] = 0x0F0F0F0F;
456             vme_state_message[3] = 0x100F0F0F;
457             vme_state_message[4] = 0x01010101;
458             vme_state_message[5] = 0x10010101;
459             vme_state_message[6] = 0x0F0F0F0F;
460             vme_state_message[7] = 0x100F0F0F;
461             vme_state_message[8] = 0x01010101;
462             vme_state_message[9] = 0x10010101;
463             vme_state_message[10] = 0x0F0F0F0F;
464             vme_state_message[11] = 0x000F0F0F;
465             vme_state_message[12] = 0x00;
466             vme_state_message[13] = 0x00;
467         } else {
468             vme_state_message[0] = 0x10010101;
469             vme_state_message[1] = 0x100F0F0F;
470             vme_state_message[2] = 0x10010101;
471             vme_state_message[3] = 0x000F0F0F;
472             vme_state_message[4] = 0;
473             vme_state_message[5] = 0;
474             vme_state_message[6] = 0;
475             vme_state_message[7] = 0;
476             vme_state_message[8] = 0;
477             vme_state_message[9] = 0;
478             vme_state_message[10] = 0;
479             vme_state_message[11] = 0;
480             vme_state_message[12] = 0;
481             vme_state_message[13] = 0;
482         }
483
484     vme_state_message[14] = (mb_cost_table[2] & 0xFFFF);
485     vme_state_message[15] = 0;
486     vme_state_message[16] = mb_cost_table[0];
487     vme_state_message[17] = mb_cost_table[1];
488     vme_state_message[18] = mb_cost_table[3];
489     vme_state_message[19] = mb_cost_table[4];
490
491     for(i = 20; i < 32; i++) {
492         vme_state_message[i] = 0;
493     }
494
495     dri_bo_unmap( vme_context->vme_state.bo);
496     return VA_STATUS_SUCCESS;
497 }
498
499 static VAStatus gen7_vme_vme_state_setup(VADriverContextP ctx,
500                                          struct encode_state *encode_state,
501                                          int is_intra,
502                                          struct intel_encoder_context *encoder_context)
503 {
504     struct gen6_vme_context *vme_context = encoder_context->vme_context;
505     unsigned int *vme_state_message;
506     int i;
507         
508     //building VME state message
509     dri_bo_map(vme_context->vme_state.bo, 1);
510     assert(vme_context->vme_state.bo->virtual);
511     vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;
512
513     vme_state_message[0] = 0x01010101;
514     vme_state_message[1] = 0x10010101;
515     vme_state_message[2] = 0x0F0F0F0F;
516     vme_state_message[3] = 0x100F0F0F;
517     vme_state_message[4] = 0x01010101;
518     vme_state_message[5] = 0x10010101;
519     vme_state_message[6] = 0x0F0F0F0F;
520     vme_state_message[7] = 0x100F0F0F;
521     vme_state_message[8] = 0x01010101;
522     vme_state_message[9] = 0x10010101;
523     vme_state_message[10] = 0x0F0F0F0F;
524     vme_state_message[11] = 0x000F0F0F;
525     vme_state_message[12] = 0x00;
526     vme_state_message[13] = 0x00;
527
528     vme_state_message[14] = 0x4a4a;
529     vme_state_message[15] = 0x0;
530     vme_state_message[16] = 0x4a4a4a4a;
531     vme_state_message[17] = 0x4a4a4a4a;
532     vme_state_message[18] = 0x21110100;
533     vme_state_message[19] = 0x61514131;
534
535     for(i = 20; i < 32; i++) {
536         vme_state_message[i] = 0;
537     }
538     //vme_state_message[16] = 0x42424242;                       //cost function LUT set 0 for Intra
539
540     gen7_vme_state_setup_fixup(ctx, encode_state, encoder_context, vme_state_message);
541
542     dri_bo_unmap( vme_context->vme_state.bo);
543     return VA_STATUS_SUCCESS;
544 }
545
546 #define         INTRA_PRED_AVAIL_FLAG_AE        0x60
547 #define         INTRA_PRED_AVAIL_FLAG_B         0x10
548 #define         INTRA_PRED_AVAIL_FLAG_C         0x8
549 #define         INTRA_PRED_AVAIL_FLAG_D         0x4
550 #define         INTRA_PRED_AVAIL_FLAG_BCD_MASK  0x1C
551
/* Fill the second-level batchbuffer with one MEDIA_OBJECT command per
 * macroblock, in raster order within each slice.  Each command selects
 * the given kernel and carries inline data: MB coordinates and the intra
 * prediction availability flags for the MB's neighbours. */
static void
gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx, 
                              struct encode_state *encode_state,
                              int mb_width, int mb_height,
                              int kernel,
                              int transform_8x8_mode_flag,
                              struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;
    int i, s, j;
    unsigned int *command_ptr;


    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        /* NOTE(review): the MPEG2 slice-parameter layout is used here; only
         * macroblock_address and num_macroblocks are read — confirm these
         * fields line up with every codec that reaches this path. */
        VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;

        for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
            int slice_mb_begin = slice_param->macroblock_address;
            int slice_mb_number = slice_param->num_macroblocks;
            unsigned int mb_intra_ub;
            int slice_mb_x = slice_param->macroblock_address % mb_width;

            for (i = 0; i < slice_mb_number;) {
                int mb_count = i + slice_mb_begin;    

                mb_x = mb_count % mb_width;
                mb_y = mb_count / mb_width;
                mb_intra_ub = 0;

                /* left neighbour available unless in column 0 */
                if (mb_x != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                }

                /* row above exists: B (top), D (top-left), C (top-right) */
                if (mb_y != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;

                    if (mb_x != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (mb_x != (mb_width -1))
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                }

                /* first MB row of the slice: the neighbours above belong
                 * to another slice, so drop the above/diagonal flags */
                if (i < mb_width) {
                    if (i == 0)
                        mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);

                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);

                    if ((i == (mb_width - 1)) && slice_mb_x) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                    }
                }
                
                if ((i == mb_width) && slice_mb_x) {
                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
                }

                /* MEDIA_OBJECT: 8 dwords total (length field = 8 - 2) */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = 0;
                *command_ptr++ = 0;
                *command_ptr++ = 0;
                *command_ptr++ = 0;
   
                /*inline data */
                *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
                *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

                i += 1;
            }

            slice_param++;
        }
    }

    /* terminate the second-level batch */
    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
637
/* check whether the mb of (x_index, y_index) is out of bound:
 * returns 0 when the MB lies inside the picture AND inside the slice's
 * MB range [first_mb, first_mb + num_mb], -1 otherwise. */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    int mb_index;

    if (x_index < 0 || x_index >= mb_width ||
        y_index < 0 || y_index >= mb_height)
        return -1;

    mb_index = y_index * mb_width + x_index;

    return (mb_index < first_mb || mb_index > (first_mb + num_mb)) ? -1 : 0;
}
652
/* Fill the second-level batchbuffer walking each slice's MBs along
 * 45-degree wavefronts (step: x -= 2, y += 1) so that the hardware
 * scoreboard dependencies (left / above / above-right) are satisfied.
 * The first loop nest covers wavefronts starting on the slice's top row;
 * the second covers those starting at the right edge.  Each MB emits one
 * MEDIA_OBJECT with scoreboard (X, Y) + dependency mask and inline data. */
static void
gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
                              struct encode_state *encode_state,
                              int mb_width, int mb_height,
                              int kernel,
                              int transform_8x8_mode_flag,
                              struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;     /* NOTE(review): unused in this walker */
    int mb_row;
    int s;
    unsigned int *command_ptr;
    int temp;


#define         USE_SCOREBOARD          (1 << 21)
 
    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
        int first_mb = pSliceParameter->macroblock_address;
        int num_mb = pSliceParameter->num_macroblocks;
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;

        x_outer = first_mb % mb_width;
        y_outer = first_mb / mb_width;
        mb_row = y_outer;       /* the slice's first MB row */

        /* wavefronts whose start point moves along the slice's first row */
        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
                x_inner = x_outer;
                y_inner = y_outer;
                /* walk down-left along one wavefront */
                for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                        mb_intra_ub = 0;
                        score_dep = 0;
                        /* left neighbour: intra availability + scoreboard dep */
                        if (x_inner != 0) {
                                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                                score_dep |= MB_SCOREBOARD_A; 
                        }
                        /* rows above the slice's first row: B/D/C neighbours */
                        if (y_inner != mb_row) {
                                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                                score_dep |= MB_SCOREBOARD_B;
                                if (x_inner != 0)
                                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                                if (x_inner != (mb_width -1)) {
                                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                                        score_dep |= MB_SCOREBOARD_C;
                                }
                        }
                                                        
                        *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                        *command_ptr++ = kernel;
                        *command_ptr++ = USE_SCOREBOARD;
                        /* Indirect data */
                        *command_ptr++ = 0;
                        /* the (X, Y) term of scoreboard */
                        *command_ptr++ = ((y_inner << 16) | x_inner);
                        *command_ptr++ = score_dep;

                        /*inline data */
                        *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                        *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                        x_inner -= 2;
                        y_inner += 1;
                }
                x_outer += 1;
        }

        /* remaining wavefronts, starting from the right edge; two starts
         * per row (temp toggles between them) */
        x_outer = mb_width - 2;
        y_outer = first_mb / mb_width;
        temp = 0;
        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
                y_inner = y_outer;
                x_inner = x_outer;
                for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                        mb_intra_ub = 0;
                        score_dep = 0;
                        if (x_inner != 0) {
                                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                                score_dep |= MB_SCOREBOARD_A; 
                        }
                        if (y_inner != mb_row) {
                                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                                score_dep |= MB_SCOREBOARD_B;
                                if (x_inner != 0)
                                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                                if (x_inner != (mb_width -1)) {
                                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                                        score_dep |= MB_SCOREBOARD_C;
                                }
                        }

                        *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                        *command_ptr++ = kernel;
                        *command_ptr++ = USE_SCOREBOARD;
                        /* Indirect data */
                        *command_ptr++ = 0;
                        /* the (X, Y) term of scoreboard */
                        *command_ptr++ = ((y_inner << 16) | x_inner);
                        *command_ptr++ = score_dep;

                        /*inline data */
                        *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                        *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

                        x_inner -= 2;
                        y_inner += 1;
                }
                temp++;
                if (temp == 2) {
                        y_outer += 1;
                        temp = 0;
                        x_outer = mb_width - 2;
                } else {
                        x_outer++;
                }       
        }
    }

    /* terminate the second-level batch */
    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
780
781 static void gen7_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
782 {
783     struct i965_driver_data *i965 = i965_driver_data(ctx);
784     struct gen6_vme_context *vme_context = encoder_context->vme_context;
785     dri_bo *bo;
786
787     i965_gpe_context_init(ctx, &vme_context->gpe_context);
788
789     /* VME output buffer */
790     dri_bo_unreference(vme_context->vme_output.bo);
791     vme_context->vme_output.bo = NULL;
792
793     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
794     vme_context->vme_batchbuffer.bo = NULL;
795
796     /* VME state */
797     dri_bo_unreference(vme_context->vme_state.bo);
798     bo = dri_bo_alloc(i965->intel.bufmgr,
799                       "Buffer",
800                       1024*16, 64);
801     assert(bo);
802     vme_context->vme_state.bo = bo;
803 }
804
805 static void gen7_vme_pipeline_programing(VADriverContextP ctx, 
806                                          struct encode_state *encode_state,
807                                          struct intel_encoder_context *encoder_context)
808 {
809     struct gen6_vme_context *vme_context = encoder_context->vme_context;
810     struct intel_batchbuffer *batch = encoder_context->base.batch;
811     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
812     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
813     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
814     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
815     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
816     int s;
817     bool allow_hwscore = true;
818     int kernel_shader;
819
820     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
821         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
822         if ((pSliceParameter->macroblock_address % width_in_mbs)) {
823                 allow_hwscore = false;
824                 break;
825         }
826     }
827
828     if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
829         (pSliceParameter->slice_type == SLICE_TYPE_I)) {
830         kernel_shader = AVC_VME_INTRA_SHADER;
831     } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
832         (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
833         kernel_shader = AVC_VME_INTER_SHADER;
834     } else {
835         kernel_shader = AVC_VME_BINTER_SHADER;
836         if (!allow_hwscore)
837              kernel_shader = AVC_VME_INTER_SHADER;
838     }
839
840     if (allow_hwscore)
841         gen7_vme_walker_fill_vme_batchbuffer(ctx, 
842                                   encode_state,
843                                   width_in_mbs, height_in_mbs,
844                                   kernel_shader,
845                                   pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
846                                   encoder_context);
847         
848     else
849         gen7_vme_fill_vme_batchbuffer(ctx, 
850                                   encode_state,
851                                   width_in_mbs, height_in_mbs,
852                                   kernel_shader, 
853                                   pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
854                                   encoder_context);
855
856     intel_batchbuffer_start_atomic(batch, 0x1000);
857     gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
858     BEGIN_BATCH(batch, 2);
859     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
860     OUT_RELOC(batch,
861               vme_context->vme_batchbuffer.bo,
862               I915_GEM_DOMAIN_COMMAND, 0, 
863               0);
864     ADVANCE_BATCH(batch);
865
866     intel_batchbuffer_end_atomic(batch);        
867 }
868
869 static VAStatus gen7_vme_prepare(VADriverContextP ctx, 
870                                  struct encode_state *encode_state,
871                                  struct intel_encoder_context *encoder_context)
872 {
873     VAStatus vaStatus = VA_STATUS_SUCCESS;
874     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
875     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
876     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
877     struct gen6_vme_context *vme_context = encoder_context->vme_context;
878
879     if (!vme_context->h264_level ||
880                 (vme_context->h264_level != pSequenceParameter->level_idc)) {
881         vme_context->h264_level = pSequenceParameter->level_idc;        
882     }
883         
884     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
885     /*Setup all the memory object*/
886     gen7_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
887     gen7_vme_interface_setup(ctx, encode_state, encoder_context);
888     gen7_vme_constant_setup(ctx, encode_state, encoder_context);
889     gen7_vme_avc_state_setup(ctx, encode_state, is_intra, encoder_context);
890
891     /*Programing media pipeline*/
892     gen7_vme_pipeline_programing(ctx, encode_state, encoder_context);
893
894     return vaStatus;
895 }
896
897 static VAStatus gen7_vme_run(VADriverContextP ctx, 
898                              struct encode_state *encode_state,
899                              struct intel_encoder_context *encoder_context)
900 {
901     struct intel_batchbuffer *batch = encoder_context->base.batch;
902
903     intel_batchbuffer_flush(batch);
904
905     return VA_STATUS_SUCCESS;
906 }
907
908 static VAStatus gen7_vme_stop(VADriverContextP ctx, 
909                               struct encode_state *encode_state,
910                               struct intel_encoder_context *encoder_context)
911 {
912     return VA_STATUS_SUCCESS;
913 }
914
915 static VAStatus
916 gen7_vme_pipeline(VADriverContextP ctx,
917                   VAProfile profile,
918                   struct encode_state *encode_state,
919                   struct intel_encoder_context *encoder_context)
920 {
921     gen7_vme_media_init(ctx, encoder_context);
922     gen7_vme_prepare(ctx, encode_state, encoder_context);
923     gen7_vme_run(ctx, encode_state, encoder_context);
924     gen7_vme_stop(ctx, encode_state, encoder_context);
925
926     return VA_STATUS_SUCCESS;
927 }
928
929 static void
930 gen7_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
931                                     struct encode_state *encode_state,
932                                     int index,
933                                     int is_intra,
934                                     struct intel_encoder_context *encoder_context)
935
936 {
937     struct i965_driver_data *i965 = i965_driver_data(ctx);
938     struct gen6_vme_context *vme_context = encoder_context->vme_context;
939     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
940     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
941     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
942
943     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
944     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
945
946     if (is_intra)
947         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES;
948     else
949         vme_context->vme_output.size_block = INTER_VME_OUTPUT_IN_BYTES;
950
951     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
952                                               "VME output buffer",
953                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
954                                               0x1000);
955     assert(vme_context->vme_output.bo);
956     vme_context->vme_buffer_suface_setup(ctx,
957                                          &vme_context->gpe_context,
958                                          &vme_context->vme_output,
959                                          BINDING_TABLE_OFFSET(index),
960                                          SURFACE_STATE_OFFSET(index));
961 }
962
963 static void
964 gen7_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
965                                              struct encode_state *encode_state,
966                                              int index,
967                                              struct intel_encoder_context *encoder_context)
968
969 {
970     struct i965_driver_data *i965 = i965_driver_data(ctx);
971     struct gen6_vme_context *vme_context = encoder_context->vme_context;
972     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
973     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
974     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
975
976     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
977     vme_context->vme_batchbuffer.size_block = 32; /* 4 OWORDs */
978     vme_context->vme_batchbuffer.pitch = 16;
979     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, 
980                                                    "VME batchbuffer",
981                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
982                                                    0x1000);
983     vme_context->vme_buffer_suface_setup(ctx,
984                                          &vme_context->gpe_context,
985                                          &vme_context->vme_batchbuffer,
986                                          BINDING_TABLE_OFFSET(index),
987                                          SURFACE_STATE_OFFSET(index));
988 }
989
990 static VAStatus
991 gen7_vme_mpeg2_surface_setup(VADriverContextP ctx, 
992                               struct encode_state *encode_state,
993                               int is_intra,
994                               struct intel_encoder_context *encoder_context)
995 {
996     struct i965_driver_data *i965 = i965_driver_data(ctx);
997     struct object_surface *obj_surface;
998     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
999
1000     /*Setup surfaces state*/
1001     /* current picture for encoding */
1002     obj_surface = SURFACE(encoder_context->input_yuv_surface);
1003     assert(obj_surface);
1004     gen7_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
1005     gen7_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
1006
1007     if (!is_intra) {
1008         /* reference 0 */
1009         obj_surface = SURFACE(pic_param->forward_reference_picture);
1010         assert(obj_surface);
1011         if ( obj_surface->bo != NULL)
1012             gen7_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
1013
1014         /* reference 1 */
1015         obj_surface = SURFACE(pic_param->backward_reference_picture);
1016         if (obj_surface && obj_surface->bo != NULL) 
1017             gen7_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
1018     }
1019
1020     /* VME output */
1021     gen7_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
1022     gen7_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
1023
1024     return VA_STATUS_SUCCESS;
1025 }
1026
1027 static void
1028 gen7_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
1029                                      struct encode_state *encode_state,
1030                                      int mb_width, int mb_height,
1031                                      int kernel,
1032                                      int transform_8x8_mode_flag,
1033                                      struct intel_encoder_context *encoder_context)
1034 {
1035     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1036     int number_mb_cmds;
1037     int mb_x = 0, mb_y = 0;
1038     int i, s, j;
1039     unsigned int *command_ptr;
1040
1041     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1042     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1043
1044     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1045         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
1046
1047         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
1048             int slice_mb_begin = slice_param->macroblock_address;
1049             int slice_mb_number = slice_param->num_macroblocks;
1050
1051             for (i = 0; i < slice_mb_number;) {
1052                 int mb_count = i + slice_mb_begin;
1053
1054                 mb_x = mb_count % mb_width;
1055                 mb_y = mb_count / mb_width;
1056
1057                 if( i == 0) {
1058                     number_mb_cmds = mb_width;
1059                 } else if ((i + 128) <= slice_mb_number) {
1060                     number_mb_cmds = 128;
1061                 } else {
1062                     number_mb_cmds = slice_mb_number - i;
1063                 }
1064
1065                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1066                 *command_ptr++ = kernel;
1067                 *command_ptr++ = 0;
1068                 *command_ptr++ = 0;
1069                 *command_ptr++ = 0;
1070                 *command_ptr++ = 0;
1071  
1072                 /*inline data */
1073                 *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
1074                 *command_ptr++ = ( (number_mb_cmds << 16) | transform_8x8_mode_flag | ((i == 0) << 1));
1075
1076                 i += number_mb_cmds;
1077             }
1078
1079             slice_param++;
1080         }
1081     }
1082
1083     *command_ptr++ = 0;
1084     *command_ptr++ = MI_BATCH_BUFFER_END;
1085
1086     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1087 }
1088
1089 static void
1090 gen7_vme_mpeg2_pipeline_programing(VADriverContextP ctx, 
1091                                     struct encode_state *encode_state,
1092                                     int is_intra,
1093                                     struct intel_encoder_context *encoder_context)
1094 {
1095     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1096     struct intel_batchbuffer *batch = encoder_context->base.batch;
1097     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1098     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1099     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1100
1101     gen7_vme_mpeg2_fill_vme_batchbuffer(ctx, 
1102                                          encode_state,
1103                                          width_in_mbs, height_in_mbs,
1104                                          MPEG2_VME_INTER_SHADER,
1105                                          0,
1106                                          encoder_context);
1107
1108     intel_batchbuffer_start_atomic(batch, 0x1000);
1109     gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1110     BEGIN_BATCH(batch, 2);
1111     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
1112     OUT_RELOC(batch,
1113               vme_context->vme_batchbuffer.bo,
1114               I915_GEM_DOMAIN_COMMAND, 0, 
1115               0);
1116     ADVANCE_BATCH(batch);
1117
1118     intel_batchbuffer_end_atomic(batch);
1119 }
1120
1121 static VAStatus
1122 gen7_vme_mpeg2_prepare(VADriverContextP ctx, 
1123                         struct encode_state *encode_state,
1124                         struct intel_encoder_context *encoder_context)
1125 {
1126     VAStatus vaStatus = VA_STATUS_SUCCESS;
1127
1128    /*Setup all the memory object*/
1129     gen7_vme_mpeg2_surface_setup(ctx, encode_state, 0, encoder_context);
1130     gen7_vme_interface_setup(ctx, encode_state, encoder_context);
1131     gen7_vme_vme_state_setup(ctx, encode_state, 0, encoder_context);
1132     gen7_vme_constant_setup(ctx, encode_state, encoder_context);
1133
1134     /*Programing media pipeline*/
1135     gen7_vme_mpeg2_pipeline_programing(ctx, encode_state, 0, encoder_context);
1136
1137     return vaStatus;
1138 }
1139
1140 static VAStatus
1141 gen7_vme_mpeg2_pipeline(VADriverContextP ctx,
1142                          VAProfile profile,
1143                          struct encode_state *encode_state,
1144                          struct intel_encoder_context *encoder_context)
1145 {
1146     struct i965_driver_data *i965 = i965_driver_data(ctx);
1147     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1148     VAEncSliceParameterBufferMPEG2 *slice_param = 
1149         (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1150     VAEncSequenceParameterBufferMPEG2 *seq_param = 
1151        (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1152  
1153     /*No need of to exec VME for Intra slice */
1154     if (slice_param->is_intra_slice) {
1155          if(!vme_context->vme_output.bo) {
1156              int w_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1157              int h_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1158
1159              vme_context->vme_output.num_blocks = w_in_mbs * h_in_mbs;
1160              vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
1161              vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES;
1162              vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
1163                                                        "MPEG2 VME output buffer",
1164                                                        vme_context->vme_output.num_blocks
1165                                                            * vme_context->vme_output.size_block,
1166                                                        0x1000);
1167          }
1168
1169          return VA_STATUS_SUCCESS;
1170     }
1171
1172     gen7_vme_media_init(ctx, encoder_context);
1173     gen7_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
1174     gen7_vme_run(ctx, encode_state, encoder_context);
1175     gen7_vme_stop(ctx, encode_state, encoder_context);
1176
1177     return VA_STATUS_SUCCESS;
1178 }
1179
1180 static void
1181 gen7_vme_context_destroy(void *context)
1182 {
1183     struct gen6_vme_context *vme_context = context;
1184
1185     i965_gpe_context_destroy(&vme_context->gpe_context);
1186
1187     dri_bo_unreference(vme_context->vme_output.bo);
1188     vme_context->vme_output.bo = NULL;
1189
1190     dri_bo_unreference(vme_context->vme_state.bo);
1191     vme_context->vme_state.bo = NULL;
1192
1193     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
1194     vme_context->vme_batchbuffer.bo = NULL;
1195
1196     if (vme_context->vme_state_message) {
1197         free(vme_context->vme_state_message);
1198         vme_context->vme_state_message = NULL;
1199     }
1200
1201     free(vme_context);
1202 }
1203
1204 Bool gen7_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1205 {
1206     struct i965_driver_data *i965 = i965_driver_data(ctx);
1207     struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context));
1208
1209     vme_context->gpe_context.surface_state_binding_table.length =
1210               (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1211
1212     vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
1213     vme_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
1214     vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
1215
1216     vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1217     vme_context->gpe_context.vfe_state.num_urb_entries = 16;
1218     vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
1219     vme_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
1220     vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
1221
1222     vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
1223     vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
1224     vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
1225                                                                 MB_SCOREBOARD_B |
1226                                                                 MB_SCOREBOARD_C);
1227
1228     /* In VME prediction the current mb depends on the neighbour 
1229      * A/B/C macroblock. So the left/up/up-right dependency should
1230      * be considered.
1231      */
1232     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
1233     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
1234     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
1235     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
1236     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
1237     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
1238         
1239     vme_context->gpe_context.vfe_desc7.dword = 0;
1240
1241     if(encoder_context->profile == VAProfileH264Baseline ||
1242        encoder_context->profile == VAProfileH264Main     ||
1243        encoder_context->profile == VAProfileH264High ){
1244        vme_context->video_coding_type = VIDEO_CODING_AVC;
1245        vme_context->vme_kernel_sum = AVC_VME_KERNEL_SUM; 
1246  
1247     } else if (encoder_context->profile == VAProfileMPEG2Simple ||
1248                encoder_context->profile == VAProfileMPEG2Main ){
1249        vme_context->video_coding_type = VIDEO_CODING_MPEG2;
1250        vme_context->vme_kernel_sum = MPEG2_VME_KERNEL_SUM; 
1251     } else {
1252         /* Unsupported encoding profile */
1253         assert(0);
1254     }
1255
1256     if (IS_GEN7(i965->intel.device_id)) {
1257         if (vme_context->video_coding_type == VIDEO_CODING_AVC) {
1258               i965_gpe_load_kernels(ctx,
1259                                     &vme_context->gpe_context,
1260                                     gen7_vme_kernels,
1261                                     vme_context->vme_kernel_sum);
1262               encoder_context->vme_pipeline = gen7_vme_pipeline;
1263  
1264         } else {
1265               i965_gpe_load_kernels(ctx,
1266                                     &vme_context->gpe_context,
1267                                     gen7_vme_mpeg2_kernels,
1268                                     vme_context->vme_kernel_sum);
1269               encoder_context->vme_pipeline = gen7_vme_mpeg2_pipeline;
1270  
1271         }
1272
1273         vme_context->vme_surface2_setup = gen7_gpe_surface2_setup;
1274         vme_context->vme_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup;
1275         vme_context->vme_buffer_suface_setup = gen7_gpe_buffer_suface_setup;
1276     }
1277
1278     encoder_context->vme_context = vme_context;
1279     encoder_context->vme_context_destroy = gen7_vme_context_destroy;
1280     vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
1281
1282     return True;
1283 }