
Rewrite the VME shader for MPEG2 encoding on Ivy
[android-x86/hardware-intel-common-vaapi.git] / src / gen7_vme.c
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "intel_driver.h"
37
38 #include "i965_defines.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "gen6_vme.h"
42 #include "gen6_mfc.h"
43 #ifdef SURFACE_STATE_PADDED_SIZE
44 #undef SURFACE_STATE_PADDED_SIZE
45 #endif
46
47 #define VME_MSG_LENGTH          32
48 #define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
49 #define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
50 #define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)
51
52 #define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN7
53 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
54 #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
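/*
 * Layout note (from the macros above): surface states are packed back to back
 * at a padded stride, and the binding table (one 32-bit offset per surface)
 * starts right after the last of the MAX_MEDIA_SURFACES_GEN6 surface-state slots.
 */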
55
56 #define CURBE_ALLOCATION_SIZE   37              /* in 256-bit units */
57 #define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in bytes, must be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
58 #define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit units, must be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
59
60 enum VIDEO_CODING_TYPE{
61     VIDEO_CODING_AVC = 0,
62     VIDEO_CODING_MPEG2,
63     VIDEO_CODING_SUM
64 };
65
66 enum AVC_VME_KERNEL_TYPE{ 
67     AVC_VME_INTRA_SHADER = 0,
68     AVC_VME_INTER_SHADER,
69     AVC_VME_BATCHBUFFER,
70     AVC_VME_BINTER_SHADER,
71     AVC_VME_KERNEL_SUM
72 };
73
74 enum MPEG2_VME_KERNEL_TYPE{
75     MPEG2_VME_INTER_SHADER = 0,
76     MPEG2_VME_BATCHBUFFER,
77     MPEG2_VME_KERNEL_SUM
78 };
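/*
 * Note: the Ivy Bridge MPEG2 path only runs a motion-search (inter) kernel plus
 * the batchbuffer kernel; intra slices skip VME entirely (see
 * gen7_vme_mpeg2_pipeline below).
 */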
79  
80
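/* The included .g7b files are precompiled Gen7 (Ivy Bridge) EU kernel binaries, stored as raw dword arrays. */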
81 static const uint32_t gen7_vme_intra_frame[][4] = {
82 #include "shaders/vme/intra_frame_ivb.g7b"
83 };
84
85 static const uint32_t gen7_vme_inter_frame[][4] = {
86 #include "shaders/vme/inter_frame_ivb.g7b"
87 };
88
89 static const uint32_t gen7_vme_batchbuffer[][4] = {
90 #include "shaders/vme/batchbuffer.g7b"
91 };
92
93 static const uint32_t gen7_vme_binter_frame[][4] = {
94 #include "shaders/vme/inter_bframe_ivb.g7b"
95 };
96
97 static struct i965_kernel gen7_vme_kernels[] = {
98     {
99         "AVC VME Intra Frame",
100         AVC_VME_INTRA_SHADER,                   /*index*/
101         gen7_vme_intra_frame,                   
102         sizeof(gen7_vme_intra_frame),           
103         NULL
104     },
105     {
106         "AVC VME inter Frame",
107         AVC_VME_INTER_SHADER,
108         gen7_vme_inter_frame,
109         sizeof(gen7_vme_inter_frame),
110         NULL
111     },
112     {
113         "AVC VME BATCHBUFFER",
114         AVC_VME_BATCHBUFFER,
115         gen7_vme_batchbuffer,
116         sizeof(gen7_vme_batchbuffer),
117         NULL
118     },
119     {
120         "AVC VME binter Frame",
121         AVC_VME_BINTER_SHADER,
122         gen7_vme_binter_frame,
123         sizeof(gen7_vme_binter_frame),
124         NULL
125     }
126 };
127
128 static const uint32_t gen7_vme_mpeg2_inter_frame[][4] = {
129 #include "shaders/vme/mpeg2_inter_ivb.g7b"
130 };
131
132 static const uint32_t gen7_vme_mpeg2_batchbuffer[][4] = {
133 #include "shaders/vme/batchbuffer.g7b"
134 };
135
136 static struct i965_kernel gen7_vme_mpeg2_kernels[] = {
137     {
138         "MPEG2 VME inter Frame",
139         MPEG2_VME_INTER_SHADER,
140         gen7_vme_mpeg2_inter_frame,
141         sizeof(gen7_vme_mpeg2_inter_frame),
142         NULL
143     },
144     {
145         "MPEG2 VME BATCHBUFFER",
146         MPEG2_VME_BATCHBUFFER,
147         gen7_vme_mpeg2_batchbuffer,
148         sizeof(gen7_vme_mpeg2_batchbuffer),
149         NULL
150     },
151 };
152
153 /* only used for VME source surface state */
154 static void 
155 gen7_vme_source_surface_state(VADriverContextP ctx,
156                               int index,
157                               struct object_surface *obj_surface,
158                               struct intel_encoder_context *encoder_context)
159 {
160     struct gen6_vme_context *vme_context = encoder_context->vme_context;
161
162     vme_context->vme_surface2_setup(ctx,
163                                     &vme_context->gpe_context,
164                                     obj_surface,
165                                     BINDING_TABLE_OFFSET(index),
166                                     SURFACE_STATE_OFFSET(index));
167 }
168
169 static void
170 gen7_vme_media_source_surface_state(VADriverContextP ctx,
171                                     int index,
172                                     struct object_surface *obj_surface,
173                                     struct intel_encoder_context *encoder_context)
174 {
175     struct gen6_vme_context *vme_context = encoder_context->vme_context;
176
177     vme_context->vme_media_rw_surface_setup(ctx,
178                                             &vme_context->gpe_context,
179                                             obj_surface,
180                                             BINDING_TABLE_OFFSET(index),
181                                             SURFACE_STATE_OFFSET(index));
182 }
183
184 static void
185 gen7_vme_output_buffer_setup(VADriverContextP ctx,
186                              struct encode_state *encode_state,
187                              int index,
188                              struct intel_encoder_context *encoder_context)
189
190 {
191     struct i965_driver_data *i965 = i965_driver_data(ctx);
192     struct gen6_vme_context *vme_context = encoder_context->vme_context;
193     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
194     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
195     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
196     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
197     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
198
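    /*
     * One fixed-size record per macroblock; these records are read back by the
     * MFC (PAK) stage when it packs the final bitstream.
     */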
199     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
200     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
201
202     if (is_intra)
203         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES;
204     else
205         vme_context->vme_output.size_block = INTER_VME_OUTPUT_IN_BYTES;
206
207     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, 
208                                               "VME output buffer",
209                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
210                                               0x1000);
211     assert(vme_context->vme_output.bo);
212     vme_context->vme_buffer_suface_setup(ctx,
213                                          &vme_context->gpe_context,
214                                          &vme_context->vme_output,
215                                          BINDING_TABLE_OFFSET(index),
216                                          SURFACE_STATE_OFFSET(index));
217 }
218
219 static void
220 gen7_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
221                                       struct encode_state *encode_state,
222                                       int index,
223                                       struct intel_encoder_context *encoder_context)
224
225 {
226     struct i965_driver_data *i965 = i965_driver_data(ctx);
227     struct gen6_vme_context *vme_context = encoder_context->vme_context;
228     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
229     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
230     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
231
232     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
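    /* the "+ 1" block holds the trailing MI_BATCH_BUFFER_END emitted by the fill function */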
233     vme_context->vme_batchbuffer.size_block = 32; /* 2 OWORDs */
234     vme_context->vme_batchbuffer.pitch = 16;
235     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, 
236                                                    "VME batchbuffer",
237                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
238                                                    0x1000);
239     vme_context->vme_buffer_suface_setup(ctx,
240                                          &vme_context->gpe_context,
241                                          &vme_context->vme_batchbuffer,
242                                          BINDING_TABLE_OFFSET(index),
243                                          SURFACE_STATE_OFFSET(index));
244 }
245
246 static VAStatus
247 gen7_vme_surface_setup(VADriverContextP ctx, 
248                        struct encode_state *encode_state,
249                        int is_intra,
250                        struct intel_encoder_context *encoder_context)
251 {
252     struct object_surface *obj_surface;
253     struct i965_driver_data *i965 = i965_driver_data(ctx);
254
255     /* Set up surface states */
256     /* current picture for encoding */
257     obj_surface = encode_state->input_yuv_object;
258     gen7_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
259     gen7_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
260
261     if (!is_intra) {
262         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
263         int slice_type;
264         struct object_surface *slice_obj_surface;
265         int ref_surface_id;
266
267         slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
268
269         if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
270                 slice_obj_surface = NULL;
271                 ref_surface_id = slice_param->RefPicList0[0].picture_id;
272                 if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) {
273                         slice_obj_surface = SURFACE(ref_surface_id);
274                 }
275                 if (slice_obj_surface && slice_obj_surface->bo) {
276                         obj_surface = slice_obj_surface;
277                 } else {
278                         obj_surface = encode_state->reference_objects[0];
279                 }
280                 /* reference 0 */
281                 if (obj_surface && obj_surface->bo)
282                     gen7_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
283         }
284         if (slice_type == SLICE_TYPE_B) {
285                 /* reference 1 */
286                 slice_obj_surface = NULL;
287                 ref_surface_id = slice_param->RefPicList1[0].picture_id;
288                 if (ref_surface_id != 0 && ref_surface_id != VA_INVALID_SURFACE) {
289                         slice_obj_surface = SURFACE(ref_surface_id);
290                 }
291                 if (slice_obj_surface && slice_obj_surface->bo) {
292                         obj_surface = slice_obj_surface;
293                 } else {
294                         obj_surface = encode_state->reference_objects[1];
295                 }
296
297
298                 if (obj_surface && obj_surface->bo)
299                         gen7_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
300         }
301     }
302
303     /* VME output */
304     gen7_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context);
305     gen7_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
306
307     return VA_STATUS_SUCCESS;
308 }
309
310 static VAStatus gen7_vme_interface_setup(VADriverContextP ctx, 
311                                          struct encode_state *encode_state,
312                                          struct intel_encoder_context *encoder_context)
313 {
314     struct gen6_vme_context *vme_context = encoder_context->vme_context;
315     struct gen6_interface_descriptor_data *desc;   
316     int i;
317     dri_bo *bo;
318
319     bo = vme_context->gpe_context.idrt.bo;
320     dri_bo_map(bo, 1);
321     assert(bo->virtual);
322     desc = bo->virtual;
323
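    /*
     * One interface descriptor per VME kernel; each descriptor points at the
     * shared binding table and at the VME state buffer, which the media
     * pipeline treats as its "sampler" state.
     */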
324     for (i = 0; i < vme_context->vme_kernel_sum; i++) {
325         struct i965_kernel *kernel;
326         kernel = &vme_context->gpe_context.kernels[i];
327         assert(sizeof(*desc) == 32);
328         /* Set up the descriptor table */
329         memset(desc, 0, sizeof(*desc));
330         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
331         desc->desc2.sampler_count = 1; /* FIXME: */
332         desc->desc2.sampler_state_pointer = (vme_context->vme_state.bo->offset >> 5);
333         desc->desc3.binding_table_entry_count = 1; /* FIXME: */
334         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
335         desc->desc4.constant_urb_entry_read_offset = 0;
336         desc->desc4.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
337                 
338         /*kernel start*/
339         dri_bo_emit_reloc(bo,   
340                           I915_GEM_DOMAIN_INSTRUCTION, 0,
341                           0,
342                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
343                           kernel->bo);
344         /*Sampler State(VME state pointer)*/
345         dri_bo_emit_reloc(bo,
346                           I915_GEM_DOMAIN_INSTRUCTION, 0,
347                           (1 << 2),
348                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc2),
349                           vme_context->vme_state.bo);
350         desc++;
351     }
352     dri_bo_unmap(bo);
353
354     return VA_STATUS_SUCCESS;
355 }
356
357 static VAStatus gen7_vme_constant_setup(VADriverContextP ctx, 
358                                         struct encode_state *encode_state,
359                                         struct intel_encoder_context *encoder_context)
360 {
361     struct gen6_vme_context *vme_context = encoder_context->vme_context;
362     unsigned char *constant_buffer;
363     unsigned int *vme_state_message;
364     int mv_num;
365
366     vme_state_message = (unsigned int *)vme_context->vme_state_message;
367     mv_num = 32;
368
369     if (encoder_context->codec == CODEC_H264) {
370         if (vme_context->h264_level >= 30) {
371             mv_num = 16;
372         
373             if (vme_context->h264_level >= 31)
374                 mv_num = 8;
375         }
376     } else if (encoder_context->codec == CODEC_MPEG2) { 
377         mv_num = 2;
378     }
379
380
381     vme_state_message[31] = mv_num;
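    /*
     * DWord 31 of the CURBE passes the per-MB motion-vector budget to the VME
     * kernel.  For H.264 the budget is cut at level 3.0 and again at 3.1, which
     * appears to track the MaxMvsPer2Mb limits of those levels; MPEG2 needs at
     * most one forward/backward vector pair.
     */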
382
383     dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
384     assert(vme_context->gpe_context.curbe.bo->virtual);
385     constant_buffer = vme_context->gpe_context.curbe.bo->virtual;
386
387     /* Pass the required constant info into the constant buffer */
388     memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
389         
390     dri_bo_unmap( vme_context->gpe_context.curbe.bo);
391
392     return VA_STATUS_SUCCESS;
393 }
394
395
396 static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx,
397                                          struct encode_state *encode_state,
398                                          int is_intra,
399                                          struct intel_encoder_context *encoder_context)
400 {
401     struct gen6_vme_context *vme_context = encoder_context->vme_context;
402     unsigned int *vme_state_message;
403     unsigned int *mb_cost_table;
404     int i;
405     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
406
407     mb_cost_table = (unsigned int *)vme_context->vme_state_message;
408     //building VME state message
409     dri_bo_map(vme_context->vme_state.bo, 1);
410     assert(vme_context->vme_state.bo->virtual);
411     vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;
412
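    /*
     * DWords 0-13 look like the IME search-path delta table (each byte encodes
     * one x/y search step), and DWords 14-19 carry the mode/MV cost values
     * prepared by intel_vme_update_mbmv_cost(); the P/SP case programs a longer
     * search path than the other slice types.
     */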
413     if ((slice_param->slice_type == SLICE_TYPE_P) ||
414         (slice_param->slice_type == SLICE_TYPE_SP)) {
415         vme_state_message[0] = 0x01010101;
416         vme_state_message[1] = 0x10010101;
417         vme_state_message[2] = 0x0F0F0F0F;
418         vme_state_message[3] = 0x100F0F0F;
419         vme_state_message[4] = 0x01010101;
420         vme_state_message[5] = 0x10010101;
421         vme_state_message[6] = 0x0F0F0F0F;
422         vme_state_message[7] = 0x100F0F0F;
423         vme_state_message[8] = 0x01010101;
424         vme_state_message[9] = 0x10010101;
425         vme_state_message[10] = 0x0F0F0F0F;
426         vme_state_message[11] = 0x000F0F0F;
427         vme_state_message[12] = 0x00;
428         vme_state_message[13] = 0x00;
429     } else {
430         vme_state_message[0] = 0x10010101;
431         vme_state_message[1] = 0x100F0F0F;
432         vme_state_message[2] = 0x10010101;
433         vme_state_message[3] = 0x000F0F0F;
434         vme_state_message[4] = 0;
435         vme_state_message[5] = 0;
436         vme_state_message[6] = 0;
437         vme_state_message[7] = 0;
438         vme_state_message[8] = 0;
439         vme_state_message[9] = 0;
440         vme_state_message[10] = 0;
441         vme_state_message[11] = 0;
442         vme_state_message[12] = 0;
443         vme_state_message[13] = 0;
444     }
445
446     vme_state_message[14] = (mb_cost_table[2] & 0xFFFF);
447     vme_state_message[15] = 0;
448     vme_state_message[16] = mb_cost_table[0];
449     vme_state_message[17] = mb_cost_table[1];
450     vme_state_message[18] = mb_cost_table[3];
451     vme_state_message[19] = mb_cost_table[4];
452
453     for(i = 20; i < 32; i++) {
454         vme_state_message[i] = 0;
455     }
456
457     dri_bo_unmap( vme_context->vme_state.bo);
458     return VA_STATUS_SUCCESS;
459 }
460
461 static VAStatus gen7_vme_mpeg2_state_setup(VADriverContextP ctx,
462                                          struct encode_state *encode_state,
463                                          int is_intra,
464                                          struct intel_encoder_context *encoder_context)
465 {
466     struct gen6_vme_context *vme_context = encoder_context->vme_context;
467     unsigned int *vme_state_message;
468     int i;
469         
470     //building VME state message
471     dri_bo_map(vme_context->vme_state.bo, 1);
472     assert(vme_context->vme_state.bo->virtual);
473     vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;
474
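    /*
     * Same layout as the AVC case above: a search-path table in DWords 0-13,
     * followed by fixed cost values (0x4a...), since the MPEG2 path has no
     * per-QP cost table here.
     */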
475     vme_state_message[0] = 0x01010101;
476     vme_state_message[1] = 0x10010101;
477     vme_state_message[2] = 0x0F0F0F0F;
478     vme_state_message[3] = 0x100F0F0F;
479     vme_state_message[4] = 0x01010101;
480     vme_state_message[5] = 0x10010101;
481     vme_state_message[6] = 0x0F0F0F0F;
482     vme_state_message[7] = 0x100F0F0F;
483     vme_state_message[8] = 0x01010101;
484     vme_state_message[9] = 0x10010101;
485     vme_state_message[10] = 0x0F0F0F0F;
486     vme_state_message[11] = 0x000F0F0F;
487     vme_state_message[12] = 0x00;
488     vme_state_message[13] = 0x00;
489
490     vme_state_message[14] = 0x4a4a;
491     vme_state_message[15] = 0x0;
492     vme_state_message[16] = 0x4a4a4a4a;
493     vme_state_message[17] = 0x4a4a4a4a;
494     vme_state_message[18] = 0x21110100;
495     vme_state_message[19] = 0x61514131;
496
497     for(i = 20; i < 32; i++) {
498         vme_state_message[i] = 0;
499     }
500     //vme_state_message[16] = 0x42424242;                       //cost function LUT set 0 for Intra
501
502     dri_bo_unmap( vme_context->vme_state.bo);
503     return VA_STATUS_SUCCESS;
504 }
505
506 static void
507 gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx, 
508                               struct encode_state *encode_state,
509                               int mb_width, int mb_height,
510                               int kernel,
511                               int transform_8x8_mode_flag,
512                               struct intel_encoder_context *encoder_context)
513 {
514     struct gen6_vme_context *vme_context = encoder_context->vme_context;
515     int mb_x = 0, mb_y = 0;
516     int i, s, j;
517     unsigned int *command_ptr;
518
519
520     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
521     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
522
523     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
524         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
525
526         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
527             int slice_mb_begin = slice_param->macroblock_address;
528             int slice_mb_number = slice_param->num_macroblocks;
529             unsigned int mb_intra_ub;
530             int slice_mb_x = slice_param->macroblock_address % mb_width;
531
532             for (i = 0; i < slice_mb_number;) {
533                 int mb_count = i + slice_mb_begin;    
534
535                 mb_x = mb_count % mb_width;
536                 mb_y = mb_count / mb_width;
537                 mb_intra_ub = 0;
538
539                 if (mb_x != 0) {
540                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
541                 }
542
543                 if (mb_y != 0) {
544                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
545
546                     if (mb_x != 0)
547                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
548
549                     if (mb_x != (mb_width -1))
550                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
551                 }
552
553                 if (i < mb_width) {
554                     if (i == 0)
555                         mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
556
557                     mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
558
559                     if ((i == (mb_width - 1)) && slice_mb_x) {
560                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
561                     }
562                 }
563                 
564                 if ((i == mb_width) && slice_mb_x) {
565                     mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
566                 }
567
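                /*
                 * One MEDIA_OBJECT per macroblock: 6 command DWords (the length
                 * field is total DWords minus 2) selecting the kernel's
                 * interface descriptor, followed by 2 DWords of inline data
                 * giving the kernel the MB position and the intra
                 * neighbour-availability flags.
                 */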
568                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
569                 *command_ptr++ = kernel;
570                 *command_ptr++ = 0;
571                 *command_ptr++ = 0;
572                 *command_ptr++ = 0;
573                 *command_ptr++ = 0;
574    
575                 /*inline data */
576                 *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
577                 *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
578
579                 i += 1;
580             }
581
582             slice_param++;
583         }
584     }
585
586     *command_ptr++ = 0;
587     *command_ptr++ = MI_BATCH_BUFFER_END;
588
589     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
590 }
591
592
593 static void gen7_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
594 {
595     struct i965_driver_data *i965 = i965_driver_data(ctx);
596     struct gen6_vme_context *vme_context = encoder_context->vme_context;
597     dri_bo *bo;
598
599     i965_gpe_context_init(ctx, &vme_context->gpe_context);
600
601     /* VME output buffer */
602     dri_bo_unreference(vme_context->vme_output.bo);
603     vme_context->vme_output.bo = NULL;
604
605     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
606     vme_context->vme_batchbuffer.bo = NULL;
607
608     /* VME state */
609     dri_bo_unreference(vme_context->vme_state.bo);
610     bo = dri_bo_alloc(i965->intel.bufmgr,
611                       "Buffer",
612                       1024*16, 64);
613     assert(bo);
614     vme_context->vme_state.bo = bo;
615 }
616
617 static void gen7_vme_pipeline_programing(VADriverContextP ctx, 
618                                          struct encode_state *encode_state,
619                                          struct intel_encoder_context *encoder_context)
620 {
621     struct gen6_vme_context *vme_context = encoder_context->vme_context;
622     struct intel_batchbuffer *batch = encoder_context->base.batch;
623     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
624     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
625     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
626     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
627     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
628     int s;
629     bool allow_hwscore = true;
630     int kernel_shader;
631
632     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
633         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
634         if ((pSliceParameter->macroblock_address % width_in_mbs)) {
635                 allow_hwscore = false;
636                 break;
637         }
638     }
639
640     if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
641         (pSliceParameter->slice_type == SLICE_TYPE_SI)) {
642         kernel_shader = AVC_VME_INTRA_SHADER;
643     } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
644         (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
645         kernel_shader = AVC_VME_INTER_SHADER;
646     } else {
647         kernel_shader = AVC_VME_BINTER_SHADER;
648         if (!allow_hwscore)
649              kernel_shader = AVC_VME_INTER_SHADER;
650     }
651
652     if (allow_hwscore)
653         gen7_vme_walker_fill_vme_batchbuffer(ctx, 
654                                   encode_state,
655                                   width_in_mbs, height_in_mbs,
656                                   kernel_shader,
657                                   pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
658                                   encoder_context);
659         
660     else
661         gen7_vme_fill_vme_batchbuffer(ctx, 
662                                   encode_state,
663                                   width_in_mbs, height_in_mbs,
664                                   kernel_shader, 
665                                   pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
666                                   encoder_context);
667
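    /*
     * The per-MB MEDIA_OBJECT commands were written into vme_batchbuffer.bo
     * above; the main batch only programs the GPE media pipeline and then
     * chains to that buffer with MI_BATCH_BUFFER_START.
     */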
668     intel_batchbuffer_start_atomic(batch, 0x1000);
669     gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
670     BEGIN_BATCH(batch, 2);
671     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
672     OUT_RELOC(batch,
673               vme_context->vme_batchbuffer.bo,
674               I915_GEM_DOMAIN_COMMAND, 0, 
675               0);
676     ADVANCE_BATCH(batch);
677
678     intel_batchbuffer_end_atomic(batch);        
679 }
680
681 static VAStatus gen7_vme_prepare(VADriverContextP ctx, 
682                                  struct encode_state *encode_state,
683                                  struct intel_encoder_context *encoder_context)
684 {
685     VAStatus vaStatus = VA_STATUS_SUCCESS;
686     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
687     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
688     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
689     struct gen6_vme_context *vme_context = encoder_context->vme_context;
690
691     if (!vme_context->h264_level ||
692                 (vme_context->h264_level != pSequenceParameter->level_idc)) {
693         vme_context->h264_level = pSequenceParameter->level_idc;        
694     }
695         
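    /*
     * intel_vme_update_mbmv_cost() derives the QP-dependent mode/MV cost table
     * and stores it in vme_state_message, where gen7_vme_constant_setup() and
     * gen7_vme_avc_state_setup() pick it up.
     */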
696     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
697     /* Set up all the memory objects */
698     gen7_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
699     gen7_vme_interface_setup(ctx, encode_state, encoder_context);
700     gen7_vme_constant_setup(ctx, encode_state, encoder_context);
701     gen7_vme_avc_state_setup(ctx, encode_state, is_intra, encoder_context);
702
703     /* Program the media pipeline */
704     gen7_vme_pipeline_programing(ctx, encode_state, encoder_context);
705
706     return vaStatus;
707 }
708
709 static VAStatus gen7_vme_run(VADriverContextP ctx, 
710                              struct encode_state *encode_state,
711                              struct intel_encoder_context *encoder_context)
712 {
713     struct intel_batchbuffer *batch = encoder_context->base.batch;
714
715     intel_batchbuffer_flush(batch);
716
717     return VA_STATUS_SUCCESS;
718 }
719
720 static VAStatus gen7_vme_stop(VADriverContextP ctx, 
721                               struct encode_state *encode_state,
722                               struct intel_encoder_context *encoder_context)
723 {
724     return VA_STATUS_SUCCESS;
725 }
726
727 static VAStatus
728 gen7_vme_pipeline(VADriverContextP ctx,
729                   VAProfile profile,
730                   struct encode_state *encode_state,
731                   struct intel_encoder_context *encoder_context)
732 {
733     gen7_vme_media_init(ctx, encoder_context);
734     gen7_vme_prepare(ctx, encode_state, encoder_context);
735     gen7_vme_run(ctx, encode_state, encoder_context);
736     gen7_vme_stop(ctx, encode_state, encoder_context);
737
738     return VA_STATUS_SUCCESS;
739 }
740
741 static void
742 gen7_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
743                                     struct encode_state *encode_state,
744                                     int index,
745                                     int is_intra,
746                                     struct intel_encoder_context *encoder_context)
747
748 {
749     struct i965_driver_data *i965 = i965_driver_data(ctx);
750     struct gen6_vme_context *vme_context = encoder_context->vme_context;
751     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
752     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
753     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
754
755     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
756     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
757
758     if (is_intra)
759         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES;
760     else
761         vme_context->vme_output.size_block = INTER_VME_OUTPUT_IN_BYTES;
762
763     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
764                                               "VME output buffer",
765                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
766                                               0x1000);
767     assert(vme_context->vme_output.bo);
768     vme_context->vme_buffer_suface_setup(ctx,
769                                          &vme_context->gpe_context,
770                                          &vme_context->vme_output,
771                                          BINDING_TABLE_OFFSET(index),
772                                          SURFACE_STATE_OFFSET(index));
773 }
774
775 static void
776 gen7_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
777                                              struct encode_state *encode_state,
778                                              int index,
779                                              struct intel_encoder_context *encoder_context)
780
781 {
782     struct i965_driver_data *i965 = i965_driver_data(ctx);
783     struct gen6_vme_context *vme_context = encoder_context->vme_context;
784     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
785     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
786     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
787
788     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
789     vme_context->vme_batchbuffer.size_block = 32; /* 2 OWORDs */
790     vme_context->vme_batchbuffer.pitch = 16;
791     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, 
792                                                    "VME batchbuffer",
793                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
794                                                    0x1000);
795     vme_context->vme_buffer_suface_setup(ctx,
796                                          &vme_context->gpe_context,
797                                          &vme_context->vme_batchbuffer,
798                                          BINDING_TABLE_OFFSET(index),
799                                          SURFACE_STATE_OFFSET(index));
800 }
801
802 static VAStatus
803 gen7_vme_mpeg2_surface_setup(VADriverContextP ctx, 
804                               struct encode_state *encode_state,
805                               int is_intra,
806                               struct intel_encoder_context *encoder_context)
807 {
808     struct object_surface *obj_surface;
809
810     /* Set up surface states */
811     /* current picture for encoding */
812     obj_surface = encode_state->input_yuv_object;
813     gen7_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
814     gen7_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
815
816     if (!is_intra) {
817         /* reference 0 */
818         obj_surface = encode_state->reference_objects[0];
819         if (obj_surface && obj_surface->bo != NULL)
820             gen7_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
821
822         /* reference 1 */
823         obj_surface = encode_state->reference_objects[1];
824         if (obj_surface && obj_surface->bo != NULL) 
825             gen7_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
826     }
827
828     /* VME output */
829     gen7_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
830     gen7_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
831
832     return VA_STATUS_SUCCESS;
833 }
834
835 static void
836 gen7_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
837                                      struct encode_state *encode_state,
838                                      int mb_width, int mb_height,
839                                      int kernel,
840                                      int transform_8x8_mode_flag,
841                                      struct intel_encoder_context *encoder_context)
842 {
843     struct gen6_vme_context *vme_context = encoder_context->vme_context;
844     int number_mb_cmds;
845     int mb_x = 0, mb_y = 0;
846     int i, s, j;
847     unsigned int *command_ptr;
848
849     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
850     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
851
852     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
853         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
854
855         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
856             int slice_mb_begin = slice_param->macroblock_address;
857             int slice_mb_number = slice_param->num_macroblocks;
858             unsigned int mb_intra_ub;
859
860             for (i = 0; i < slice_mb_number;) {
861                 int mb_count = i + slice_mb_begin;    
862
863                 mb_x = mb_count % mb_width;
864                 mb_y = mb_count / mb_width;
865                 mb_intra_ub = 0;
866
867                 if (mb_x != 0) {
868                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
869                 }
870
871                 if (mb_y != 0) {
872                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
873
874                     if (mb_x != 0)
875                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
876
877                     if (mb_x != (mb_width -1))
878                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
879                 }
880
881                 
882
883                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
884                 *command_ptr++ = kernel;
885                 *command_ptr++ = 0;
886                 *command_ptr++ = 0;
887                 *command_ptr++ = 0;
888                 *command_ptr++ = 0;
889    
890                 /*inline data */
891                 *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
892                 *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
893
894                 i += 1;
895             }
896
897             slice_param++;
898         }
899     }
900
901     *command_ptr++ = 0;
902     *command_ptr++ = MI_BATCH_BUFFER_END;
903
904     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
905 }
906
907 static void
908 gen7_vme_mpeg2_pipeline_programing(VADriverContextP ctx, 
909                                     struct encode_state *encode_state,
910                                     int is_intra,
911                                     struct intel_encoder_context *encoder_context)
912 {
913     struct gen6_vme_context *vme_context = encoder_context->vme_context;
914     struct intel_batchbuffer *batch = encoder_context->base.batch;
915     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
916     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
917     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
918
919     gen7_vme_mpeg2_fill_vme_batchbuffer(ctx, 
920                                          encode_state,
921                                          width_in_mbs, height_in_mbs,
922                                          MPEG2_VME_INTER_SHADER,
923                                          0,
924                                          encoder_context);
925
926     intel_batchbuffer_start_atomic(batch, 0x1000);
927     gen6_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
928     BEGIN_BATCH(batch, 2);
929     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
930     OUT_RELOC(batch,
931               vme_context->vme_batchbuffer.bo,
932               I915_GEM_DOMAIN_COMMAND, 0, 
933               0);
934     ADVANCE_BATCH(batch);
935
936     intel_batchbuffer_end_atomic(batch);
937 }
938
939 static VAStatus
940 gen7_vme_mpeg2_prepare(VADriverContextP ctx, 
941                         struct encode_state *encode_state,
942                         struct intel_encoder_context *encoder_context)
943 {
944     VAStatus vaStatus = VA_STATUS_SUCCESS;
945     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
946     struct gen6_vme_context *vme_context = encoder_context->vme_context;
947
948     if ((!vme_context->mpeg2_level) ||
949                 (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
950         vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
951     }
952
953     /* Set up all the memory objects */
954
955     intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
956     gen7_vme_mpeg2_surface_setup(ctx, encode_state, 0, encoder_context);
957     gen7_vme_interface_setup(ctx, encode_state, encoder_context);
958     gen7_vme_constant_setup(ctx, encode_state, encoder_context);
959     gen7_vme_mpeg2_state_setup(ctx, encode_state, 0, encoder_context);
960
961     /* Program the media pipeline */
962     gen7_vme_mpeg2_pipeline_programing(ctx, encode_state, 0, encoder_context);
963
964     return vaStatus;
965 }
966
967 static VAStatus
968 gen7_vme_mpeg2_pipeline(VADriverContextP ctx,
969                          VAProfile profile,
970                          struct encode_state *encode_state,
971                          struct intel_encoder_context *encoder_context)
972 {
973     struct i965_driver_data *i965 = i965_driver_data(ctx);
974     struct gen6_vme_context *vme_context = encoder_context->vme_context;
975     VAEncSliceParameterBufferMPEG2 *slice_param = 
976         (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
977     VAEncSequenceParameterBufferMPEG2 *seq_param = 
978        (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
979  
980     /* No need to run VME for an intra slice */
981     if (slice_param->is_intra_slice) {
982          if(!vme_context->vme_output.bo) {
983              int w_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
984              int h_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
985
986              vme_context->vme_output.num_blocks = w_in_mbs * h_in_mbs;
987              vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
988              vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES;
989              vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
990                                                        "MPEG2 VME output buffer",
991                                                        vme_context->vme_output.num_blocks
992                                                            * vme_context->vme_output.size_block,
993                                                        0x1000);
994          }
995
996          return VA_STATUS_SUCCESS;
997     }
998
999     gen7_vme_media_init(ctx, encoder_context);
1000     gen7_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
1001     gen7_vme_run(ctx, encode_state, encoder_context);
1002     gen7_vme_stop(ctx, encode_state, encoder_context);
1003
1004     return VA_STATUS_SUCCESS;
1005 }
1006
1007 static void
1008 gen7_vme_context_destroy(void *context)
1009 {
1010     struct gen6_vme_context *vme_context = context;
1011
1012     i965_gpe_context_destroy(&vme_context->gpe_context);
1013
1014     dri_bo_unreference(vme_context->vme_output.bo);
1015     vme_context->vme_output.bo = NULL;
1016
1017     dri_bo_unreference(vme_context->vme_state.bo);
1018     vme_context->vme_state.bo = NULL;
1019
1020     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
1021     vme_context->vme_batchbuffer.bo = NULL;
1022
1023     if (vme_context->vme_state_message) {
1024         free(vme_context->vme_state_message);
1025         vme_context->vme_state_message = NULL;
1026     }
1027
1028     free(vme_context);
1029 }
1030
1031 Bool gen7_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1032 {
1033     struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context));
1034     struct i965_kernel *vme_kernel_list = NULL;
1035
1036     vme_context->gpe_context.surface_state_binding_table.length =
1037               (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1038
1039     vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
1040     vme_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
1041     vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
1042
1043     vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1044     vme_context->gpe_context.vfe_state.num_urb_entries = 16;
1045     vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
1046     vme_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
1047     vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
1048
1049     gen7_vme_scoreboard_init(ctx, vme_context);
1050
1051     if (encoder_context->codec == CODEC_H264) {
1052         vme_kernel_list = gen7_vme_kernels;
1053         vme_context->video_coding_type = VIDEO_CODING_AVC;
1054         vme_context->vme_kernel_sum = AVC_VME_KERNEL_SUM; 
1055         encoder_context->vme_pipeline = gen7_vme_pipeline; 
1056     } else if (encoder_context->codec == CODEC_MPEG2) {
1057         vme_kernel_list = gen7_vme_mpeg2_kernels;
1058         vme_context->video_coding_type = VIDEO_CODING_MPEG2;
1059         vme_context->vme_kernel_sum = MPEG2_VME_KERNEL_SUM;
1060         encoder_context->vme_pipeline = gen7_vme_mpeg2_pipeline;
1061     } else {
1062         /* Unsupported codec */
1063         assert(0);
1064     }
1065
1066     i965_gpe_load_kernels(ctx,
1067                           &vme_context->gpe_context,
1068                           vme_kernel_list,
1069                           vme_context->vme_kernel_sum);
1070
1071     vme_context->vme_surface2_setup = gen7_gpe_surface2_setup;
1072     vme_context->vme_media_rw_surface_setup = gen7_gpe_media_rw_surface_setup;
1073     vme_context->vme_buffer_suface_setup = gen7_gpe_buffer_suface_setup;
1074
1075     encoder_context->vme_context = vme_context;
1076     encoder_context->vme_context_destroy = gen7_vme_context_destroy;
1077     vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
1078
1079     return True;
1080 }