OSDN Git Service

b8eacabb992fb0f635f5fce893f8167d579cda1d
[android-x86/hardware-intel-common-vaapi.git] / src / i965_media_h264.c
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <assert.h>
5
6
7 #include "intel_batchbuffer.h"
8 #include "intel_driver.h"
9
10 #include "i965_defines.h"
11 #include "i965_drv_video.h"
12 #include "i965_media.h"
13 #include "i965_media_h264.h"
14 #include "i965_decoder_utils.h"
15
/*
 * Indices of the AVC motion-compensation kernel entry points.
 *
 * The values are used as remap-table entries in
 * i965_media_h264_vfe_state_extension() and follow the same order as the
 * avc_mc_kernel_offset_gen4/gen5 tables in this file.
 */
enum {
    INTRA_16X16    = 0,
    INTRA_8X8      = 1,
    INTRA_4X4      = 2,
    INTRA_PCM      = 3,
    FRAMEMB_MOTION = 4,
    FIELDMB_MOTION = 5,
    MBAFF_MOTION   = 6,
};
25
/*
 * Constant header consumed by the intra-prediction kernels.
 *
 * i965_media_h264_upload_constants() copies this structure verbatim into
 * the CURBE buffer.  The "Rn.m" tags below are kept from the original
 * layout notes; presumably they name the register/dword each group lands
 * in -- TODO confirm against the kernel sources.
 */
struct intra_kernel_header
{
    /* R1.0: 4x4 luma, prediction modes 0-3 */
    unsigned char intra_4x4_luma_mode_0_offset;
    unsigned char intra_4x4_luma_mode_1_offset;
    unsigned char intra_4x4_luma_mode_2_offset;
    unsigned char intra_4x4_luma_mode_3_offset;

    /* R1.1: 4x4 luma, prediction modes 4-7 */
    unsigned char intra_4x4_luma_mode_4_offset;
    unsigned char intra_4x4_luma_mode_5_offset;
    unsigned char intra_4x4_luma_mode_6_offset;
    unsigned char intra_4x4_luma_mode_7_offset;

    /* R1.2: 4x4 luma mode 8, one padding byte, top reference offset */
    unsigned char intra_4x4_luma_mode_8_offset;
    unsigned char pad0;
    unsigned short top_reference_offset;

    /* R1.3: 8x8 luma, prediction modes 0-3 */
    unsigned char intra_8x8_luma_mode_0_offset;
    unsigned char intra_8x8_luma_mode_1_offset;
    unsigned char intra_8x8_luma_mode_2_offset;
    unsigned char intra_8x8_luma_mode_3_offset;

    /* R1.4: 8x8 luma, prediction modes 4-7 */
    unsigned char intra_8x8_luma_mode_4_offset;
    unsigned char intra_8x8_luma_mode_5_offset;
    unsigned char intra_8x8_luma_mode_6_offset;
    unsigned char intra_8x8_luma_mode_7_offset;

    /* R1.5: 8x8 luma mode 8, one padding byte, 8x8 reverse-transfer constant */
    unsigned char intra_8x8_luma_mode_8_offset;
    unsigned char pad1;
    unsigned short const_reverse_data_transfer_intra_8x8;

    /* R1.6: 16x16 luma, prediction modes 0-3 */
    unsigned char intra_16x16_luma_mode_0_offset;
    unsigned char intra_16x16_luma_mode_1_offset;
    unsigned char intra_16x16_luma_mode_2_offset;
    unsigned char intra_16x16_luma_mode_3_offset;

    /* R1.7: chroma, prediction modes 0-3 */
    unsigned char intra_chroma_mode_0_offset;
    unsigned char intra_chroma_mode_1_offset;
    unsigned char intra_chroma_mode_2_offset;
    unsigned char intra_chroma_mode_3_offset;

    /* R2.0 - R2.6: one 32-bit constant per slot */
    unsigned int const_intra_16x16_plane_0;              /* R2.0 */
    unsigned int const_intra_16x16_chroma_plane_0;       /* R2.1 */
    unsigned int const_intra_16x16_chroma_plane_1;       /* R2.2 */
    unsigned int const_intra_16x16_plane_1;              /* R2.3 */
    unsigned int left_shift_count_reverse_dw_ordering;   /* R2.4 */
    unsigned int const_reverse_data_transfer_intra_4x4;  /* R2.5 */
    unsigned int intra_4x4_pred_mode_offset;             /* R2.6 */
};
81
/*
 * Constant header layout for the inter (motion) kernels: a 16-bit weight
 * offset, an 8-bit flag and one padding byte.
 */
struct inter_kernel_header
{
    unsigned short weight_offset;      /* 16-bit weight offset */
    unsigned char weight_offset_flag;  /* 8-bit flag paired with weight_offset */
    unsigned char pad0;                /* padding byte */
};
88
89 #include "shaders/h264/mc/export.inc"
90 static unsigned long avc_mc_kernel_offset_gen4[] = {
91     INTRA_16x16_IP * INST_UNIT_GEN4,
92     INTRA_8x8_IP * INST_UNIT_GEN4,
93     INTRA_4x4_IP * INST_UNIT_GEN4,
94     INTRA_PCM_IP * INST_UNIT_GEN4,
95     FRAME_MB_IP * INST_UNIT_GEN4,
96     FIELD_MB_IP * INST_UNIT_GEN4,
97     MBAFF_MB_IP * INST_UNIT_GEN4
98 };
99
100 struct intra_kernel_header intra_kernel_header_gen4 = {
101     0,
102     (INTRA_4X4_HORIZONTAL_IP - INTRA_4X4_VERTICAL_IP),
103     (INTRA_4X4_DC_IP - INTRA_4X4_VERTICAL_IP),
104     (INTRA_4X4_DIAG_DOWN_LEFT_IP - INTRA_4X4_VERTICAL_IP),
105
106     (INTRA_4X4_DIAG_DOWN_RIGHT_IP - INTRA_4X4_VERTICAL_IP),
107     (INTRA_4X4_VERT_RIGHT_IP - INTRA_4X4_VERTICAL_IP),
108     (INTRA_4X4_HOR_DOWN_IP - INTRA_4X4_VERTICAL_IP),
109     (INTRA_4X4_VERT_LEFT_IP - INTRA_4X4_VERTICAL_IP),
110
111     (INTRA_4X4_HOR_UP_IP - INTRA_4X4_VERTICAL_IP),
112     0,
113     0xFFFC,
114
115     0,
116     (INTRA_8X8_HORIZONTAL_IP - INTRA_8X8_VERTICAL_IP),
117     (INTRA_8X8_DC_IP - INTRA_8X8_VERTICAL_IP),
118     (INTRA_8X8_DIAG_DOWN_LEFT_IP - INTRA_8X8_VERTICAL_IP),
119
120     (INTRA_8X8_DIAG_DOWN_RIGHT_IP - INTRA_8X8_VERTICAL_IP),
121     (INTRA_8X8_VERT_RIGHT_IP - INTRA_8X8_VERTICAL_IP),
122     (INTRA_8X8_HOR_DOWN_IP - INTRA_8X8_VERTICAL_IP),
123     (INTRA_8X8_VERT_LEFT_IP - INTRA_8X8_VERTICAL_IP),
124
125     (INTRA_8X8_HOR_UP_IP - INTRA_8X8_VERTICAL_IP),
126     0,
127     0x0001,
128
129     0,
130     (INTRA_16x16_HORIZONTAL_IP - INTRA_16x16_VERTICAL_IP),
131     (INTRA_16x16_DC_IP - INTRA_16x16_VERTICAL_IP),
132     (INTRA_16x16_PLANE_IP - INTRA_16x16_VERTICAL_IP),
133
134     0,
135     (INTRA_CHROMA_HORIZONTAL_IP - INTRA_CHROMA_DC_IP),
136     (INTRA_CHROMA_VERTICAL_IP - INTRA_CHROMA_DC_IP),
137     (INTRA_Chroma_PLANE_IP - INTRA_CHROMA_DC_IP),
138
139     0xFCFBFAF9,
140
141     0x00FFFEFD,
142
143     0x04030201,
144
145     0x08070605,
146
147     0x18100800,
148
149     0x00020406,
150
151     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB3_IP) * 0x1000000 + 
152     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB2_IP) * 0x10000 + 
153     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB1_IP) * 0x100 + 
154     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB0_IP)
155 };
156
157 static const uint32_t h264_avc_combined_gen4[][4] = {
158 #include "shaders/h264/mc/avc_mc.g4b"
159 };
160
161 static const uint32_t h264_avc_null_gen4[][4] = {
162 #include "shaders/h264/mc/null.g4b"
163 };
164
165 static struct i965_kernel h264_avc_kernels_gen4[] = {
166     {
167         "AVC combined kernel",
168         H264_AVC_COMBINED,
169         h264_avc_combined_gen4,
170         sizeof(h264_avc_combined_gen4),
171         NULL
172     },
173
174     {
175         "NULL kernel",
176         H264_AVC_NULL,
177         h264_avc_null_gen4,
178         sizeof(h264_avc_null_gen4),
179         NULL
180     }
181 };
182
183 /* On Ironlake */
184 #include "shaders/h264/mc/export.inc.gen5"
185 static unsigned long avc_mc_kernel_offset_gen5[] = {
186     INTRA_16x16_IP_GEN5 * INST_UNIT_GEN5,
187     INTRA_8x8_IP_GEN5 * INST_UNIT_GEN5,
188     INTRA_4x4_IP_GEN5 * INST_UNIT_GEN5,
189     INTRA_PCM_IP_GEN5 * INST_UNIT_GEN5,
190     FRAME_MB_IP_GEN5 * INST_UNIT_GEN5,
191     FIELD_MB_IP_GEN5 * INST_UNIT_GEN5,
192     MBAFF_MB_IP_GEN5 * INST_UNIT_GEN5
193 };
194
195 struct intra_kernel_header intra_kernel_header_gen5 = {
196     0,
197     (INTRA_4X4_HORIZONTAL_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
198     (INTRA_4X4_DC_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
199     (INTRA_4X4_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
200
201     (INTRA_4X4_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
202     (INTRA_4X4_VERT_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
203     (INTRA_4X4_HOR_DOWN_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
204     (INTRA_4X4_VERT_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
205
206     (INTRA_4X4_HOR_UP_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
207     0,
208     0xFFFC,
209
210     0,
211     (INTRA_8X8_HORIZONTAL_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
212     (INTRA_8X8_DC_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
213     (INTRA_8X8_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
214
215     (INTRA_8X8_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
216     (INTRA_8X8_VERT_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
217     (INTRA_8X8_HOR_DOWN_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
218     (INTRA_8X8_VERT_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
219
220     (INTRA_8X8_HOR_UP_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
221     0,
222     0x0001,
223
224     0,
225     (INTRA_16x16_HORIZONTAL_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
226     (INTRA_16x16_DC_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
227     (INTRA_16x16_PLANE_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
228
229     0,
230     (INTRA_CHROMA_HORIZONTAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
231     (INTRA_CHROMA_VERTICAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
232     (INTRA_Chroma_PLANE_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
233
234     0xFCFBFAF9,
235
236     0x00FFFEFD,
237
238     0x04030201,
239
240     0x08070605,
241
242     0x18100800,
243
244     0x00020406,
245
246     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB3_IP_GEN5) * 0x1000000 + 
247     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB2_IP_GEN5) * 0x10000 + 
248     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB1_IP_GEN5) * 0x100 + 
249     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB0_IP_GEN5)
250 };
251
252 static const uint32_t h264_avc_combined_gen5[][4] = {
253 #include "shaders/h264/mc/avc_mc.g4b.gen5"
254 };
255
256 static const uint32_t h264_avc_null_gen5[][4] = {
257 #include "shaders/h264/mc/null.g4b.gen5"
258 };
259
260 static struct i965_kernel h264_avc_kernels_gen5[] = {
261     {
262         "AVC combined kernel",
263         H264_AVC_COMBINED,
264         h264_avc_combined_gen5,
265         sizeof(h264_avc_combined_gen5),
266         NULL
267     },
268
269     {
270         "NULL kernel",
271         H264_AVC_NULL,
272         h264_avc_null_gen5,
273         sizeof(h264_avc_null_gen5),
274         NULL
275     }
276 };
277
/* Number of kernel entry points, taken from the gen4 offset table. */
#define NUM_AVC_MC_INTERFACES \
    (sizeof(avc_mc_kernel_offset_gen4) / sizeof(avc_mc_kernel_offset_gen4[0]))

/* Offset table in use; NULL until one is selected. */
static unsigned long *avc_mc_kernel_offset = NULL;

/* Intra kernel header in use; NULL until one is selected. */
static struct intra_kernel_header *intra_kernel_header = NULL;
282
283 static void
284 i965_media_h264_surface_state(VADriverContextP ctx, 
285                               int index,
286                               struct object_surface *obj_surface,
287                               unsigned long offset, 
288                               int w, int h, int pitch,
289                               Bool is_dst,
290                               int vert_line_stride,
291                               int vert_line_stride_ofs,
292                               int format,
293                               struct i965_media_context *media_context)
294 {
295     struct i965_driver_data *i965 = i965_driver_data(ctx);
296     struct i965_surface_state *ss;
297     dri_bo *bo;
298     uint32_t write_domain, read_domain;
299
300     assert(obj_surface->bo);
301
302     bo = dri_bo_alloc(i965->intel.bufmgr,
303                       "surface state", 
304                       sizeof(struct i965_surface_state), 32);
305     assert(bo);
306     dri_bo_map(bo, 1);
307     assert(bo->virtual);
308     ss = bo->virtual;
309     memset(ss, 0, sizeof(*ss));
310     ss->ss0.surface_type = I965_SURFACE_2D;
311     ss->ss0.surface_format = format;
312     ss->ss0.vert_line_stride = vert_line_stride;
313     ss->ss0.vert_line_stride_ofs = vert_line_stride_ofs;
314     ss->ss1.base_addr = obj_surface->bo->offset + offset;
315     ss->ss2.width = w - 1;
316     ss->ss2.height = h - 1;
317     ss->ss3.pitch = pitch - 1;
318
319     if (is_dst) {
320         write_domain = I915_GEM_DOMAIN_RENDER;
321         read_domain = I915_GEM_DOMAIN_RENDER;
322     } else {
323         write_domain = 0;
324         read_domain = I915_GEM_DOMAIN_SAMPLER;
325     }
326
327     dri_bo_emit_reloc(bo,
328                       read_domain, write_domain,
329                       offset,
330                       offsetof(struct i965_surface_state, ss1),
331                       obj_surface->bo);
332     dri_bo_unmap(bo);
333
334     assert(index < MAX_MEDIA_SURFACES);
335     media_context->surface_state[index].bo = bo;
336 }
337
338 static void 
339 i965_media_h264_surfaces_setup(VADriverContextP ctx, 
340                                struct decode_state *decode_state,
341                                struct i965_media_context *media_context)
342 {
343     struct i965_h264_context *i965_h264_context;
344     struct object_surface *obj_surface;
345     VAPictureParameterBufferH264 *pic_param;
346     VAPictureH264 *va_pic;
347     int i, w, h;
348     int field_picture;
349
350     assert(media_context->private_context);
351     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
352
353     assert(decode_state->pic_param && decode_state->pic_param->buffer);
354     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
355
356     /* Target Picture */
357     va_pic = &pic_param->CurrPic;
358     obj_surface = decode_state->render_object;
359     w = obj_surface->width;
360     h = obj_surface->height;
361     field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
362     i965_media_h264_surface_state(ctx, 0, obj_surface,
363                                   0, w / 4, h / (1 + field_picture), w,
364                                   1, 
365                                   field_picture,
366                                   !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
367                                   I965_SURFACEFORMAT_R8_SINT,   /* Y */
368                                   media_context);
369     i965_media_h264_surface_state(ctx, 1, obj_surface,
370                                   w * h, w / 4, h / 2 / (1 + field_picture), w,
371                                   1, 
372                                   field_picture,
373                                   !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
374                                   I965_SURFACEFORMAT_R8G8_SINT, /* INTERLEAVED U/V */
375                                   media_context);
376
377     /* Reference Pictures */
378     for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
379         struct object_surface * const obj_surface =
380             i965_h264_context->fsid_list[i].obj_surface;
381
382         if (obj_surface) {
383             const VAPictureH264 * const va_pic = avc_find_picture(
384                 obj_surface->base.id, pic_param->ReferenceFrames,
385                 ARRAY_ELEMS(pic_param->ReferenceFrames));
386
387             assert(va_pic != NULL);
388             w = obj_surface->width;
389             h = obj_surface->height;
390             field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
391             i965_media_h264_surface_state(ctx, 2 + i, obj_surface,
392                                           0, w / 4, h / (1 + field_picture), w,
393                                           0, 
394                                           field_picture,
395                                           !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
396                                           I965_SURFACEFORMAT_R8_SINT,   /* Y */
397                                           media_context);
398             i965_media_h264_surface_state(ctx, 18 + i, obj_surface,
399                                           w * h, w / 4, h / 2 / (1 + field_picture), w,
400                                           0, 
401                                           field_picture,
402                                           !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
403                                           I965_SURFACEFORMAT_R8G8_SINT, /* INTERLEAVED U/V */
404                                           media_context);
405         }
406     }
407 }
408
409 static void
410 i965_media_h264_binding_table(VADriverContextP ctx, struct i965_media_context *media_context)
411 {
412     int i;
413     unsigned int *binding_table;
414     dri_bo *bo = media_context->binding_table.bo;
415
416     dri_bo_map(bo, 1);
417     assert(bo->virtual);
418     binding_table = bo->virtual;
419     memset(binding_table, 0, bo->size);
420
421     for (i = 0; i < MAX_MEDIA_SURFACES; i++) {
422         if (media_context->surface_state[i].bo) {
423             binding_table[i] = media_context->surface_state[i].bo->offset;
424             dri_bo_emit_reloc(bo,
425                               I915_GEM_DOMAIN_INSTRUCTION, 0,
426                               0,
427                               i * sizeof(*binding_table),
428                               media_context->surface_state[i].bo);
429         }
430     }
431
432     dri_bo_unmap(media_context->binding_table.bo);
433 }
434
435 static void 
436 i965_media_h264_interface_descriptor_remap_table(VADriverContextP ctx, struct i965_media_context *media_context)
437 {
438     struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)media_context->private_context;
439     struct i965_interface_descriptor *desc;
440     int i;
441     dri_bo *bo;
442
443     bo = media_context->idrt.bo;
444     dri_bo_map(bo, 1);
445     assert(bo->virtual);
446     desc = bo->virtual;
447
448     for (i = 0; i < NUM_AVC_MC_INTERFACES; i++) {
449         int kernel_offset = avc_mc_kernel_offset[i];
450         memset(desc, 0, sizeof(*desc));
451         desc->desc0.grf_reg_blocks = 7; 
452         desc->desc0.kernel_start_pointer = (i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo->offset + kernel_offset) >> 6; /* reloc */
453         desc->desc1.const_urb_entry_read_offset = 0;
454         desc->desc1.const_urb_entry_read_len = 2;
455         desc->desc3.binding_table_entry_count = 0;
456         desc->desc3.binding_table_pointer = 
457             media_context->binding_table.bo->offset >> 5; /*reloc */
458
459         dri_bo_emit_reloc(bo,
460                           I915_GEM_DOMAIN_INSTRUCTION, 0,
461                           desc->desc0.grf_reg_blocks + kernel_offset,
462                           i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc0),
463                           i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo);
464
465         dri_bo_emit_reloc(bo,
466                           I915_GEM_DOMAIN_INSTRUCTION, 0,
467                           desc->desc3.binding_table_entry_count,
468                           i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc3),
469                           media_context->binding_table.bo);
470         desc++;
471     }
472
473     dri_bo_unmap(bo);
474 }
475
476 static void
477 i965_media_h264_vfe_state(VADriverContextP ctx, struct i965_media_context *media_context)
478 {
479     struct i965_vfe_state *vfe_state;
480     dri_bo *bo;
481
482     bo = media_context->vfe_state.bo;
483     dri_bo_map(bo, 1);
484     assert(bo->virtual);
485     vfe_state = bo->virtual;
486     memset(vfe_state, 0, sizeof(*vfe_state));
487     vfe_state->vfe0.extend_vfe_state_present = 1;
488     vfe_state->vfe1.max_threads = media_context->urb.num_vfe_entries - 1;
489     vfe_state->vfe1.urb_entry_alloc_size = media_context->urb.size_vfe_entry - 1;
490     vfe_state->vfe1.num_urb_entries = media_context->urb.num_vfe_entries;
491     vfe_state->vfe1.vfe_mode = VFE_AVC_IT_MODE;
492     vfe_state->vfe1.children_present = 0;
493     vfe_state->vfe2.interface_descriptor_base = 
494         media_context->idrt.bo->offset >> 4; /* reloc */
495     dri_bo_emit_reloc(bo,
496                       I915_GEM_DOMAIN_INSTRUCTION, 0,
497                       0,
498                       offsetof(struct i965_vfe_state, vfe2),
499                       media_context->idrt.bo);
500     dri_bo_unmap(bo);
501 }
502
503 static void 
504 i965_media_h264_vfe_state_extension(VADriverContextP ctx, 
505                                     struct decode_state *decode_state,
506                                     struct i965_media_context *media_context)
507 {
508     struct i965_h264_context *i965_h264_context;
509     struct i965_vfe_state_ex *vfe_state_ex;
510     VAPictureParameterBufferH264 *pic_param;
511     int mbaff_frame_flag;
512
513     assert(media_context->private_context);
514     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
515
516     assert(decode_state->pic_param && decode_state->pic_param->buffer);
517     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
518     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
519                         !pic_param->pic_fields.bits.field_pic_flag);
520
521     assert(media_context->extended_state.bo);
522     dri_bo_map(media_context->extended_state.bo, 1);
523     assert(media_context->extended_state.bo->virtual);
524     vfe_state_ex = media_context->extended_state.bo->virtual;
525     memset(vfe_state_ex, 0, sizeof(*vfe_state_ex));
526
527     /*
528      * Indirect data buffer:
529      * --------------------------------------------------------
530      * | Motion Vectors | Weight/Offset data | Residual data |
531      * --------------------------------------------------------
532      * R4-R7: Motion Vectors
533      * R8-R9: Weight/Offset
534      * R10-R33: Residual data
535      */
536     vfe_state_ex->vfex1.avc.residual_data_fix_offset_flag = !!RESIDUAL_DATA_OFFSET;
537     vfe_state_ex->vfex1.avc.residual_data_offset = RESIDUAL_DATA_OFFSET;
538
539     if (i965_h264_context->picture.i_flag) {
540         vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_NOMV; /* NoMV */
541         vfe_state_ex->vfex1.avc.weight_grf_offset = 0;
542         vfe_state_ex->vfex1.avc.residual_grf_offset = 0;
543     } else {
544         vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_MV_WO; /* Both MV and W/O */
545         vfe_state_ex->vfex1.avc.weight_grf_offset = 4;
546         vfe_state_ex->vfex1.avc.residual_grf_offset = 6;
547     }
548
549     if (!pic_param->pic_fields.bits.field_pic_flag) {
550         if (mbaff_frame_flag) {
551             vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
552             vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
553             vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
554             vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
555             vfe_state_ex->remap_table0.remap_index_4 = MBAFF_MOTION;
556             vfe_state_ex->remap_table0.remap_index_5 = MBAFF_MOTION;
557             vfe_state_ex->remap_table0.remap_index_6 = MBAFF_MOTION;
558             vfe_state_ex->remap_table0.remap_index_7 = MBAFF_MOTION;
559
560             vfe_state_ex->remap_table1.remap_index_8 = MBAFF_MOTION;
561             vfe_state_ex->remap_table1.remap_index_9 = MBAFF_MOTION;
562             vfe_state_ex->remap_table1.remap_index_10 = MBAFF_MOTION;
563             vfe_state_ex->remap_table1.remap_index_11 = MBAFF_MOTION;
564             vfe_state_ex->remap_table1.remap_index_12 = MBAFF_MOTION;
565             vfe_state_ex->remap_table1.remap_index_13 = MBAFF_MOTION;
566             vfe_state_ex->remap_table1.remap_index_14 = MBAFF_MOTION;
567             vfe_state_ex->remap_table1.remap_index_15 = MBAFF_MOTION;
568         } else {
569             vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
570             vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
571             vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
572             vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
573             vfe_state_ex->remap_table0.remap_index_4 = FRAMEMB_MOTION;
574             vfe_state_ex->remap_table0.remap_index_5 = FRAMEMB_MOTION;
575             vfe_state_ex->remap_table0.remap_index_6 = FRAMEMB_MOTION;
576             vfe_state_ex->remap_table0.remap_index_7 = FRAMEMB_MOTION;
577
578             vfe_state_ex->remap_table1.remap_index_8 = FRAMEMB_MOTION;
579             vfe_state_ex->remap_table1.remap_index_9 = FRAMEMB_MOTION;
580             vfe_state_ex->remap_table1.remap_index_10 = FRAMEMB_MOTION;
581             vfe_state_ex->remap_table1.remap_index_11 = FRAMEMB_MOTION;
582             vfe_state_ex->remap_table1.remap_index_12 = FRAMEMB_MOTION;
583             vfe_state_ex->remap_table1.remap_index_13 = FRAMEMB_MOTION;
584             vfe_state_ex->remap_table1.remap_index_14 = FRAMEMB_MOTION;
585             vfe_state_ex->remap_table1.remap_index_15 = FRAMEMB_MOTION;
586         }
587     } else {
588         vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
589         vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
590         vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
591         vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
592         vfe_state_ex->remap_table0.remap_index_4 = FIELDMB_MOTION;
593         vfe_state_ex->remap_table0.remap_index_5 = FIELDMB_MOTION;
594         vfe_state_ex->remap_table0.remap_index_6 = FIELDMB_MOTION;
595         vfe_state_ex->remap_table0.remap_index_7 = FIELDMB_MOTION;
596
597         vfe_state_ex->remap_table1.remap_index_8 = FIELDMB_MOTION;
598         vfe_state_ex->remap_table1.remap_index_9 = FIELDMB_MOTION;
599         vfe_state_ex->remap_table1.remap_index_10 = FIELDMB_MOTION;
600         vfe_state_ex->remap_table1.remap_index_11 = FIELDMB_MOTION;
601         vfe_state_ex->remap_table1.remap_index_12 = FIELDMB_MOTION;
602         vfe_state_ex->remap_table1.remap_index_13 = FIELDMB_MOTION;
603         vfe_state_ex->remap_table1.remap_index_14 = FIELDMB_MOTION;
604         vfe_state_ex->remap_table1.remap_index_15 = FIELDMB_MOTION;
605     }
606
607     if (i965_h264_context->use_avc_hw_scoreboard) {
608         vfe_state_ex->scoreboard0.enable = 1;
609         vfe_state_ex->scoreboard0.type = SCOREBOARD_STALLING;
610         vfe_state_ex->scoreboard0.mask = 0xff;
611
612         vfe_state_ex->scoreboard1.delta_x0 = -1;
613         vfe_state_ex->scoreboard1.delta_y0 = 0;
614         vfe_state_ex->scoreboard1.delta_x1 = 0;
615         vfe_state_ex->scoreboard1.delta_y1 = -1;
616         vfe_state_ex->scoreboard1.delta_x2 = 1;
617         vfe_state_ex->scoreboard1.delta_y2 = -1;
618         vfe_state_ex->scoreboard1.delta_x3 = -1;
619         vfe_state_ex->scoreboard1.delta_y3 = -1;
620
621         vfe_state_ex->scoreboard2.delta_x4 = -1;
622         vfe_state_ex->scoreboard2.delta_y4 = 1;
623         vfe_state_ex->scoreboard2.delta_x5 = 0;
624         vfe_state_ex->scoreboard2.delta_y5 = -2;
625         vfe_state_ex->scoreboard2.delta_x6 = 1;
626         vfe_state_ex->scoreboard2.delta_y6 = -2;
627         vfe_state_ex->scoreboard2.delta_x7 = -1;
628         vfe_state_ex->scoreboard2.delta_y7 = -2;
629     }
630
631     dri_bo_unmap(media_context->extended_state.bo);
632 }
633
634 static void
635 i965_media_h264_upload_constants(VADriverContextP ctx,
636                                  struct decode_state *decode_state,
637                                  struct i965_media_context *media_context)
638 {
639     struct i965_h264_context *i965_h264_context;
640     unsigned char *constant_buffer;
641     VASliceParameterBufferH264 *slice_param;
642
643     assert(media_context->private_context);
644     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
645
646     assert(decode_state->slice_params[0] && decode_state->slice_params[0]->buffer);
647     slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[0]->buffer;
648
649     dri_bo_map(media_context->curbe.bo, 1);
650     assert(media_context->curbe.bo->virtual);
651     constant_buffer = media_context->curbe.bo->virtual;
652
653     /* HW solution for W=128 */
654     if (i965_h264_context->use_hw_w128) {
655         memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
656     } else {
657         if (slice_param->slice_type == SLICE_TYPE_I ||
658             slice_param->slice_type == SLICE_TYPE_SI) {
659             memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
660         } else {
661             /* FIXME: Need to upload CURBE data to inter kernel interface 
662              * to support weighted prediction work-around 
663              */
664             *(short *)constant_buffer = i965_h264_context->weight128_offset0;
665             constant_buffer += 2;
666             *(char *)constant_buffer = i965_h264_context->weight128_offset0_flag;
667             constant_buffer++;
668             *constant_buffer = 0;
669         }
670     }
671
672     dri_bo_unmap(media_context->curbe.bo);
673 }
674
675 static void
676 i965_media_h264_states_setup(VADriverContextP ctx,
677                              struct decode_state *decode_state,
678                              struct i965_media_context *media_context)
679 {
680     struct i965_h264_context *i965_h264_context;
681
682     assert(media_context->private_context);
683     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
684
685     i965_avc_bsd_pipeline(ctx, decode_state, i965_h264_context);
686
687     if (i965_h264_context->use_avc_hw_scoreboard)
688         i965_avc_hw_scoreboard(ctx, decode_state, i965_h264_context);
689
690     i965_media_h264_surfaces_setup(ctx, decode_state, media_context);
691     i965_media_h264_binding_table(ctx, media_context);
692     i965_media_h264_interface_descriptor_remap_table(ctx, media_context);
693     i965_media_h264_vfe_state_extension(ctx, decode_state, media_context);
694     i965_media_h264_vfe_state(ctx, media_context);
695     i965_media_h264_upload_constants(ctx, decode_state, media_context);
696 }
697
698 static void
699 i965_media_h264_objects(VADriverContextP ctx,
700                         struct decode_state *decode_state,
701                         struct i965_media_context *media_context)
702 {
703     struct intel_batchbuffer *batch = media_context->base.batch;
704     struct i965_h264_context *i965_h264_context;
705     unsigned int *object_command;
706
707     assert(media_context->private_context);
708     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
709
710     dri_bo_map(i965_h264_context->avc_it_command_mb_info.bo, True);
711     assert(i965_h264_context->avc_it_command_mb_info.bo->virtual);
712     object_command = i965_h264_context->avc_it_command_mb_info.bo->virtual;
713     memset(object_command, 0, i965_h264_context->avc_it_command_mb_info.mbs * i965_h264_context->use_avc_hw_scoreboard * MB_CMD_IN_BYTES);
714     object_command += i965_h264_context->avc_it_command_mb_info.mbs * (1 + i965_h264_context->use_avc_hw_scoreboard) * MB_CMD_IN_DWS;
715     *object_command++ = 0;
716     *object_command = MI_BATCH_BUFFER_END;
717     dri_bo_unmap(i965_h264_context->avc_it_command_mb_info.bo);
718
719     BEGIN_BATCH(batch, 2);
720     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
721     OUT_RELOC(batch, i965_h264_context->avc_it_command_mb_info.bo, 
722               I915_GEM_DOMAIN_COMMAND, 0, 
723               0);
724     ADVANCE_BATCH(batch);
725
726     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
727      * will cause control to pass back to ring buffer 
728      */
729     intel_batchbuffer_end_atomic(batch);
730     intel_batchbuffer_flush(batch);
731     intel_batchbuffer_start_atomic(batch, 0x1000);
732     i965_avc_ildb(ctx, decode_state, i965_h264_context);
733 }
734
735 static void 
736 i965_media_h264_free_private_context(void **data)
737 {
738     struct i965_h264_context *i965_h264_context = *data;
739     int i;
740
741     if (i965_h264_context == NULL)
742         return;
743
744     i965_avc_ildb_ternimate(&i965_h264_context->avc_ildb_context);
745     i965_avc_hw_scoreboard_ternimate(&i965_h264_context->avc_hw_scoreboard_context);
746     i965_avc_bsd_ternimate(&i965_h264_context->i965_avc_bsd_context);
747     dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
748     dri_bo_unreference(i965_h264_context->avc_it_data.bo);
749     dri_bo_unreference(i965_h264_context->avc_ildb_data.bo);
750
751     for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
752         struct i965_kernel *kernel = &i965_h264_context->avc_kernels[i];
753
754         dri_bo_unreference(kernel->bo);
755         kernel->bo = NULL;
756     }
757
758     free(i965_h264_context);
759     *data = NULL;
760 }
761
762 void
763 i965_media_h264_decode_init(VADriverContextP ctx, 
764                             struct decode_state *decode_state, 
765                             struct i965_media_context *media_context)
766 {
767     struct i965_driver_data *i965 = i965_driver_data(ctx);
768     struct i965_h264_context *i965_h264_context = media_context->private_context;
769     dri_bo *bo;
770     VAPictureParameterBufferH264 *pic_param;
771
772     assert(decode_state->pic_param && decode_state->pic_param->buffer);
773     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
774     i965_h264_context->picture.width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
775     i965_h264_context->picture.height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff) / 
776         (1 + !!pic_param->pic_fields.bits.field_pic_flag); /* picture height */
777     i965_h264_context->picture.mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
778                                                    !pic_param->pic_fields.bits.field_pic_flag);
779     i965_h264_context->avc_it_command_mb_info.mbs = (i965_h264_context->picture.width_in_mbs * 
780                                                      i965_h264_context->picture.height_in_mbs);
781
782     dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
783     bo = dri_bo_alloc(i965->intel.bufmgr,
784                       "avc it command mb info",
785                       i965_h264_context->avc_it_command_mb_info.mbs * MB_CMD_IN_BYTES * (1 + i965_h264_context->use_avc_hw_scoreboard) + 8,
786                       0x1000);
787     assert(bo);
788     i965_h264_context->avc_it_command_mb_info.bo = bo;
789
790     dri_bo_unreference(i965_h264_context->avc_it_data.bo);
791     bo = dri_bo_alloc(i965->intel.bufmgr,
792                       "avc it data",
793                       i965_h264_context->avc_it_command_mb_info.mbs * 
794                       0x800 * 
795                       (1 + !!pic_param->pic_fields.bits.field_pic_flag),
796                       0x1000);
797     assert(bo);
798     i965_h264_context->avc_it_data.bo = bo;
799     i965_h264_context->avc_it_data.write_offset = 0;
800     dri_bo_unreference(media_context->indirect_object.bo);
801     media_context->indirect_object.bo = bo;
802     dri_bo_reference(media_context->indirect_object.bo);
803     media_context->indirect_object.offset = i965_h264_context->avc_it_data.write_offset;
804
805     dri_bo_unreference(i965_h264_context->avc_ildb_data.bo);
806     bo = dri_bo_alloc(i965->intel.bufmgr,
807                       "AVC-ILDB Data Buffer",
808                       i965_h264_context->avc_it_command_mb_info.mbs * 64 * 2,
809                       0x1000);
810     assert(bo);
811     i965_h264_context->avc_ildb_data.bo = bo;
812
813     /* bsd pipeline */
814     i965_avc_bsd_decode_init(ctx, i965_h264_context);
815
816     /* HW scoreboard */
817     if (i965_h264_context->use_avc_hw_scoreboard)
818         i965_avc_hw_scoreboard_decode_init(ctx, i965_h264_context);
819
820     /* ILDB */
821     i965_avc_ildb_decode_init(ctx, i965_h264_context);
822
823     /* for Media pipeline */
824     media_context->extended_state.enabled = 1;
825     dri_bo_unreference(media_context->extended_state.bo);
826     bo = dri_bo_alloc(i965->intel.bufmgr, 
827                       "extended vfe state", 
828                       sizeof(struct i965_vfe_state_ex), 32);
829     assert(bo);
830     media_context->extended_state.bo = bo;
831 }
832
833 void 
834 i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context *media_context)
835 {
836     struct i965_driver_data *i965 = i965_driver_data(ctx);
837     struct i965_h264_context *i965_h264_context;
838     int i;
839
840     i965_h264_context = calloc(1, sizeof(struct i965_h264_context));
841
842     /* kernel */
843     assert(NUM_H264_AVC_KERNELS == (sizeof(h264_avc_kernels_gen5) / 
844                                     sizeof(h264_avc_kernels_gen5[0])));
845     assert(NUM_AVC_MC_INTERFACES == (sizeof(avc_mc_kernel_offset_gen5) /
846                                      sizeof(avc_mc_kernel_offset_gen5[0])));
847     assert(i965_h264_context);
848     if (IS_IRONLAKE(i965->intel.device_info)) {
849         memcpy(i965_h264_context->avc_kernels, h264_avc_kernels_gen5, sizeof(i965_h264_context->avc_kernels));
850         avc_mc_kernel_offset = avc_mc_kernel_offset_gen5;
851         intra_kernel_header = &intra_kernel_header_gen5;
852         i965_h264_context->use_avc_hw_scoreboard = 1;
853         i965_h264_context->use_hw_w128 = 1;
854     } else {
855         memcpy(i965_h264_context->avc_kernels, h264_avc_kernels_gen4, sizeof(i965_h264_context->avc_kernels));
856         avc_mc_kernel_offset = avc_mc_kernel_offset_gen4;
857         intra_kernel_header = &intra_kernel_header_gen4;
858         i965_h264_context->use_avc_hw_scoreboard = 0;
859         i965_h264_context->use_hw_w128 = 0;
860     }
861
862     for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
863         struct i965_kernel *kernel = &i965_h264_context->avc_kernels[i];
864         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
865                                   kernel->name, 
866                                   kernel->size, 0x1000);
867         assert(kernel->bo);
868         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
869     }
870
871     for (i = 0; i < 16; i++) {
872         i965_h264_context->fsid_list[i].surface_id = VA_INVALID_ID;
873         i965_h264_context->fsid_list[i].frame_store_id = -1;
874     }
875
876     i965_h264_context->batch = media_context->base.batch;
877
878     media_context->private_context = i965_h264_context;
879     media_context->free_private_context = i965_media_h264_free_private_context;
880
881     /* URB */
882     if (IS_IRONLAKE(i965->intel.device_info)) {
883         media_context->urb.num_vfe_entries = 63;
884     } else {
885         media_context->urb.num_vfe_entries = 23;
886     }
887
888     media_context->urb.size_vfe_entry = 16;
889
890     media_context->urb.num_cs_entries = 1;
891     media_context->urb.size_cs_entry = 1;
892
893     media_context->urb.vfe_start = 0;
894     media_context->urb.cs_start = media_context->urb.vfe_start + 
895         media_context->urb.num_vfe_entries * media_context->urb.size_vfe_entry;
896     assert(media_context->urb.cs_start + 
897            media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);
898
899     /* hook functions */
900     media_context->media_states_setup = i965_media_h264_states_setup;
901     media_context->media_objects = i965_media_h264_objects;
902 }