OSDN Git Service

decode: release huffman_table from decode state
[android-x86/hardware-intel-common-vaapi.git] / src / i965_media_h264.c
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <assert.h>
5
6
7 #include "intel_batchbuffer.h"
8 #include "intel_driver.h"
9
10 #include "i965_defines.h"
11 #include "i965_drv_video.h"
12 #include "i965_media.h"
13 #include "i965_media_h264.h"
14 #include "i965_decoder_utils.h"
15
/*
 * Indices of the AVC motion-compensation kernel entry points.  The same
 * ordering is used by the per-generation kernel offset tables and by the
 * VFE remap table entries written in this file.
 */
enum {
    INTRA_16X16    = 0,
    INTRA_8X8      = 1,
    INTRA_4X4      = 2,
    INTRA_PCM      = 3,
    FRAMEMB_MOTION = 4,
    FIELDMB_MOTION = 5,
    MBAFF_MOTION   = 6,
};
25
/*
 * Constant data copied to the start of the CURBE buffer for the intra
 * prediction kernels (see i965_media_h264_upload_constants).  The Rn.m
 * markers name the register/dword each group of members occupies, so
 * member order and sizes must not change.
 */
struct intra_kernel_header {
    /* R1.0: intra 4x4 luma modes 0-3 */
    uint8_t intra_4x4_luma_mode_0_offset;
    uint8_t intra_4x4_luma_mode_1_offset;
    uint8_t intra_4x4_luma_mode_2_offset;
    uint8_t intra_4x4_luma_mode_3_offset;

    /* R1.1: intra 4x4 luma modes 4-7 */
    uint8_t intra_4x4_luma_mode_4_offset;
    uint8_t intra_4x4_luma_mode_5_offset;
    uint8_t intra_4x4_luma_mode_6_offset;
    uint8_t intra_4x4_luma_mode_7_offset;

    /* R1.2: intra 4x4 luma mode 8, padding, top reference offset */
    uint8_t intra_4x4_luma_mode_8_offset;
    uint8_t pad0;
    uint16_t top_reference_offset;

    /* R1.3: intra 8x8 luma modes 0-3 */
    uint8_t intra_8x8_luma_mode_0_offset;
    uint8_t intra_8x8_luma_mode_1_offset;
    uint8_t intra_8x8_luma_mode_2_offset;
    uint8_t intra_8x8_luma_mode_3_offset;

    /* R1.4: intra 8x8 luma modes 4-7 */
    uint8_t intra_8x8_luma_mode_4_offset;
    uint8_t intra_8x8_luma_mode_5_offset;
    uint8_t intra_8x8_luma_mode_6_offset;
    uint8_t intra_8x8_luma_mode_7_offset;

    /* R1.5: intra 8x8 luma mode 8, padding, 8x8 constant */
    uint8_t intra_8x8_luma_mode_8_offset;
    uint8_t pad1;
    uint16_t const_reverse_data_transfer_intra_8x8;

    /* R1.6: intra 16x16 luma modes 0-3 */
    uint8_t intra_16x16_luma_mode_0_offset;
    uint8_t intra_16x16_luma_mode_1_offset;
    uint8_t intra_16x16_luma_mode_2_offset;
    uint8_t intra_16x16_luma_mode_3_offset;

    /* R1.7: intra chroma modes 0-3 */
    uint8_t intra_chroma_mode_0_offset;
    uint8_t intra_chroma_mode_1_offset;
    uint8_t intra_chroma_mode_2_offset;
    uint8_t intra_chroma_mode_3_offset;

    /* R2.0 - R2.6: one dword each */
    uint32_t const_intra_16x16_plane_0;             /* R2.0 */
    uint32_t const_intra_16x16_chroma_plane_0;      /* R2.1 */
    uint32_t const_intra_16x16_chroma_plane_1;      /* R2.2 */
    uint32_t const_intra_16x16_plane_1;             /* R2.3 */
    uint32_t left_shift_count_reverse_dw_ordering;  /* R2.4 */
    uint32_t const_reverse_data_transfer_intra_4x4; /* R2.5 */
    uint32_t intra_4x4_pred_mode_offset;            /* R2.6 */
};
80
/*
 * Header layout for the inter kernels: a 16-bit weight offset, an 8-bit
 * flag and one padding byte (one dword in total).
 */
struct inter_kernel_header {
    uint16_t weight_offset;
    uint8_t weight_offset_flag;
    uint8_t pad0;
};
86
87 #include "shaders/h264/mc/export.inc"
88 static unsigned long avc_mc_kernel_offset_gen4[] = {
89     INTRA_16x16_IP * INST_UNIT_GEN4,
90     INTRA_8x8_IP * INST_UNIT_GEN4,
91     INTRA_4x4_IP * INST_UNIT_GEN4,
92     INTRA_PCM_IP * INST_UNIT_GEN4,
93     FRAME_MB_IP * INST_UNIT_GEN4,
94     FIELD_MB_IP * INST_UNIT_GEN4,
95     MBAFF_MB_IP * INST_UNIT_GEN4
96 };
97
98 struct intra_kernel_header intra_kernel_header_gen4 = {
99     0,
100     (INTRA_4X4_HORIZONTAL_IP - INTRA_4X4_VERTICAL_IP),
101     (INTRA_4X4_DC_IP - INTRA_4X4_VERTICAL_IP),
102     (INTRA_4X4_DIAG_DOWN_LEFT_IP - INTRA_4X4_VERTICAL_IP),
103
104     (INTRA_4X4_DIAG_DOWN_RIGHT_IP - INTRA_4X4_VERTICAL_IP),
105     (INTRA_4X4_VERT_RIGHT_IP - INTRA_4X4_VERTICAL_IP),
106     (INTRA_4X4_HOR_DOWN_IP - INTRA_4X4_VERTICAL_IP),
107     (INTRA_4X4_VERT_LEFT_IP - INTRA_4X4_VERTICAL_IP),
108
109     (INTRA_4X4_HOR_UP_IP - INTRA_4X4_VERTICAL_IP),
110     0,
111     0xFFFC,
112
113     0,
114     (INTRA_8X8_HORIZONTAL_IP - INTRA_8X8_VERTICAL_IP),
115     (INTRA_8X8_DC_IP - INTRA_8X8_VERTICAL_IP),
116     (INTRA_8X8_DIAG_DOWN_LEFT_IP - INTRA_8X8_VERTICAL_IP),
117
118     (INTRA_8X8_DIAG_DOWN_RIGHT_IP - INTRA_8X8_VERTICAL_IP),
119     (INTRA_8X8_VERT_RIGHT_IP - INTRA_8X8_VERTICAL_IP),
120     (INTRA_8X8_HOR_DOWN_IP - INTRA_8X8_VERTICAL_IP),
121     (INTRA_8X8_VERT_LEFT_IP - INTRA_8X8_VERTICAL_IP),
122
123     (INTRA_8X8_HOR_UP_IP - INTRA_8X8_VERTICAL_IP),
124     0,
125     0x0001,
126
127     0,
128     (INTRA_16x16_HORIZONTAL_IP - INTRA_16x16_VERTICAL_IP),
129     (INTRA_16x16_DC_IP - INTRA_16x16_VERTICAL_IP),
130     (INTRA_16x16_PLANE_IP - INTRA_16x16_VERTICAL_IP),
131
132     0,
133     (INTRA_CHROMA_HORIZONTAL_IP - INTRA_CHROMA_DC_IP),
134     (INTRA_CHROMA_VERTICAL_IP - INTRA_CHROMA_DC_IP),
135     (INTRA_Chroma_PLANE_IP - INTRA_CHROMA_DC_IP),
136
137     0xFCFBFAF9,
138
139     0x00FFFEFD,
140
141     0x04030201,
142
143     0x08070605,
144
145     0x18100800,
146
147     0x00020406,
148
149     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB3_IP) * 0x1000000 +
150     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB2_IP) * 0x10000 +
151     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB1_IP) * 0x100 +
152     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB0_IP)
153 };
154
155 static const uint32_t h264_avc_combined_gen4[][4] = {
156 #include "shaders/h264/mc/avc_mc.g4b"
157 };
158
159 static const uint32_t h264_avc_null_gen4[][4] = {
160 #include "shaders/h264/mc/null.g4b"
161 };
162
163 static struct i965_kernel h264_avc_kernels_gen4[] = {
164     {
165         "AVC combined kernel",
166         H264_AVC_COMBINED,
167         h264_avc_combined_gen4,
168         sizeof(h264_avc_combined_gen4),
169         NULL
170     },
171
172     {
173         "NULL kernel",
174         H264_AVC_NULL,
175         h264_avc_null_gen4,
176         sizeof(h264_avc_null_gen4),
177         NULL
178     }
179 };
180
181 /* On Ironlake */
182 #include "shaders/h264/mc/export.inc.gen5"
183 static unsigned long avc_mc_kernel_offset_gen5[] = {
184     INTRA_16x16_IP_GEN5 * INST_UNIT_GEN5,
185     INTRA_8x8_IP_GEN5 * INST_UNIT_GEN5,
186     INTRA_4x4_IP_GEN5 * INST_UNIT_GEN5,
187     INTRA_PCM_IP_GEN5 * INST_UNIT_GEN5,
188     FRAME_MB_IP_GEN5 * INST_UNIT_GEN5,
189     FIELD_MB_IP_GEN5 * INST_UNIT_GEN5,
190     MBAFF_MB_IP_GEN5 * INST_UNIT_GEN5
191 };
192
193 struct intra_kernel_header intra_kernel_header_gen5 = {
194     0,
195     (INTRA_4X4_HORIZONTAL_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
196     (INTRA_4X4_DC_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
197     (INTRA_4X4_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
198
199     (INTRA_4X4_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
200     (INTRA_4X4_VERT_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
201     (INTRA_4X4_HOR_DOWN_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
202     (INTRA_4X4_VERT_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
203
204     (INTRA_4X4_HOR_UP_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
205     0,
206     0xFFFC,
207
208     0,
209     (INTRA_8X8_HORIZONTAL_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
210     (INTRA_8X8_DC_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
211     (INTRA_8X8_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
212
213     (INTRA_8X8_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
214     (INTRA_8X8_VERT_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
215     (INTRA_8X8_HOR_DOWN_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
216     (INTRA_8X8_VERT_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
217
218     (INTRA_8X8_HOR_UP_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
219     0,
220     0x0001,
221
222     0,
223     (INTRA_16x16_HORIZONTAL_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
224     (INTRA_16x16_DC_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
225     (INTRA_16x16_PLANE_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
226
227     0,
228     (INTRA_CHROMA_HORIZONTAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
229     (INTRA_CHROMA_VERTICAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
230     (INTRA_Chroma_PLANE_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
231
232     0xFCFBFAF9,
233
234     0x00FFFEFD,
235
236     0x04030201,
237
238     0x08070605,
239
240     0x18100800,
241
242     0x00020406,
243
244     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB3_IP_GEN5) * 0x1000000 +
245     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB2_IP_GEN5) * 0x10000 +
246     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB1_IP_GEN5) * 0x100 +
247     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB0_IP_GEN5)
248 };
249
250 static const uint32_t h264_avc_combined_gen5[][4] = {
251 #include "shaders/h264/mc/avc_mc.g4b.gen5"
252 };
253
254 static const uint32_t h264_avc_null_gen5[][4] = {
255 #include "shaders/h264/mc/null.g4b.gen5"
256 };
257
258 static struct i965_kernel h264_avc_kernels_gen5[] = {
259     {
260         "AVC combined kernel",
261         H264_AVC_COMBINED,
262         h264_avc_combined_gen5,
263         sizeof(h264_avc_combined_gen5),
264         NULL
265     },
266
267     {
268         "NULL kernel",
269         H264_AVC_NULL,
270         h264_avc_null_gen5,
271         sizeof(h264_avc_null_gen5),
272         NULL
273     }
274 };
275
/* Number of kernel entry points; the gen4 table is used as the size reference. */
#define NUM_AVC_MC_INTERFACES \
    (sizeof(avc_mc_kernel_offset_gen4) / sizeof(avc_mc_kernel_offset_gen4[0]))

/*
 * Offset table and intra header currently in use.  Both start out as null
 * pointers (static storage) and are dereferenced by the state setup code
 * below, so they must be pointed at a generation-specific table first.
 */
static unsigned long *avc_mc_kernel_offset;
static struct intra_kernel_header *intra_kernel_header;
280
281 static void
282 i965_media_h264_surface_state(VADriverContextP ctx,
283                               int index,
284                               struct object_surface *obj_surface,
285                               unsigned long offset,
286                               int w, int h, int pitch,
287                               Bool is_dst,
288                               int vert_line_stride,
289                               int vert_line_stride_ofs,
290                               int format,
291                               struct i965_media_context *media_context)
292 {
293     struct i965_driver_data *i965 = i965_driver_data(ctx);
294     struct i965_surface_state *ss;
295     dri_bo *bo;
296     uint32_t write_domain, read_domain;
297
298     assert(obj_surface->bo);
299
300     bo = dri_bo_alloc(i965->intel.bufmgr,
301                       "surface state",
302                       sizeof(struct i965_surface_state), 32);
303     assert(bo);
304     dri_bo_map(bo, 1);
305     assert(bo->virtual);
306     ss = bo->virtual;
307     memset(ss, 0, sizeof(*ss));
308     ss->ss0.surface_type = I965_SURFACE_2D;
309     ss->ss0.surface_format = format;
310     ss->ss0.vert_line_stride = vert_line_stride;
311     ss->ss0.vert_line_stride_ofs = vert_line_stride_ofs;
312     ss->ss1.base_addr = obj_surface->bo->offset + offset;
313     ss->ss2.width = w - 1;
314     ss->ss2.height = h - 1;
315     ss->ss3.pitch = pitch - 1;
316
317     if (is_dst) {
318         write_domain = I915_GEM_DOMAIN_RENDER;
319         read_domain = I915_GEM_DOMAIN_RENDER;
320     } else {
321         write_domain = 0;
322         read_domain = I915_GEM_DOMAIN_SAMPLER;
323     }
324
325     dri_bo_emit_reloc(bo,
326                       read_domain, write_domain,
327                       offset,
328                       offsetof(struct i965_surface_state, ss1),
329                       obj_surface->bo);
330     dri_bo_unmap(bo);
331
332     assert(index < MAX_MEDIA_SURFACES);
333     media_context->surface_state[index].bo = bo;
334 }
335
336 static void
337 i965_media_h264_surfaces_setup(VADriverContextP ctx,
338                                struct decode_state *decode_state,
339                                struct i965_media_context *media_context)
340 {
341     struct i965_h264_context *i965_h264_context;
342     struct object_surface *obj_surface;
343     VAPictureParameterBufferH264 *pic_param;
344     VAPictureH264 *va_pic;
345     int i, w, h;
346     int field_picture;
347
348     assert(media_context->private_context);
349     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
350
351     assert(decode_state->pic_param && decode_state->pic_param->buffer);
352     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
353
354     /* Target Picture */
355     va_pic = &pic_param->CurrPic;
356     obj_surface = decode_state->render_object;
357     w = obj_surface->width;
358     h = obj_surface->height;
359     field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
360     i965_media_h264_surface_state(ctx, 0, obj_surface,
361                                   0, w / 4, h / (1 + field_picture), w,
362                                   1,
363                                   field_picture,
364                                   !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
365                                   I965_SURFACEFORMAT_R8_SINT,   /* Y */
366                                   media_context);
367     i965_media_h264_surface_state(ctx, 1, obj_surface,
368                                   w * h, w / 4, h / 2 / (1 + field_picture), w,
369                                   1,
370                                   field_picture,
371                                   !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
372                                   I965_SURFACEFORMAT_R8G8_SINT, /* INTERLEAVED U/V */
373                                   media_context);
374
375     /* Reference Pictures */
376     for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
377         struct object_surface * const obj_surface =
378                     i965_h264_context->fsid_list[i].obj_surface;
379
380         if (obj_surface) {
381             const VAPictureH264 * const va_pic = avc_find_picture(
382                                                      obj_surface->base.id, pic_param->ReferenceFrames,
383                                                      ARRAY_ELEMS(pic_param->ReferenceFrames));
384
385             assert(va_pic != NULL);
386             w = obj_surface->width;
387             h = obj_surface->height;
388             field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
389             i965_media_h264_surface_state(ctx, 2 + i, obj_surface,
390                                           0, w / 4, h / (1 + field_picture), w,
391                                           0,
392                                           field_picture,
393                                           !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
394                                           I965_SURFACEFORMAT_R8_SINT,   /* Y */
395                                           media_context);
396             i965_media_h264_surface_state(ctx, 18 + i, obj_surface,
397                                           w * h, w / 4, h / 2 / (1 + field_picture), w,
398                                           0,
399                                           field_picture,
400                                           !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
401                                           I965_SURFACEFORMAT_R8G8_SINT, /* INTERLEAVED U/V */
402                                           media_context);
403         }
404     }
405 }
406
407 static void
408 i965_media_h264_binding_table(VADriverContextP ctx, struct i965_media_context *media_context)
409 {
410     int i;
411     unsigned int *binding_table;
412     dri_bo *bo = media_context->binding_table.bo;
413
414     dri_bo_map(bo, 1);
415     assert(bo->virtual);
416     binding_table = bo->virtual;
417     memset(binding_table, 0, bo->size);
418
419     for (i = 0; i < MAX_MEDIA_SURFACES; i++) {
420         if (media_context->surface_state[i].bo) {
421             binding_table[i] = media_context->surface_state[i].bo->offset;
422             dri_bo_emit_reloc(bo,
423                               I915_GEM_DOMAIN_INSTRUCTION, 0,
424                               0,
425                               i * sizeof(*binding_table),
426                               media_context->surface_state[i].bo);
427         }
428     }
429
430     dri_bo_unmap(media_context->binding_table.bo);
431 }
432
433 static void
434 i965_media_h264_interface_descriptor_remap_table(VADriverContextP ctx, struct i965_media_context *media_context)
435 {
436     struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)media_context->private_context;
437     struct i965_interface_descriptor *desc;
438     int i;
439     dri_bo *bo;
440
441     bo = media_context->idrt.bo;
442     dri_bo_map(bo, 1);
443     assert(bo->virtual);
444     desc = bo->virtual;
445
446     for (i = 0; i < NUM_AVC_MC_INTERFACES; i++) {
447         int kernel_offset = avc_mc_kernel_offset[i];
448         memset(desc, 0, sizeof(*desc));
449         desc->desc0.grf_reg_blocks = 7;
450         desc->desc0.kernel_start_pointer = (i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo->offset + kernel_offset) >> 6; /* reloc */
451         desc->desc1.const_urb_entry_read_offset = 0;
452         desc->desc1.const_urb_entry_read_len = 2;
453         desc->desc3.binding_table_entry_count = 0;
454         desc->desc3.binding_table_pointer =
455             media_context->binding_table.bo->offset >> 5; /*reloc */
456
457         dri_bo_emit_reloc(bo,
458                           I915_GEM_DOMAIN_INSTRUCTION, 0,
459                           desc->desc0.grf_reg_blocks + kernel_offset,
460                           i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc0),
461                           i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo);
462
463         dri_bo_emit_reloc(bo,
464                           I915_GEM_DOMAIN_INSTRUCTION, 0,
465                           desc->desc3.binding_table_entry_count,
466                           i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc3),
467                           media_context->binding_table.bo);
468         desc++;
469     }
470
471     dri_bo_unmap(bo);
472 }
473
474 static void
475 i965_media_h264_vfe_state(VADriverContextP ctx, struct i965_media_context *media_context)
476 {
477     struct i965_vfe_state *vfe_state;
478     dri_bo *bo;
479
480     bo = media_context->vfe_state.bo;
481     dri_bo_map(bo, 1);
482     assert(bo->virtual);
483     vfe_state = bo->virtual;
484     memset(vfe_state, 0, sizeof(*vfe_state));
485     vfe_state->vfe0.extend_vfe_state_present = 1;
486     vfe_state->vfe1.max_threads = media_context->urb.num_vfe_entries - 1;
487     vfe_state->vfe1.urb_entry_alloc_size = media_context->urb.size_vfe_entry - 1;
488     vfe_state->vfe1.num_urb_entries = media_context->urb.num_vfe_entries;
489     vfe_state->vfe1.vfe_mode = VFE_AVC_IT_MODE;
490     vfe_state->vfe1.children_present = 0;
491     vfe_state->vfe2.interface_descriptor_base =
492         media_context->idrt.bo->offset >> 4; /* reloc */
493     dri_bo_emit_reloc(bo,
494                       I915_GEM_DOMAIN_INSTRUCTION, 0,
495                       0,
496                       offsetof(struct i965_vfe_state, vfe2),
497                       media_context->idrt.bo);
498     dri_bo_unmap(bo);
499 }
500
501 static void
502 i965_media_h264_vfe_state_extension(VADriverContextP ctx,
503                                     struct decode_state *decode_state,
504                                     struct i965_media_context *media_context)
505 {
506     struct i965_h264_context *i965_h264_context;
507     struct i965_vfe_state_ex *vfe_state_ex;
508     VAPictureParameterBufferH264 *pic_param;
509     int mbaff_frame_flag;
510
511     assert(media_context->private_context);
512     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
513
514     assert(decode_state->pic_param && decode_state->pic_param->buffer);
515     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
516     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
517                         !pic_param->pic_fields.bits.field_pic_flag);
518
519     assert(media_context->extended_state.bo);
520     dri_bo_map(media_context->extended_state.bo, 1);
521     assert(media_context->extended_state.bo->virtual);
522     vfe_state_ex = media_context->extended_state.bo->virtual;
523     memset(vfe_state_ex, 0, sizeof(*vfe_state_ex));
524
525     /*
526      * Indirect data buffer:
527      * --------------------------------------------------------
528      * | Motion Vectors | Weight/Offset data | Residual data |
529      * --------------------------------------------------------
530      * R4-R7: Motion Vectors
531      * R8-R9: Weight/Offset
532      * R10-R33: Residual data
533      */
534     vfe_state_ex->vfex1.avc.residual_data_fix_offset_flag = !!RESIDUAL_DATA_OFFSET;
535     vfe_state_ex->vfex1.avc.residual_data_offset = RESIDUAL_DATA_OFFSET;
536
537     if (i965_h264_context->picture.i_flag) {
538         vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_NOMV; /* NoMV */
539         vfe_state_ex->vfex1.avc.weight_grf_offset = 0;
540         vfe_state_ex->vfex1.avc.residual_grf_offset = 0;
541     } else {
542         vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_MV_WO; /* Both MV and W/O */
543         vfe_state_ex->vfex1.avc.weight_grf_offset = 4;
544         vfe_state_ex->vfex1.avc.residual_grf_offset = 6;
545     }
546
547     if (!pic_param->pic_fields.bits.field_pic_flag) {
548         if (mbaff_frame_flag) {
549             vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
550             vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
551             vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
552             vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
553             vfe_state_ex->remap_table0.remap_index_4 = MBAFF_MOTION;
554             vfe_state_ex->remap_table0.remap_index_5 = MBAFF_MOTION;
555             vfe_state_ex->remap_table0.remap_index_6 = MBAFF_MOTION;
556             vfe_state_ex->remap_table0.remap_index_7 = MBAFF_MOTION;
557
558             vfe_state_ex->remap_table1.remap_index_8 = MBAFF_MOTION;
559             vfe_state_ex->remap_table1.remap_index_9 = MBAFF_MOTION;
560             vfe_state_ex->remap_table1.remap_index_10 = MBAFF_MOTION;
561             vfe_state_ex->remap_table1.remap_index_11 = MBAFF_MOTION;
562             vfe_state_ex->remap_table1.remap_index_12 = MBAFF_MOTION;
563             vfe_state_ex->remap_table1.remap_index_13 = MBAFF_MOTION;
564             vfe_state_ex->remap_table1.remap_index_14 = MBAFF_MOTION;
565             vfe_state_ex->remap_table1.remap_index_15 = MBAFF_MOTION;
566         } else {
567             vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
568             vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
569             vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
570             vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
571             vfe_state_ex->remap_table0.remap_index_4 = FRAMEMB_MOTION;
572             vfe_state_ex->remap_table0.remap_index_5 = FRAMEMB_MOTION;
573             vfe_state_ex->remap_table0.remap_index_6 = FRAMEMB_MOTION;
574             vfe_state_ex->remap_table0.remap_index_7 = FRAMEMB_MOTION;
575
576             vfe_state_ex->remap_table1.remap_index_8 = FRAMEMB_MOTION;
577             vfe_state_ex->remap_table1.remap_index_9 = FRAMEMB_MOTION;
578             vfe_state_ex->remap_table1.remap_index_10 = FRAMEMB_MOTION;
579             vfe_state_ex->remap_table1.remap_index_11 = FRAMEMB_MOTION;
580             vfe_state_ex->remap_table1.remap_index_12 = FRAMEMB_MOTION;
581             vfe_state_ex->remap_table1.remap_index_13 = FRAMEMB_MOTION;
582             vfe_state_ex->remap_table1.remap_index_14 = FRAMEMB_MOTION;
583             vfe_state_ex->remap_table1.remap_index_15 = FRAMEMB_MOTION;
584         }
585     } else {
586         vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
587         vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
588         vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
589         vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
590         vfe_state_ex->remap_table0.remap_index_4 = FIELDMB_MOTION;
591         vfe_state_ex->remap_table0.remap_index_5 = FIELDMB_MOTION;
592         vfe_state_ex->remap_table0.remap_index_6 = FIELDMB_MOTION;
593         vfe_state_ex->remap_table0.remap_index_7 = FIELDMB_MOTION;
594
595         vfe_state_ex->remap_table1.remap_index_8 = FIELDMB_MOTION;
596         vfe_state_ex->remap_table1.remap_index_9 = FIELDMB_MOTION;
597         vfe_state_ex->remap_table1.remap_index_10 = FIELDMB_MOTION;
598         vfe_state_ex->remap_table1.remap_index_11 = FIELDMB_MOTION;
599         vfe_state_ex->remap_table1.remap_index_12 = FIELDMB_MOTION;
600         vfe_state_ex->remap_table1.remap_index_13 = FIELDMB_MOTION;
601         vfe_state_ex->remap_table1.remap_index_14 = FIELDMB_MOTION;
602         vfe_state_ex->remap_table1.remap_index_15 = FIELDMB_MOTION;
603     }
604
605     if (i965_h264_context->use_avc_hw_scoreboard) {
606         vfe_state_ex->scoreboard0.enable = 1;
607         vfe_state_ex->scoreboard0.type = SCOREBOARD_STALLING;
608         vfe_state_ex->scoreboard0.mask = 0xff;
609
610         vfe_state_ex->scoreboard1.delta_x0 = -1;
611         vfe_state_ex->scoreboard1.delta_y0 = 0;
612         vfe_state_ex->scoreboard1.delta_x1 = 0;
613         vfe_state_ex->scoreboard1.delta_y1 = -1;
614         vfe_state_ex->scoreboard1.delta_x2 = 1;
615         vfe_state_ex->scoreboard1.delta_y2 = -1;
616         vfe_state_ex->scoreboard1.delta_x3 = -1;
617         vfe_state_ex->scoreboard1.delta_y3 = -1;
618
619         vfe_state_ex->scoreboard2.delta_x4 = -1;
620         vfe_state_ex->scoreboard2.delta_y4 = 1;
621         vfe_state_ex->scoreboard2.delta_x5 = 0;
622         vfe_state_ex->scoreboard2.delta_y5 = -2;
623         vfe_state_ex->scoreboard2.delta_x6 = 1;
624         vfe_state_ex->scoreboard2.delta_y6 = -2;
625         vfe_state_ex->scoreboard2.delta_x7 = -1;
626         vfe_state_ex->scoreboard2.delta_y7 = -2;
627     }
628
629     dri_bo_unmap(media_context->extended_state.bo);
630 }
631
632 static void
633 i965_media_h264_upload_constants(VADriverContextP ctx,
634                                  struct decode_state *decode_state,
635                                  struct i965_media_context *media_context)
636 {
637     struct i965_h264_context *i965_h264_context;
638     unsigned char *constant_buffer;
639     VASliceParameterBufferH264 *slice_param;
640
641     assert(media_context->private_context);
642     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
643
644     assert(decode_state->slice_params[0] && decode_state->slice_params[0]->buffer);
645     slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[0]->buffer;
646
647     dri_bo_map(media_context->curbe.bo, 1);
648     assert(media_context->curbe.bo->virtual);
649     constant_buffer = media_context->curbe.bo->virtual;
650
651     /* HW solution for W=128 */
652     if (i965_h264_context->use_hw_w128) {
653         memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
654     } else {
655         if (slice_param->slice_type == SLICE_TYPE_I ||
656             slice_param->slice_type == SLICE_TYPE_SI) {
657             memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
658         } else {
659             /* FIXME: Need to upload CURBE data to inter kernel interface
660              * to support weighted prediction work-around
661              */
662             *(short *)constant_buffer = i965_h264_context->weight128_offset0;
663             constant_buffer += 2;
664             *(char *)constant_buffer = i965_h264_context->weight128_offset0_flag;
665             constant_buffer++;
666             *constant_buffer = 0;
667         }
668     }
669
670     dri_bo_unmap(media_context->curbe.bo);
671 }
672
673 static void
674 i965_media_h264_states_setup(VADriverContextP ctx,
675                              struct decode_state *decode_state,
676                              struct i965_media_context *media_context)
677 {
678     struct i965_h264_context *i965_h264_context;
679
680     assert(media_context->private_context);
681     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
682
683     i965_avc_bsd_pipeline(ctx, decode_state, i965_h264_context);
684
685     if (i965_h264_context->use_avc_hw_scoreboard)
686         i965_avc_hw_scoreboard(ctx, decode_state, i965_h264_context);
687
688     i965_media_h264_surfaces_setup(ctx, decode_state, media_context);
689     i965_media_h264_binding_table(ctx, media_context);
690     i965_media_h264_interface_descriptor_remap_table(ctx, media_context);
691     i965_media_h264_vfe_state_extension(ctx, decode_state, media_context);
692     i965_media_h264_vfe_state(ctx, media_context);
693     i965_media_h264_upload_constants(ctx, decode_state, media_context);
694 }
695
696 static void
697 i965_media_h264_objects(VADriverContextP ctx,
698                         struct decode_state *decode_state,
699                         struct i965_media_context *media_context)
700 {
701     struct intel_batchbuffer *batch = media_context->base.batch;
702     struct i965_h264_context *i965_h264_context;
703     unsigned int *object_command;
704
705     assert(media_context->private_context);
706     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
707
708     dri_bo_map(i965_h264_context->avc_it_command_mb_info.bo, True);
709     assert(i965_h264_context->avc_it_command_mb_info.bo->virtual);
710     object_command = i965_h264_context->avc_it_command_mb_info.bo->virtual;
711     memset(object_command, 0, i965_h264_context->avc_it_command_mb_info.mbs * i965_h264_context->use_avc_hw_scoreboard * MB_CMD_IN_BYTES);
712     object_command += i965_h264_context->avc_it_command_mb_info.mbs * (1 + i965_h264_context->use_avc_hw_scoreboard) * MB_CMD_IN_DWS;
713     *object_command++ = 0;
714     *object_command = MI_BATCH_BUFFER_END;
715     dri_bo_unmap(i965_h264_context->avc_it_command_mb_info.bo);
716
717     BEGIN_BATCH(batch, 2);
718     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
719     OUT_RELOC(batch, i965_h264_context->avc_it_command_mb_info.bo,
720               I915_GEM_DOMAIN_COMMAND, 0,
721               0);
722     ADVANCE_BATCH(batch);
723
724     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
725      * will cause control to pass back to ring buffer
726      */
727     intel_batchbuffer_end_atomic(batch);
728     intel_batchbuffer_flush(batch);
729     intel_batchbuffer_start_atomic(batch, 0x1000);
730     i965_avc_ildb(ctx, decode_state, i965_h264_context);
731 }
732
733 static void
734 i965_media_h264_free_private_context(void **data)
735 {
736     struct i965_h264_context *i965_h264_context = *data;
737     int i;
738
739     if (i965_h264_context == NULL)
740         return;
741
742     i965_avc_ildb_ternimate(&i965_h264_context->avc_ildb_context);
743     i965_avc_hw_scoreboard_ternimate(&i965_h264_context->avc_hw_scoreboard_context);
744     i965_avc_bsd_ternimate(&i965_h264_context->i965_avc_bsd_context);
745     dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
746     dri_bo_unreference(i965_h264_context->avc_it_data.bo);
747     dri_bo_unreference(i965_h264_context->avc_ildb_data.bo);
748
749     for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
750         struct i965_kernel *kernel = &i965_h264_context->avc_kernels[i];
751
752         dri_bo_unreference(kernel->bo);
753         kernel->bo = NULL;
754     }
755
756     free(i965_h264_context);
757     *data = NULL;
758 }
759
760 void
761 i965_media_h264_decode_init(VADriverContextP ctx,
762                             struct decode_state *decode_state,
763                             struct i965_media_context *media_context)
764 {
765     struct i965_driver_data *i965 = i965_driver_data(ctx);
766     struct i965_h264_context *i965_h264_context = media_context->private_context;
767     dri_bo *bo;
768     VAPictureParameterBufferH264 *pic_param;
769
770     assert(decode_state->pic_param && decode_state->pic_param->buffer);
771     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
772     i965_h264_context->picture.width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
773     i965_h264_context->picture.height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff) /
774                                                (1 + !!pic_param->pic_fields.bits.field_pic_flag); /* picture height */
775     i965_h264_context->picture.mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
776                                                    !pic_param->pic_fields.bits.field_pic_flag);
777     i965_h264_context->avc_it_command_mb_info.mbs = (i965_h264_context->picture.width_in_mbs *
778                                                      i965_h264_context->picture.height_in_mbs);
779
780     dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
781     bo = dri_bo_alloc(i965->intel.bufmgr,
782                       "avc it command mb info",
783                       i965_h264_context->avc_it_command_mb_info.mbs * MB_CMD_IN_BYTES * (1 + i965_h264_context->use_avc_hw_scoreboard) + 8,
784                       0x1000);
785     assert(bo);
786     i965_h264_context->avc_it_command_mb_info.bo = bo;
787
788     dri_bo_unreference(i965_h264_context->avc_it_data.bo);
789     bo = dri_bo_alloc(i965->intel.bufmgr,
790                       "avc it data",
791                       i965_h264_context->avc_it_command_mb_info.mbs *
792                       0x800 *
793                       (1 + !!pic_param->pic_fields.bits.field_pic_flag),
794                       0x1000);
795     assert(bo);
796     i965_h264_context->avc_it_data.bo = bo;
797     i965_h264_context->avc_it_data.write_offset = 0;
798     dri_bo_unreference(media_context->indirect_object.bo);
799     media_context->indirect_object.bo = bo;
800     dri_bo_reference(media_context->indirect_object.bo);
801     media_context->indirect_object.offset = i965_h264_context->avc_it_data.write_offset;
802
803     dri_bo_unreference(i965_h264_context->avc_ildb_data.bo);
804     bo = dri_bo_alloc(i965->intel.bufmgr,
805                       "AVC-ILDB Data Buffer",
806                       i965_h264_context->avc_it_command_mb_info.mbs * 64 * 2,
807                       0x1000);
808     assert(bo);
809     i965_h264_context->avc_ildb_data.bo = bo;
810
811     /* bsd pipeline */
812     i965_avc_bsd_decode_init(ctx, i965_h264_context);
813
814     /* HW scoreboard */
815     if (i965_h264_context->use_avc_hw_scoreboard)
816         i965_avc_hw_scoreboard_decode_init(ctx, i965_h264_context);
817
818     /* ILDB */
819     i965_avc_ildb_decode_init(ctx, i965_h264_context);
820
821     /* for Media pipeline */
822     media_context->extended_state.enabled = 1;
823     dri_bo_unreference(media_context->extended_state.bo);
824     bo = dri_bo_alloc(i965->intel.bufmgr,
825                       "extended vfe state",
826                       sizeof(struct i965_vfe_state_ex), 32);
827     assert(bo);
828     media_context->extended_state.bo = bo;
829 }
830
831 void
832 i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context *media_context)
833 {
834     struct i965_driver_data *i965 = i965_driver_data(ctx);
835     struct i965_h264_context *i965_h264_context;
836     int i;
837
838     i965_h264_context = calloc(1, sizeof(struct i965_h264_context));
839
840     /* kernel */
841     assert(NUM_H264_AVC_KERNELS == (sizeof(h264_avc_kernels_gen5) /
842                                     sizeof(h264_avc_kernels_gen5[0])));
843     assert(NUM_AVC_MC_INTERFACES == (sizeof(avc_mc_kernel_offset_gen5) /
844                                      sizeof(avc_mc_kernel_offset_gen5[0])));
845     assert(i965_h264_context);
846     if (IS_IRONLAKE(i965->intel.device_info)) {
847         memcpy(i965_h264_context->avc_kernels, h264_avc_kernels_gen5, sizeof(i965_h264_context->avc_kernels));
848         avc_mc_kernel_offset = avc_mc_kernel_offset_gen5;
849         intra_kernel_header = &intra_kernel_header_gen5;
850         i965_h264_context->use_avc_hw_scoreboard = 1;
851         i965_h264_context->use_hw_w128 = 1;
852     } else {
853         memcpy(i965_h264_context->avc_kernels, h264_avc_kernels_gen4, sizeof(i965_h264_context->avc_kernels));
854         avc_mc_kernel_offset = avc_mc_kernel_offset_gen4;
855         intra_kernel_header = &intra_kernel_header_gen4;
856         i965_h264_context->use_avc_hw_scoreboard = 0;
857         i965_h264_context->use_hw_w128 = 0;
858     }
859
860     for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
861         struct i965_kernel *kernel = &i965_h264_context->avc_kernels[i];
862         kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
863                                   kernel->name,
864                                   kernel->size, 0x1000);
865         assert(kernel->bo);
866         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
867     }
868
869     for (i = 0; i < 16; i++) {
870         i965_h264_context->fsid_list[i].surface_id = VA_INVALID_ID;
871         i965_h264_context->fsid_list[i].frame_store_id = -1;
872     }
873
874     i965_h264_context->batch = media_context->base.batch;
875
876     media_context->private_context = i965_h264_context;
877     media_context->free_private_context = i965_media_h264_free_private_context;
878
879     /* URB */
880     if (IS_IRONLAKE(i965->intel.device_info)) {
881         media_context->urb.num_vfe_entries = 63;
882     } else {
883         media_context->urb.num_vfe_entries = 23;
884     }
885
886     media_context->urb.size_vfe_entry = 16;
887
888     media_context->urb.num_cs_entries = 1;
889     media_context->urb.size_cs_entry = 1;
890
891     media_context->urb.vfe_start = 0;
892     media_context->urb.cs_start = media_context->urb.vfe_start +
893                                   media_context->urb.num_vfe_entries * media_context->urb.size_vfe_entry;
894     assert(media_context->urb.cs_start +
895            media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);
896
897     /* hook functions */
898     media_context->media_states_setup = i965_media_h264_states_setup;
899     media_context->media_objects = i965_media_h264_objects;
900 }