OSDN Git Service

i965_drv_video: add support for H264 on Clarkdale/Arrandale
[android-x86/hardware-intel-common-libva.git] / i965_drv_video / i965_media_h264.c
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <assert.h>
5
6 #include "va_backend.h"
7
8 #include "intel_batchbuffer.h"
9 #include "intel_driver.h"
10
11 #include "i965_defines.h"
12 #include "i965_drv_video.h"
13 #include "i965_media.h"
14 #include "i965_media_h264.h"
15
/*
 * Interface indices of the AVC motion-compensation kernels.
 *
 * The order matches the entries of avc_mc_kernel_offset_gen4[] and
 * avc_mc_kernel_offset_gen5[]; the same values are written into the
 * VFE remap tables in i965_media_h264_vfe_state_extension().
 */
16 enum {
17     INTRA_16X16 = 0,
18     INTRA_8X8,
19     INTRA_4X4,
20     INTRA_PCM,
21     FRAMEMB_MOTION,
22     FIELDMB_MOTION,
23     MBAFF_MOTION,
24 };
25
/*
 * Constant data handed to the intra-prediction kernels.
 *
 * An instance of this structure is copied verbatim into the CURBE buffer by
 * i965_media_h264_upload_constants(), so the field order and sizes are part
 * of the contract with the shader code and must not be changed.
 *
 * The *_offset byte fields hold the distance of a prediction-mode entry
 * point from the first mode of its group, expressed in instruction units
 * (see the intra_kernel_header_gen4 / _gen5 initializers).
 *
 * NOTE(review): the "Rx.y" labels presumably name the register and dword
 * the kernel reads each group from -- confirm against the shader sources.
 */
26 struct intra_kernel_header
27 {
28     /* R1.0 */
29     unsigned char intra_4x4_luma_mode_0_offset;
30     unsigned char intra_4x4_luma_mode_1_offset;
31     unsigned char intra_4x4_luma_mode_2_offset;
32     unsigned char intra_4x4_luma_mode_3_offset;
33     /* R1.1 */
34     unsigned char intra_4x4_luma_mode_4_offset;
35     unsigned char intra_4x4_luma_mode_5_offset;
36     unsigned char intra_4x4_luma_mode_6_offset;
37     unsigned char intra_4x4_luma_mode_7_offset;
38     /* R1.2 */
39     unsigned char intra_4x4_luma_mode_8_offset;
40     unsigned char pad0;
41     unsigned short top_reference_offset;
42     /* R1.3 */
43     unsigned char intra_8x8_luma_mode_0_offset;
44     unsigned char intra_8x8_luma_mode_1_offset;
45     unsigned char intra_8x8_luma_mode_2_offset;
46     unsigned char intra_8x8_luma_mode_3_offset;
47     /* R1.4 */
48     unsigned char intra_8x8_luma_mode_4_offset;
49     unsigned char intra_8x8_luma_mode_5_offset;
50     unsigned char intra_8x8_luma_mode_6_offset;
51     unsigned char intra_8x8_luma_mode_7_offset;
52     /* R1.5 */
53     unsigned char intra_8x8_luma_mode_8_offset;
54     unsigned char pad1;
55     unsigned short const_reverse_data_transfer_intra_8x8;
56     /* R1.6 */
57     unsigned char intra_16x16_luma_mode_0_offset;
58     unsigned char intra_16x16_luma_mode_1_offset;
59     unsigned char intra_16x16_luma_mode_2_offset;
60     unsigned char intra_16x16_luma_mode_3_offset;
61     /* R1.7 */
62     unsigned char intra_chroma_mode_0_offset;
63     unsigned char intra_chroma_mode_1_offset;
64     unsigned char intra_chroma_mode_2_offset;
65     unsigned char intra_chroma_mode_3_offset;
66     /* R2.0 */
67     unsigned int const_intra_16x16_plane_0;
68     /* R2.1 */
69     unsigned int const_intra_16x16_chroma_plane_0;
70     /* R2.2 */
71     unsigned int const_intra_16x16_chroma_plane_1;
72     /* R2.3 */
73     unsigned int const_intra_16x16_plane_1;
74     /* R2.4 */
75     unsigned int left_shift_count_reverse_dw_ordering;
76     /* R2.5 */
77     unsigned int const_reverse_data_transfer_intra_4x4;
78     /* R2.6 */
79     unsigned int intra_4x4_pred_mode_offset;
80 };
81
/*
 * Constant data layout for the inter-prediction kernels: a 16-bit weight
 * offset, an 8-bit flag and one padding byte.
 *
 * NOTE(review): nothing in the visible part of this file instantiates this
 * structure; i965_media_h264_upload_constants() writes a short, a char and
 * a zero byte by hand, which looks like the same layout -- confirm.
 */
82 struct inter_kernel_header
83 {
84     unsigned short weight_offset;
85     unsigned char weight_offset_flag;
86     unsigned char pad0;
87 };
88
89 #include "shaders/h264/mc/export.inc"
90 static unsigned long avc_mc_kernel_offset_gen4[] = {
91     INTRA_16x16_IP,
92     INTRA_8x8_IP,
93     INTRA_4x4_IP,
94     INTRA_PCM_IP,
95     FRAME_MB_IP,
96     FIELD_MB_IP,
97     MBAFF_MB_IP
98 };
99
100 #define INST_UNIT_GEN4  16
101 struct intra_kernel_header intra_kernel_header_gen4 = {
102     0 / INST_UNIT_GEN4,
103     (INTRA_4X4_HORIZONTAL_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
104     (INTRA_4X4_DC_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
105     (INTRA_4X4_DIAG_DOWN_LEFT_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
106
107     (INTRA_4X4_DIAG_DOWN_RIGHT_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
108     (INTRA_4X4_VERT_RIGHT_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
109     (INTRA_4X4_HOR_DOWN_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
110     (INTRA_4X4_VERT_LEFT_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
111
112     (INTRA_4X4_HOR_UP_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
113     0,
114     0xFFFC,
115
116     0 / INST_UNIT_GEN4,
117     (INTRA_8X8_HORIZONTAL_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
118     (INTRA_8X8_DC_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
119     (INTRA_8X8_DIAG_DOWN_LEFT_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
120
121     (INTRA_8X8_DIAG_DOWN_RIGHT_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
122     (INTRA_8X8_VERT_RIGHT_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
123     (INTRA_8X8_HOR_DOWN_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
124     (INTRA_8X8_VERT_LEFT_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
125
126     (INTRA_8X8_HOR_UP_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
127     0,
128     0x0001,
129
130     0 / INST_UNIT_GEN4,
131     (INTRA_16x16_HORIZONTAL_IP - INTRA_16x16_VERTICAL_IP) / INST_UNIT_GEN4,
132     (INTRA_16x16_DC_IP - INTRA_16x16_VERTICAL_IP) / INST_UNIT_GEN4,
133     (INTRA_16x16_PLANE_IP - INTRA_16x16_VERTICAL_IP) / INST_UNIT_GEN4,
134
135     0 / INST_UNIT_GEN4,
136     (INTRA_CHROMA_HORIZONTAL_IP - INTRA_CHROMA_DC_IP) / INST_UNIT_GEN4,
137     (INTRA_CHROMA_VERTICAL_IP - INTRA_CHROMA_DC_IP) / INST_UNIT_GEN4,
138     (INTRA_Chroma_PLANE_IP - INTRA_CHROMA_DC_IP) / INST_UNIT_GEN4,
139
140     0xFCFBFAF9,
141
142     0x00FFFEFD,
143
144     0x04030201,
145
146     0x08070605,
147
148     0x18100800,
149
150     0x00020406,
151
152     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB3_IP) / INST_UNIT_GEN4 * 0x1000000 + 
153     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB2_IP) / INST_UNIT_GEN4 * 0x10000 + 
154     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB1_IP) / INST_UNIT_GEN4 * 0x100 + 
155     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB0_IP) / INST_UNIT_GEN4
156 };
157
/*
 * Pre-assembled gen4 kernel binaries, pulled in from generated .g4b files
 * as rows of four 32-bit words.
 */
158 static uint32_t h264_avc_combined_gen4[][4] = {
159 #include "shaders/h264/mc/avc_mc.g4b"
160 };
161
162 static uint32_t h264_avc_null_gen4[][4] = {
163 #include "shaders/h264/mc/null.g4b"
164 };
165
/*
 * Kernel table for gen4: display name, kernel id, binary and its size in
 * bytes.  NOTE(review): the trailing NULL is presumably the kernel's buffer
 * object, filled in later -- confirm against struct media_kernel.
 */
166 static struct media_kernel h264_avc_kernels_gen4[] = {
167     {
168         "AVC combined kernel",
169         H264_AVC_COMBINED,
170         h264_avc_combined_gen4,
171         sizeof(h264_avc_combined_gen4),
172         NULL
173     },
174
175     {
176         "NULL kernel",
177         H264_AVC_NULL,
178         h264_avc_null_gen4,
179         sizeof(h264_avc_null_gen4),
180         NULL
181     }
182 };
183
184 /* On Ironlake */
185 #include "shaders/h264/mc/export.inc.gen5"
186 static unsigned long avc_mc_kernel_offset_gen5[] = {
187     INTRA_16x16_IP_GEN5,
188     INTRA_8x8_IP_GEN5,
189     INTRA_4x4_IP_GEN5,
190     INTRA_PCM_IP_GEN5,
191     FRAME_MB_IP_GEN5,
192     FIELD_MB_IP_GEN5,
193     MBAFF_MB_IP_GEN5
194 };
195
196 #define INST_UNIT_GEN5  8
197
198 struct intra_kernel_header intra_kernel_header_gen5 = {
199     0 / INST_UNIT_GEN5,
200     (INTRA_4X4_HORIZONTAL_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
201     (INTRA_4X4_DC_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
202     (INTRA_4X4_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
203
204     (INTRA_4X4_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
205     (INTRA_4X4_VERT_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
206     (INTRA_4X4_HOR_DOWN_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
207     (INTRA_4X4_VERT_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
208
209     (INTRA_4X4_HOR_UP_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
210     0,
211     0xFFFC,
212
213     0 / INST_UNIT_GEN5,
214     (INTRA_8X8_HORIZONTAL_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
215     (INTRA_8X8_DC_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
216     (INTRA_8X8_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
217
218     (INTRA_8X8_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
219     (INTRA_8X8_VERT_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
220     (INTRA_8X8_HOR_DOWN_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
221     (INTRA_8X8_VERT_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
222
223     (INTRA_8X8_HOR_UP_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
224     0,
225     0x0001,
226
227     0 / INST_UNIT_GEN5,
228     (INTRA_16x16_HORIZONTAL_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
229     (INTRA_16x16_DC_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
230     (INTRA_16x16_PLANE_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
231
232     0 / INST_UNIT_GEN5,
233     (INTRA_CHROMA_HORIZONTAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5) / INST_UNIT_GEN5,
234     (INTRA_CHROMA_VERTICAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5) / INST_UNIT_GEN5,
235     (INTRA_Chroma_PLANE_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5) / INST_UNIT_GEN5,
236
237     0xFCFBFAF9,
238
239     0x00FFFEFD,
240
241     0x04030201,
242
243     0x08070605,
244
245     0x18100800,
246
247     0x00020406,
248
249     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB3_IP_GEN5) / INST_UNIT_GEN5 * 0x1000000 + 
250     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB2_IP_GEN5) / INST_UNIT_GEN5 * 0x10000 + 
251     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB1_IP_GEN5) / INST_UNIT_GEN5 * 0x100 + 
252     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB0_IP_GEN5) / INST_UNIT_GEN5
253 };
254
/*
 * Pre-assembled gen5 (Ironlake) kernel binaries, pulled in from generated
 * .g4b.gen5 files as rows of four 32-bit words.
 */
255 static uint32_t h264_avc_combined_gen5[][4] = {
256 #include "shaders/h264/mc/avc_mc.g4b.gen5"
257 };
258
259 static uint32_t h264_avc_null_gen5[][4] = {
260 #include "shaders/h264/mc/null.g4b.gen5"
261 };
262
/*
 * Kernel table for gen5; same entry order as h264_avc_kernels_gen4 so the
 * H264_AVC_* ids index both tables identically.
 */
263 static struct media_kernel h264_avc_kernels_gen5[] = {
264     {
265         "AVC combined kernel",
266         H264_AVC_COMBINED,
267         h264_avc_combined_gen5,
268         sizeof(h264_avc_combined_gen5),
269         NULL
270     },
271
272     {
273         "NULL kernel",
274         H264_AVC_NULL,
275         h264_avc_null_gen5,
276         sizeof(h264_avc_null_gen5),
277         NULL
278     }
279 };
280
/*
 * Active per-generation tables.  All three pointers start out NULL and are
 * dereferenced by the state-setup functions below.
 * NOTE(review): presumably they are pointed at the gen4 or gen5 tables
 * during decode initialisation -- the assignment is not visible here.
 *
 * The element counts are taken from the gen4 tables; the gen5 tables above
 * have the same number of entries.
 */
281 #define NUM_H264_AVC_KERNELS (sizeof(h264_avc_kernels_gen4) / sizeof(h264_avc_kernels_gen4[0]))
282 struct media_kernel *h264_avc_kernels = NULL;
283
284 #define NUM_AVC_MC_INTERFACES (sizeof(avc_mc_kernel_offset_gen4) / sizeof(avc_mc_kernel_offset_gen4[0]))
285 static unsigned long *avc_mc_kernel_offset = NULL;
286
287 static struct intra_kernel_header *intra_kernel_header = NULL;
288
289 static void
290 i965_media_h264_surface_state(VADriverContextP ctx, 
291                               int index,
292                               struct object_surface *obj_surface,
293                               unsigned long offset, 
294                               int w, int h, int pitch,
295                               Bool is_dst,
296                               int vert_line_stride,
297                               int vert_line_stride_ofs,
298                               int format)
299 {
300     struct i965_driver_data *i965 = i965_driver_data(ctx);  
301     struct i965_media_state *media_state = &i965->media_state;
302     struct i965_surface_state *ss;
303     dri_bo *bo;
304     uint32_t write_domain, read_domain;
305
306     bo = dri_bo_alloc(i965->intel.bufmgr, 
307                       "surface state", 
308                       sizeof(struct i965_surface_state), 32);
309     assert(bo);
310     dri_bo_map(bo, 1);
311     assert(bo->virtual);
312     ss = bo->virtual;
313     memset(ss, 0, sizeof(*ss));
314     ss->ss0.surface_type = I965_SURFACE_2D;
315     ss->ss0.surface_format = format;
316     ss->ss0.vert_line_stride = vert_line_stride;
317     ss->ss0.vert_line_stride_ofs = vert_line_stride_ofs;
318     ss->ss1.base_addr = obj_surface->bo->offset + offset;
319     ss->ss2.width = w - 1;
320     ss->ss2.height = h - 1;
321     ss->ss3.pitch = pitch - 1;
322
323     if (is_dst) {
324         write_domain = I915_GEM_DOMAIN_RENDER;
325         read_domain = I915_GEM_DOMAIN_RENDER;
326     } else {
327         write_domain = 0;
328         read_domain = I915_GEM_DOMAIN_SAMPLER;
329     }
330
331     dri_bo_emit_reloc(bo,
332                       read_domain, write_domain,
333                       offset,
334                       offsetof(struct i965_surface_state, ss1),
335                       obj_surface->bo);
336     dri_bo_unmap(bo);
337
338     assert(index < MAX_MEDIA_SURFACES);
339     media_state->surface_state[index].bo = bo;
340 }
341
342 static void 
343 i965_media_h264_surfaces_setup(VADriverContextP ctx, 
344                                struct decode_state *decode_state)
345 {
346     struct i965_driver_data *i965 = i965_driver_data(ctx);  
347     struct object_surface *obj_surface;
348     VAPictureParameterBufferH264 *pic_param;
349     VAPictureH264 *va_pic;
350     int i, w, h;
351     int field_picture;
352
353     assert(decode_state->pic_param && decode_state->pic_param->buffer);
354     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
355
356     /* Target Picture */
357     va_pic = &pic_param->CurrPic;
358     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
359     obj_surface = SURFACE(va_pic->picture_id);
360     assert(obj_surface);
361     w = obj_surface->width;
362     h = obj_surface->height;
363     field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
364     i965_media_h264_surface_state(ctx, 0, obj_surface,
365                                   0, w / 4, h / (1 + field_picture), w,
366                                   1, 
367                                   field_picture,
368                                   !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
369                                   I965_SURFACEFORMAT_R8_SINT); /* Y */
370     i965_media_h264_surface_state(ctx, 1, obj_surface,
371                                   w * h, w / 4, h / 2 / (1 + field_picture), w,
372                                   1, 
373                                   field_picture,
374                                   !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
375                                   I965_SURFACEFORMAT_R8G8_SINT);  /* INTERLEAVED U/V */
376
377     /* Reference Pictures */
378     for (i = 0; i < 16; i++) {
379         va_pic = &pic_param->ReferenceFrames[i];
380
381         if (!(va_pic->flags & VA_PICTURE_H264_INVALID)) {
382             obj_surface = SURFACE(va_pic->picture_id);
383             assert(obj_surface);
384             w = obj_surface->width;
385             h = obj_surface->height;
386             field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
387             i965_media_h264_surface_state(ctx, 2 + i, obj_surface,
388                                           0, w / 4, h / (1 + field_picture), w,
389                                           0, 
390                                           field_picture,
391                                           !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
392                                           I965_SURFACEFORMAT_R8_SINT); /* Y */
393             i965_media_h264_surface_state(ctx, 18 + i, obj_surface,
394                                           w * h, w / 4, h / 2 / (1 + field_picture), w,
395                                           0, 
396                                           field_picture,
397                                           !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
398                                           I965_SURFACEFORMAT_R8G8_SINT);  /* INTERLEAVED U/V */
399         }
400     }
401 }
402
403 static void
404 i965_media_h264_binding_table(VADriverContextP ctx)
405 {
406     struct i965_driver_data *i965 = i965_driver_data(ctx);
407     struct i965_media_state *media_state = &i965->media_state;
408     int i;
409     unsigned int *binding_table;
410     dri_bo *bo = media_state->binding_table.bo;
411
412     dri_bo_map(bo, 1);
413     assert(bo->virtual);
414     binding_table = bo->virtual;
415     memset(binding_table, 0, bo->size);
416
417     for (i = 0; i < MAX_MEDIA_SURFACES; i++) {
418         if (media_state->surface_state[i].bo) {
419             binding_table[i] = media_state->surface_state[i].bo->offset;
420             dri_bo_emit_reloc(bo,
421                               I915_GEM_DOMAIN_INSTRUCTION, 0,
422                               0,
423                               i * sizeof(*binding_table),
424                               media_state->surface_state[i].bo);
425         }
426     }
427
428     dri_bo_unmap(media_state->binding_table.bo);
429 }
430
431 static void 
432 i965_media_h264_interface_descriptor_remap_table(VADriverContextP ctx)
433 {
434     struct i965_driver_data *i965 = i965_driver_data(ctx);
435     struct i965_media_state *media_state = &i965->media_state;
436     struct i965_interface_descriptor *desc;
437     int i;
438     dri_bo *bo;
439
440     bo = media_state->idrt.bo;
441     dri_bo_map(bo, 1);
442     assert(bo->virtual);
443     desc = bo->virtual;
444
445     for (i = 0; i < NUM_AVC_MC_INTERFACES; i++) {
446         int kernel_offset = avc_mc_kernel_offset[i];
447         memset(desc, 0, sizeof(*desc));
448         desc->desc0.grf_reg_blocks = 7; 
449         desc->desc0.kernel_start_pointer = (h264_avc_kernels[H264_AVC_COMBINED].bo->offset + kernel_offset) >> 6; /* reloc */
450         desc->desc1.const_urb_entry_read_offset = 0;
451         desc->desc1.const_urb_entry_read_len = 2;
452         desc->desc3.binding_table_entry_count = 0;
453         desc->desc3.binding_table_pointer = 
454             media_state->binding_table.bo->offset >> 5; /*reloc */
455
456         dri_bo_emit_reloc(bo,
457                           I915_GEM_DOMAIN_INSTRUCTION, 0,
458                           desc->desc0.grf_reg_blocks + kernel_offset,
459                           i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc0),
460                           h264_avc_kernels[H264_AVC_COMBINED].bo);
461
462         dri_bo_emit_reloc(bo,
463                           I915_GEM_DOMAIN_INSTRUCTION, 0,
464                           desc->desc3.binding_table_entry_count,
465                           i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc3),
466                           media_state->binding_table.bo);
467         desc++;
468     }
469
470     dri_bo_unmap(bo);
471 }
472
473 static void
474 i965_media_h264_vfe_state(VADriverContextP ctx)
475 {
476     struct i965_driver_data *i965 = i965_driver_data(ctx);
477     struct i965_media_state *media_state = &i965->media_state;
478     struct i965_vfe_state *vfe_state;
479     dri_bo *bo;
480
481     bo = media_state->vfe_state.bo;
482     dri_bo_map(bo, 1);
483     assert(bo->virtual);
484     vfe_state = bo->virtual;
485     memset(vfe_state, 0, sizeof(*vfe_state));
486     vfe_state->vfe0.extend_vfe_state_present = 1;
487     vfe_state->vfe1.max_threads = media_state->urb.num_vfe_entries - 1;
488     vfe_state->vfe1.urb_entry_alloc_size = media_state->urb.size_vfe_entry - 1;
489     vfe_state->vfe1.num_urb_entries = media_state->urb.num_vfe_entries;
490     vfe_state->vfe1.vfe_mode = VFE_AVC_IT_MODE;
491     vfe_state->vfe1.children_present = 0;
492     vfe_state->vfe2.interface_descriptor_base = 
493         media_state->idrt.bo->offset >> 4; /* reloc */
494     dri_bo_emit_reloc(bo,
495                       I915_GEM_DOMAIN_INSTRUCTION, 0,
496                       0,
497                       offsetof(struct i965_vfe_state, vfe2),
498                       media_state->idrt.bo);
499     dri_bo_unmap(bo);
500 }
501
502 static void 
503 i965_media_h264_vfe_state_extension(VADriverContextP ctx, 
504                                     struct decode_state *decode_state)
505 {
506     struct i965_driver_data *i965 = i965_driver_data(ctx);
507     struct i965_media_state *media_state = &i965->media_state;
508     struct i965_h264_context *i965_h264_context;
509     struct i965_vfe_state_ex *vfe_state_ex;
510     VAPictureParameterBufferH264 *pic_param;
511     VASliceParameterBufferH264 *slice_param;
512     int mbaff_frame_flag;
513
514     assert(media_state->private_context);
515     i965_h264_context = (struct i965_h264_context *)media_state->private_context;
516
517     assert(decode_state->pic_param && decode_state->pic_param->buffer);
518     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
519
520     assert(decode_state->slice_param && decode_state->slice_param->buffer);
521     slice_param = (VASliceParameterBufferH264 *)decode_state->slice_param->buffer;
522
523     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
524                         !pic_param->pic_fields.bits.field_pic_flag);
525
526     assert(media_state->extended_state.bo);
527     dri_bo_map(media_state->extended_state.bo, 1);
528     assert(media_state->extended_state.bo->virtual);
529     vfe_state_ex = media_state->extended_state.bo->virtual;
530     memset(vfe_state_ex, 0, sizeof(*vfe_state_ex));
531
532     /*
533      * Indirect data buffer:
534      * --------------------------------------------------------
535      * | Motion Vectors | Weight/Offset data | Residual data |
536      * --------------------------------------------------------
537      * R4-R7: Motion Vectors
538      * R8-R9: Weight/Offset
539      * R10-R33: Residual data
540      */
541     vfe_state_ex->vfex1.avc.residual_data_fix_offset_flag = !!RESIDUAL_DATA_OFFSET;
542     vfe_state_ex->vfex1.avc.residual_data_offset = RESIDUAL_DATA_OFFSET;
543
544     if (slice_param->slice_type == SLICE_TYPE_I) 
545         vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_NOMV; /* NoMV */
546     else 
547         vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_MV_WO; /* Both MV and W/O */
548
549     if (vfe_state_ex->vfex1.avc.sub_field_present_flag == 0) {
550         vfe_state_ex->vfex1.avc.weight_grf_offset = 0;
551         vfe_state_ex->vfex1.avc.residual_grf_offset = 0;
552     } else {
553         vfe_state_ex->vfex1.avc.weight_grf_offset = 4;
554         vfe_state_ex->vfex1.avc.residual_grf_offset = 6;
555     }
556
557     if (!pic_param->pic_fields.bits.field_pic_flag) {
558         if (mbaff_frame_flag) {
559             vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
560             vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
561             vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
562             vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
563             vfe_state_ex->remap_table0.remap_index_4 = MBAFF_MOTION;
564             vfe_state_ex->remap_table0.remap_index_5 = MBAFF_MOTION;
565             vfe_state_ex->remap_table0.remap_index_6 = MBAFF_MOTION;
566             vfe_state_ex->remap_table0.remap_index_7 = MBAFF_MOTION;
567
568             vfe_state_ex->remap_table1.remap_index_8 = MBAFF_MOTION;
569             vfe_state_ex->remap_table1.remap_index_9 = MBAFF_MOTION;
570             vfe_state_ex->remap_table1.remap_index_10 = MBAFF_MOTION;
571             vfe_state_ex->remap_table1.remap_index_11 = MBAFF_MOTION;
572             vfe_state_ex->remap_table1.remap_index_12 = MBAFF_MOTION;
573             vfe_state_ex->remap_table1.remap_index_13 = MBAFF_MOTION;
574             vfe_state_ex->remap_table1.remap_index_14 = MBAFF_MOTION;
575             vfe_state_ex->remap_table1.remap_index_15 = MBAFF_MOTION;
576         } else {
577             vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
578             vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
579             vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
580             vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
581             vfe_state_ex->remap_table0.remap_index_4 = FRAMEMB_MOTION;
582             vfe_state_ex->remap_table0.remap_index_5 = FRAMEMB_MOTION;
583             vfe_state_ex->remap_table0.remap_index_6 = FRAMEMB_MOTION;
584             vfe_state_ex->remap_table0.remap_index_7 = FRAMEMB_MOTION;
585
586             vfe_state_ex->remap_table1.remap_index_8 = FRAMEMB_MOTION;
587             vfe_state_ex->remap_table1.remap_index_9 = FRAMEMB_MOTION;
588             vfe_state_ex->remap_table1.remap_index_10 = FRAMEMB_MOTION;
589             vfe_state_ex->remap_table1.remap_index_11 = FRAMEMB_MOTION;
590             vfe_state_ex->remap_table1.remap_index_12 = FRAMEMB_MOTION;
591             vfe_state_ex->remap_table1.remap_index_13 = FRAMEMB_MOTION;
592             vfe_state_ex->remap_table1.remap_index_14 = FRAMEMB_MOTION;
593             vfe_state_ex->remap_table1.remap_index_15 = FRAMEMB_MOTION;
594         }
595     } else {
596         vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
597         vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
598         vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
599         vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
600         vfe_state_ex->remap_table0.remap_index_4 = FIELDMB_MOTION;
601         vfe_state_ex->remap_table0.remap_index_5 = FIELDMB_MOTION;
602         vfe_state_ex->remap_table0.remap_index_6 = FIELDMB_MOTION;
603         vfe_state_ex->remap_table0.remap_index_7 = FIELDMB_MOTION;
604
605         vfe_state_ex->remap_table1.remap_index_8 = FIELDMB_MOTION;
606         vfe_state_ex->remap_table1.remap_index_9 = FIELDMB_MOTION;
607         vfe_state_ex->remap_table1.remap_index_10 = FIELDMB_MOTION;
608         vfe_state_ex->remap_table1.remap_index_11 = FIELDMB_MOTION;
609         vfe_state_ex->remap_table1.remap_index_12 = FIELDMB_MOTION;
610         vfe_state_ex->remap_table1.remap_index_13 = FIELDMB_MOTION;
611         vfe_state_ex->remap_table1.remap_index_14 = FIELDMB_MOTION;
612         vfe_state_ex->remap_table1.remap_index_15 = FIELDMB_MOTION;
613     }
614
615     if (i965_h264_context->use_avc_hw_scoreboard) {
616         vfe_state_ex->scoreboard0.enable = 1;
617         vfe_state_ex->scoreboard0.type = SCOREBOARD_STALLING;
618         vfe_state_ex->scoreboard0.mask = 0xff;
619
620         vfe_state_ex->scoreboard1.delta_x0 = -1;
621         vfe_state_ex->scoreboard1.delta_y0 = 0;
622         vfe_state_ex->scoreboard1.delta_x1 = 0;
623         vfe_state_ex->scoreboard1.delta_y1 = -1;
624         vfe_state_ex->scoreboard1.delta_x2 = 1;
625         vfe_state_ex->scoreboard1.delta_y2 = -1;
626         vfe_state_ex->scoreboard1.delta_x3 = -1;
627         vfe_state_ex->scoreboard1.delta_y3 = -1;
628
629         vfe_state_ex->scoreboard2.delta_x4 = -1;
630         vfe_state_ex->scoreboard2.delta_y4 = 1;
631         vfe_state_ex->scoreboard2.delta_x5 = 0;
632         vfe_state_ex->scoreboard2.delta_y5 = -2;
633         vfe_state_ex->scoreboard2.delta_x6 = 1;
634         vfe_state_ex->scoreboard2.delta_y6 = -2;
635         vfe_state_ex->scoreboard2.delta_x7 = -1;
636         vfe_state_ex->scoreboard2.delta_y7 = -2;
637     }
638
639     dri_bo_unmap(media_state->extended_state.bo);
640 }
641
642 static void
643 i965_media_h264_upload_constants(VADriverContextP ctx, struct decode_state *decode_state)
644 {
645     struct i965_driver_data *i965 = i965_driver_data(ctx);
646     struct i965_media_state *media_state = &i965->media_state;
647     struct i965_h264_context *i965_h264_context;
648     unsigned char *constant_buffer;
649     VASliceParameterBufferH264 *slice_param;
650
651     assert(media_state->private_context);
652     i965_h264_context = (struct i965_h264_context *)media_state->private_context;
653
654     assert(decode_state->slice_param && decode_state->slice_param->buffer);
655     slice_param = (VASliceParameterBufferH264 *)decode_state->slice_param->buffer;
656
657     dri_bo_map(media_state->curbe.bo, 1);
658     assert(media_state->curbe.bo->virtual);
659     constant_buffer = media_state->curbe.bo->virtual;
660
661     /* HW solution for W=128 */
662     if (i965_h264_context->use_hw_w128) {
663         memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
664     } else {
665         if (slice_param->slice_type == SLICE_TYPE_I) {
666             memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
667         } else {
668             /* FIXME: Need to upload CURBE data to inter kernel interface 
669              * to support weighted prediction work-around 
670              */
671             *(short *)constant_buffer = i965_h264_context->weight128_offset0;
672             constant_buffer += 2;
673             *(char *)constant_buffer = i965_h264_context->weight128_offset0_flag;
674             constant_buffer++;
675             *constant_buffer = 0;
676         }
677     }
678
679     dri_bo_unmap(media_state->curbe.bo);
680 }
681
682 static void
683 i965_media_h264_states_setup(VADriverContextP ctx, struct decode_state *decode_state)
684 {
685     struct i965_driver_data *i965 = i965_driver_data(ctx);
686     struct i965_media_state *media_state = &i965->media_state;
687     struct i965_h264_context *i965_h264_context;
688     VAPictureParameterBufferH264 *pic_param;
689     unsigned int *object_command;
690
691     assert(media_state->private_context);
692     i965_h264_context = (struct i965_h264_context *)media_state->private_context;
693     assert(decode_state->pic_param && decode_state->pic_param->buffer);
694     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
695
696     i965_h264_context->picture.width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
697     i965_h264_context->picture.height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff) / 
698         (1 + !!pic_param->pic_fields.bits.field_pic_flag); /* picture height */
699     i965_h264_context->picture.mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
700                                                    !pic_param->pic_fields.bits.field_pic_flag);
701
702     i965_h264_context->avc_it_command_mb_info.mbs = (i965_h264_context->picture.width_in_mbs * 
703                                                      i965_h264_context->picture.height_in_mbs);
704
705     i965_avc_bsd_pipeline(ctx, decode_state);
706
707     dri_bo_map(i965_h264_context->avc_it_command_mb_info.bo, True);
708     assert(i965_h264_context->avc_it_command_mb_info.bo->virtual);
709     object_command = i965_h264_context->avc_it_command_mb_info.bo->virtual;
710     memset(object_command, 0, i965_h264_context->avc_it_command_mb_info.mbs * i965_h264_context->use_avc_hw_scoreboard * MB_CMD_IN_BYTES);
711     object_command += i965_h264_context->avc_it_command_mb_info.mbs * (1 + i965_h264_context->use_avc_hw_scoreboard) * MB_CMD_IN_DWS;
712     *object_command = MI_BATCH_BUFFER_END;
713     dri_bo_unmap(i965_h264_context->avc_it_command_mb_info.bo);
714
715     i965_avc_hw_scoreboard(ctx, decode_state);
716
717     i965_media_h264_surfaces_setup(ctx, decode_state);
718     i965_media_h264_binding_table(ctx);
719     i965_media_h264_interface_descriptor_remap_table(ctx);
720     i965_media_h264_vfe_state_extension(ctx, decode_state);
721     i965_media_h264_vfe_state(ctx);
722     i965_media_h264_upload_constants(ctx, decode_state);
723 }
724
725 static void
726 i965_media_h264_objects(VADriverContextP ctx, struct decode_state *decode_state)
727 {
728     struct i965_driver_data *i965 = i965_driver_data(ctx);
729     struct i965_media_state *media_state = &i965->media_state;
730     struct i965_h264_context *i965_h264_context;
731
732     assert(media_state->private_context);
733     i965_h264_context = (struct i965_h264_context *)media_state->private_context;
734     
735     BEGIN_BATCH(ctx, 2);
736     OUT_BATCH(ctx, MI_BATCH_BUFFER_START | (2 << 6));
737     OUT_RELOC(ctx, i965_h264_context->avc_it_command_mb_info.bo, 
738               I915_GEM_DOMAIN_COMMAND, 0, 
739               0);
740     ADVANCE_BATCH(ctx);
741 }
742
743 void
744 i965_media_h264_decode_init(VADriverContextP ctx)
745 {
746     struct i965_driver_data *i965 = i965_driver_data(ctx);
747     struct i965_media_state *media_state = &i965->media_state;
748     struct i965_h264_context *i965_h264_context;
749     dri_bo *bo;
750
751     assert(media_state->private_context);
752     i965_h264_context = media_state->private_context;
753
754     dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
755     bo = dri_bo_alloc(i965->intel.bufmgr,
756                       "avc it command mb info",
757                       0x80000 * (1 + i965_h264_context->use_avc_hw_scoreboard),  /* at least 522240 bytes */
758                       0x1000);
759     assert(bo);
760     i965_h264_context->avc_it_command_mb_info.bo = bo;
761
762     dri_bo_unreference(i965_h264_context->avc_it_data.bo);
763     bo = dri_bo_alloc(i965->intel.bufmgr,
764                       "avc it data",
765                       0x1000000, /* at least 16711680 bytes */
766                       4096);
767     assert(bo);
768     i965_h264_context->avc_it_data.bo = bo;
769     i965_h264_context->avc_it_data.write_offset = 0;
770     dri_bo_unreference(media_state->indirect_object.bo);
771     media_state->indirect_object.bo = bo;
772     dri_bo_reference(media_state->indirect_object.bo);
773     media_state->indirect_object.offset = i965_h264_context->avc_it_data.write_offset;
774
775     /* bsd pipeline */
776     i965_avc_bsd_decode_init(ctx);
777
778     /* HW scoreboard */
779     i965_avc_hw_scoreboard_decode_init(ctx);
780
781     /* for Media pipeline */
782     media_state->extended_state.enabled = 1;
783     dri_bo_unreference(media_state->extended_state.bo);
784     bo = dri_bo_alloc(i965->intel.bufmgr, 
785                       "extened vfe state", 
786                       sizeof(struct i965_vfe_state_ex), 32);
787     assert(bo);
788     media_state->extended_state.bo = bo;
789
790     /* URB */
791     if (IS_IRONLAKE(i965->intel.device_id)) {
792         media_state->urb.num_vfe_entries = 63;
793     } else {
794         media_state->urb.num_vfe_entries = 23;
795     }
796
797     media_state->urb.size_vfe_entry = 16;
798
799     media_state->urb.num_cs_entries = 1;
800     media_state->urb.size_cs_entry = 1;
801
802     media_state->urb.vfe_start = 0;
803     media_state->urb.cs_start = media_state->urb.vfe_start + 
804         media_state->urb.num_vfe_entries * media_state->urb.size_vfe_entry;
805     assert(media_state->urb.cs_start + 
806            media_state->urb.num_cs_entries * media_state->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
807
808     /* hook functions */
809     media_state->media_states_setup = i965_media_h264_states_setup;
810     media_state->media_objects = i965_media_h264_objects;
811 }
812
813 Bool 
814 i965_media_h264_init(VADriverContextP ctx)
815 {
816     struct i965_driver_data *i965 = i965_driver_data(ctx);
817     struct i965_media_state *media_state = &i965->media_state;
818     struct i965_h264_context *i965_h264_context;
819     int i;
820
821     i965_h264_context = calloc(1, sizeof(struct i965_h264_context));
822
823     /* kernel */
824     assert(NUM_H264_AVC_KERNELS == (sizeof(h264_avc_kernels_gen5) / 
825                                     sizeof(h264_avc_kernels_gen5[0])));
826     assert(NUM_AVC_MC_INTERFACES == (sizeof(avc_mc_kernel_offset_gen5) /
827                                      sizeof(avc_mc_kernel_offset_gen5[0])));
828
829     if (IS_IRONLAKE(i965->intel.device_id)) {
830         h264_avc_kernels = h264_avc_kernels_gen5;
831         avc_mc_kernel_offset = avc_mc_kernel_offset_gen5;
832         intra_kernel_header = &intra_kernel_header_gen5;
833         i965_h264_context->use_avc_hw_scoreboard = 1;
834         i965_h264_context->use_hw_w128 = 1;
835     } else {
836         h264_avc_kernels = h264_avc_kernels_gen4;
837         avc_mc_kernel_offset = avc_mc_kernel_offset_gen4;
838         intra_kernel_header = &intra_kernel_header_gen4;
839         i965_h264_context->use_avc_hw_scoreboard = 0;
840         i965_h264_context->use_hw_w128 = 0;
841     }
842
843     for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
844         struct media_kernel *kernel = &h264_avc_kernels[i];
845         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
846                                   kernel->name, 
847                                   kernel->size, 64);
848         assert(kernel->bo);
849         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
850     }
851
852     media_state->private_context = i965_h264_context;
853     return True;
854 }
855
856 Bool 
857 i965_media_h264_ternimate(VADriverContextP ctx)
858 {
859     struct i965_driver_data *i965 = i965_driver_data(ctx);
860     struct i965_media_state *media_state = &i965->media_state;
861     struct i965_h264_context *i965_h264_context;
862     int i;
863
864     if (media_state->private_context) {
865         i965_h264_context = (struct i965_h264_context *)media_state->private_context;
866         i965_avc_hw_scoreboard_ternimate(&i965_h264_context->avc_hw_scoreboard_context);
867         i965_avc_bsd_ternimate(&i965_h264_context->i965_avc_bsd_context);
868         dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
869         dri_bo_unreference(i965_h264_context->avc_it_data.bo);
870         free(i965_h264_context);
871         media_state->private_context = NULL;
872     }
873
874     for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
875         struct media_kernel *kernel = &h264_avc_kernels[i];
876
877         dri_bo_unreference(kernel->bo);
878         kernel->bo = NULL;
879     }
880
881     return True;
882 }