OSDN Git Service

i965_drv_video: move pp_context to i965_driver_data and make it a pointer.
[android-x86/hardware-intel-common-libva.git] / i965_drv_video / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include <va/va_backend.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38
39 #include "i965_defines.h"
40 #include "i965_post_processing.h"
41 #include "i965_render.h"
42 #include "i965_drv_video.h"
43
44 struct pp_module
45 {
46     /* kernel */
47     char *name;
48     int interface;
49     unsigned int (*bin)[4];
50     int size;
51     dri_bo *bo;
52
53     /* others */
54     void (*initialize)(VADriverContextP ctx, VASurfaceID surface, int input,
55                        unsigned short srcw, unsigned short srch,
56                        unsigned short destw, unsigned short desth);
57 };
58
59 static uint32_t pp_null_gen5[][4] = {
60 #include "shaders/post_processing/null.g4b.gen5"
61 };
62
63 static uint32_t pp_nv12_load_save_gen5[][4] = {
64 #include "shaders/post_processing/nv12_load_save_nv12.g4b.gen5"
65 };
66
67 static uint32_t pp_nv12_scaling_gen5[][4] = {
68 #include "shaders/post_processing/nv12_scaling_nv12.g4b.gen5"
69 };
70
71 static uint32_t pp_nv12_avs_gen5[][4] = {
72 #include "shaders/post_processing/nv12_avs_nv12.g4b.gen5"
73 };
74
75 static uint32_t pp_nv12_dndi_gen5[][4] = {
76 #include "shaders/post_processing/nv12_dndi_nv12.g4b.gen5"
77 };
78
79 static void ironlake_pp_null_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
80                                         unsigned short srcw, unsigned short srch,
81                                         unsigned short destw, unsigned short desth);
82 static void ironlake_pp_nv12_avs_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
83                                             unsigned short srcw, unsigned short srch,
84                                             unsigned short destw, unsigned short desth);
85 static void ironlake_pp_nv12_scaling_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
86                                                 unsigned short srcw, unsigned short srch,
87                                                 unsigned short destw, unsigned short desth);
88 static void ironlake_pp_nv12_load_save_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
89                                                   unsigned short srcw, unsigned short srch,
90                                                   unsigned short destw, unsigned short desth);
91 static void ironlake_pp_nv12_dndi_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
92                                              unsigned short srcw, unsigned short srch,
93                                              unsigned short destw, unsigned short desth);
94
95 static struct pp_module pp_modules_gen5[] = {
96     {
97         "NULL module (for testing)",
98         PP_NULL,
99         pp_null_gen5,
100         sizeof(pp_null_gen5),
101         NULL,
102         ironlake_pp_null_initialize,
103     },
104
105     {
106         "NV12 Load & Save module",
107         PP_NV12_LOAD_SAVE,
108         pp_nv12_load_save_gen5,
109         sizeof(pp_nv12_load_save_gen5),
110         NULL,
111         ironlake_pp_nv12_load_save_initialize,
112     },
113
114     {
115         "NV12 Scaling module",
116         PP_NV12_SCALING,
117         pp_nv12_scaling_gen5,
118         sizeof(pp_nv12_scaling_gen5),
119         NULL,
120         ironlake_pp_nv12_scaling_initialize,
121     },
122
123     {
124         "NV12 AVS module",
125         PP_NV12_AVS,
126         pp_nv12_avs_gen5,
127         sizeof(pp_nv12_avs_gen5),
128         NULL,
129         ironlake_pp_nv12_avs_initialize,
130     },
131
132     {
133         "NV12 DNDI module",
134         PP_NV12_DNDI,
135         pp_nv12_dndi_gen5,
136         sizeof(pp_nv12_dndi_gen5),
137         NULL,
138         ironlake_pp_nv12_dndi_initialize,
139     },
140 };
141
142 #define NUM_PP_MODULES ARRAY_ELEMS(pp_modules_gen5)
143
144 static struct pp_module *pp_modules = NULL;
145
/*
 * Static (constant buffer) parameters read by the kernels as GRF r1..r4.
 * The layout is fixed: four groups of eight dwords, 128 bytes in total
 * (asserted before the upload).  The "rN.M" tags name the dword each
 * member occupies.
 */
struct ironlake_pp_static_parameter {
    struct {
        float procamp_constant_c0;                      /* r1.0  Procamp */

        /* r1.1  Load and Save */
        unsigned int source_packed_y_offset:8;
        unsigned int source_packed_u_offset:8;
        unsigned int source_packed_v_offset:8;
        unsigned int pad0:8;

        /* r1.2  shared between Load and Save and CSC */
        union {
            struct {
                unsigned int destination_packed_y_offset:8;
                unsigned int destination_packed_u_offset:8;
                unsigned int destination_packed_v_offset:8;
                unsigned int pad0:8;
            } load_and_save;

            struct {
                unsigned int destination_rgb_format:8;
                unsigned int pad0:24;
            } csc;
        } r1_2;

        float procamp_constant_c1;                      /* r1.3  Procamp */
        float procamp_constant_c2;                      /* r1.4  Procamp */

        /* r1.5  DI */
        unsigned int statistics_surface_picth:16;       /* Divided by 2 */
        unsigned int pad1:16;

        /* r1.6  shared between DI and AVS/Scaling */
        union {
            struct {
                unsigned int pad0:24;
                unsigned int top_field_first:8;
            } di;

            float normalized_video_y_scaling_step;
        } r1_6;

        float procamp_constant_c5;                      /* r1.7  Procamp */
    } grf1;

    struct {
        float procamp_constant_c3;                      /* r2.0  Procamp */
        unsigned int pad0;                              /* r2.1  MBZ */
        float wg_csc_constant_c4;                       /* r2.2  WG+CSC */
        float wg_csc_constant_c8;                       /* r2.3  WG+CSC */
        float procamp_constant_c4;                      /* r2.4  Procamp */
        unsigned int pad1;                              /* r2.5  MBZ */
        unsigned int pad2;                              /* r2.6  MBZ */
        float wg_csc_constant_c9;                       /* r2.7  WG+CSC */
    } grf2;

    struct {
        float wg_csc_constant_c0;                       /* r3.0  WG+CSC */
        float scaling_step_ratio;                       /* r3.1  Blending */
        float normalized_alpha_y_scaling;               /* r3.2  Blending */
        float wg_csc_constant_c4;                       /* r3.3  WG+CSC */
        float wg_csc_constant_c1;                       /* r3.4  WG+CSC */

        /* r3.5  ALL */
        int horizontal_origin_offset:16;
        int vertical_origin_offset:16;

        /* r3.6  shared between color fill and WG+CSC */
        union {
            unsigned int color_pixel;                   /* Color fill */
            float wg_csc_constant_c2;                   /* WG+CSC */
        } r3_6;

        float wg_csc_constant_c3;                       /* r3.7  WG+CSC */
    } grf3;

    struct {
        float wg_csc_constant_c6;                       /* r4.0  WG+CSC */
        unsigned int pad0;                              /* r4.1  ALL, MBZ ??? */

        /* r4.2  shared between AVS and DI */
        union {
            struct {
                unsigned int pad1:15;
                unsigned int nlas:1;
                unsigned int pad2:16;
            } avs;

            struct {
                unsigned int motion_history_coefficient_m2:8;
                unsigned int motion_history_coefficient_m1:8;
                unsigned int pad0:16;
            } di;
        } r4_2;

        float wg_csc_constant_c7;                       /* r4.3  WG+CSC */
        float wg_csc_constant_c10;                      /* r4.4  WG+CSC */
        float source_video_frame_normalized_horizontal_origin; /* r4.5  AVS */
        unsigned int pad1;                              /* r4.6  MBZ */
        float wg_csc_constant_c11;                      /* r4.7  WG+CSC */
    } grf4;
};
298
/*
 * Inline parameters sent with each media object and read by the kernels as
 * GRF r5..r6.  The layout is fixed: two groups of eight dwords, 64 bytes in
 * total (asserted where the data is emitted).
 */
struct ironlake_pp_inline_parameter {
    struct {
        /* r5.0  ALL */
        int destination_block_horizontal_origin:16;
        int destination_block_vertical_origin:16;

        /* r5.1  shared between AVS/Scaling and FMD */
        union {
            float source_surface_block_normalized_horizontal_origin;

            struct {
                unsigned int variance_surface_vertical_origin:16;
                unsigned int pad0:16;
            } fmd;
        } r5_1;

        float source_surface_block_normalized_vertical_origin;   /* r5.2  AVS/Scaling */
        float alpha_surface_block_normalized_horizontal_origin;  /* r5.3  Alpha */
        float alpha_surface_block_normalized_vertical_origin;    /* r5.4  Alpha */

        /* r5.5  Alpha */
        unsigned int alpha_mask_x:16;
        unsigned int alpha_mask_y:8;
        unsigned int block_count_x:8;

        /* r5.6 */
        unsigned int block_horizontal_mask:16;
        unsigned int block_vertical_mask:8;
        unsigned int number_blocks:8;

        float normalized_video_x_scaling_step;                   /* r5.7  AVS/Scaling */
    } grf5;

    struct {
        float video_step_delta;                                  /* r6.0  AVS */
        unsigned int padx[7];                                    /* r6.1-r6.7 */
    } grf6;
};
349
350 static struct ironlake_pp_static_parameter ironlake_pp_static_parameter;
351 static struct ironlake_pp_inline_parameter ironlake_pp_inline_parameter;
352
/*
 * Surface-state step of the state setup.  Intentionally empty: in this file
 * the surface states are written by the module initialize hooks.
 */
static void
ironlake_pp_surface_state(struct i965_post_processing_context *pp_context)
{
    (void)pp_context;
}
358
359 static void
360 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
361 {
362     struct i965_interface_descriptor *desc;
363     dri_bo *bo;
364     int pp_index = pp_context->current_pp;
365
366     bo = pp_context->idrt.bo;
367     dri_bo_map(bo, 1);
368     assert(bo->virtual);
369     desc = bo->virtual;
370     memset(desc, 0, sizeof(*desc));
371     desc->desc0.grf_reg_blocks = 10;
372     desc->desc0.kernel_start_pointer = pp_modules[pp_index].bo->offset >> 6; /* reloc */
373     desc->desc1.const_urb_entry_read_offset = 0;
374     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
375     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
376     desc->desc2.sampler_count = 0;
377     desc->desc3.binding_table_entry_count = 0;
378     desc->desc3.binding_table_pointer = 
379         pp_context->binding_table.bo->offset >> 5; /*reloc */
380
381     dri_bo_emit_reloc(bo,
382                       I915_GEM_DOMAIN_INSTRUCTION, 0,
383                       desc->desc0.grf_reg_blocks,
384                       offsetof(struct i965_interface_descriptor, desc0),
385                       pp_modules[pp_index].bo);
386
387     dri_bo_emit_reloc(bo,
388                       I915_GEM_DOMAIN_INSTRUCTION, 0,
389                       desc->desc2.sampler_count << 2,
390                       offsetof(struct i965_interface_descriptor, desc2),
391                       pp_context->sampler_state_table.bo);
392
393     dri_bo_emit_reloc(bo,
394                       I915_GEM_DOMAIN_INSTRUCTION, 0,
395                       desc->desc3.binding_table_entry_count,
396                       offsetof(struct i965_interface_descriptor, desc3),
397                       pp_context->binding_table.bo);
398
399     dri_bo_unmap(bo);
400 }
401
402 static void
403 ironlake_pp_binding_table(struct i965_post_processing_context *pp_context)
404 {
405     unsigned int *binding_table;
406     dri_bo *bo = pp_context->binding_table.bo;
407     int i;
408
409     dri_bo_map(bo, 1);
410     assert(bo->virtual);
411     binding_table = bo->virtual;
412     memset(binding_table, 0, bo->size);
413
414     for (i = 0; i < MAX_PP_SURFACES; i++) {
415         if (pp_context->surfaces[i].ss_bo) {
416             assert(pp_context->surfaces[i].s_bo);
417
418             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
419             dri_bo_emit_reloc(bo,
420                               I915_GEM_DOMAIN_INSTRUCTION, 0,
421                               0,
422                               i * sizeof(*binding_table),
423                               pp_context->surfaces[i].ss_bo);
424         }
425     
426     }
427
428     dri_bo_unmap(bo);
429 }
430
431 static void
432 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
433 {
434     struct i965_vfe_state *vfe_state;
435     dri_bo *bo;
436
437     bo = pp_context->vfe_state.bo;
438     dri_bo_map(bo, 1);
439     assert(bo->virtual);
440     vfe_state = bo->virtual;
441     memset(vfe_state, 0, sizeof(*vfe_state));
442     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
443     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
444     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
445     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
446     vfe_state->vfe1.children_present = 0;
447     vfe_state->vfe2.interface_descriptor_base = 
448         pp_context->idrt.bo->offset >> 4; /* reloc */
449     dri_bo_emit_reloc(bo,
450                       I915_GEM_DOMAIN_INSTRUCTION, 0,
451                       0,
452                       offsetof(struct i965_vfe_state, vfe2),
453                       pp_context->idrt.bo);
454     dri_bo_unmap(bo);
455 }
456
457 static void
458 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
459 {
460     unsigned char *constant_buffer;
461
462     assert(sizeof(ironlake_pp_static_parameter) == 128);
463     dri_bo_map(pp_context->curbe.bo, 1);
464     assert(pp_context->curbe.bo->virtual);
465     constant_buffer = pp_context->curbe.bo->virtual;
466     memcpy(constant_buffer, &ironlake_pp_static_parameter, sizeof(ironlake_pp_static_parameter));
467     dri_bo_unmap(pp_context->curbe.bo);
468 }
469
470 static void
471 ironlake_pp_states_setup(VADriverContextP ctx)
472 {
473     struct i965_driver_data *i965 = i965_driver_data(ctx);
474     struct i965_post_processing_context *pp_context = i965->pp_context;
475
476     ironlake_pp_surface_state(pp_context);
477     ironlake_pp_binding_table(pp_context);
478     ironlake_pp_interface_descriptor_table(pp_context);
479     ironlake_pp_vfe_state(pp_context);
480     ironlake_pp_upload_constants(pp_context);
481 }
482
483 static void
484 ironlake_pp_pipeline_select(VADriverContextP ctx)
485 {
486     BEGIN_BATCH(ctx, 1);
487     OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
488     ADVANCE_BATCH(ctx);
489 }
490
491 static void
492 ironlake_pp_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
493 {
494     unsigned int vfe_fence, cs_fence;
495
496     vfe_fence = pp_context->urb.cs_start;
497     cs_fence = pp_context->urb.size;
498
499     BEGIN_BATCH(ctx, 3);
500     OUT_BATCH(ctx, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
501     OUT_BATCH(ctx, 0);
502     OUT_BATCH(ctx, 
503               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
504               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
505     ADVANCE_BATCH(ctx);
506 }
507
508 static void
509 ironlake_pp_state_base_address(VADriverContextP ctx)
510 {
511     BEGIN_BATCH(ctx, 8);
512     OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6);
513     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
514     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
515     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
516     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
517     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
518     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
519     OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
520     ADVANCE_BATCH(ctx);
521 }
522
523 static void
524 ironlake_pp_state_pointers(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
525 {
526     BEGIN_BATCH(ctx, 3);
527     OUT_BATCH(ctx, CMD_MEDIA_STATE_POINTERS | 1);
528     OUT_BATCH(ctx, 0);
529     OUT_RELOC(ctx, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
530     ADVANCE_BATCH(ctx);
531 }
532
533 static void 
534 ironlake_pp_cs_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
535 {
536     BEGIN_BATCH(ctx, 2);
537     OUT_BATCH(ctx, CMD_CS_URB_STATE | 0);
538     OUT_BATCH(ctx,
539               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
540               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
541     ADVANCE_BATCH(ctx);
542 }
543
544 static void
545 ironlake_pp_constant_buffer(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
546 {
547     BEGIN_BATCH(ctx, 2);
548     OUT_BATCH(ctx, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
549     OUT_RELOC(ctx, pp_context->curbe.bo,
550               I915_GEM_DOMAIN_INSTRUCTION, 0,
551               pp_context->urb.size_cs_entry - 1);
552     ADVANCE_BATCH(ctx);    
553 }
554
555 static void
556 ironlake_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
557 {
558     int x, x_steps, y, y_steps;
559
560     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
561     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
562
563     for (y = 0; y < y_steps; y++) {
564         for (x = 0; x < x_steps; x++) {
565             if (!pp_context->pp_set_block_parameter(&pp_context->private_context, x, y)) {
566                 BEGIN_BATCH(ctx, 20);
567                 OUT_BATCH(ctx, CMD_MEDIA_OBJECT | 18);
568                 OUT_BATCH(ctx, 0);
569                 OUT_BATCH(ctx, 0); /* no indirect data */
570                 OUT_BATCH(ctx, 0);
571
572                 /* inline data grf 5-6 */
573                 assert(sizeof(ironlake_pp_inline_parameter) == 64);
574                 intel_batchbuffer_data(ctx, &ironlake_pp_inline_parameter, sizeof(ironlake_pp_inline_parameter));
575
576                 ADVANCE_BATCH(ctx);
577             }
578         }
579     }
580 }
581
582 static void
583 ironlake_pp_pipeline_setup(VADriverContextP ctx)
584 {
585     struct i965_driver_data *i965 = i965_driver_data(ctx);
586     struct i965_post_processing_context *pp_context = i965->pp_context;
587
588     intel_batchbuffer_start_atomic(ctx, 0x1000);
589     intel_batchbuffer_emit_mi_flush(ctx);
590     ironlake_pp_pipeline_select(ctx);
591     ironlake_pp_state_base_address(ctx);
592     ironlake_pp_state_pointers(ctx, pp_context);
593     ironlake_pp_urb_layout(ctx, pp_context);
594     ironlake_pp_cs_urb_layout(ctx, pp_context);
595     ironlake_pp_constant_buffer(ctx, pp_context);
596     ironlake_pp_object_walker(ctx, pp_context);
597     intel_batchbuffer_end_atomic(ctx);
598 }
599
/* NULL module: the walk is a single column; private_context is ignored. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
605
/* NULL module: the walk is a single row; private_context is ignored. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
611
/*
 * NULL module: nothing to set up per block.  Always returns 0, which the
 * object walker treats as "emit this block".
 */
static int
pp_null_set_block_parameter(void *private_context, int x, int y)
{
    (void)private_context;
    (void)x;
    (void)y;

    return 0;
}
617
618 static void
619 ironlake_pp_null_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
620                             unsigned short srcw, unsigned short srch,
621                             unsigned short destw, unsigned short desth)
622 {
623     struct i965_driver_data *i965 = i965_driver_data(ctx);
624     struct i965_post_processing_context *pp_context = i965->pp_context;
625     struct object_surface *obj_surface;
626
627     /* surface */
628     obj_surface = SURFACE(surface);
629     dri_bo_unreference(obj_surface->pp_out_bo);
630     obj_surface->pp_out_bo = obj_surface->bo;
631     dri_bo_reference(obj_surface->pp_out_bo);
632     assert(obj_surface->pp_out_bo);
633     obj_surface->pp_out_width = obj_surface->width;
634     obj_surface->pp_out_height = obj_surface->height;
635     obj_surface->orig_pp_out_width = obj_surface->orig_width;
636     obj_surface->orig_pp_out_height = obj_surface->orig_height;
637
638     /* private function & data */
639     pp_context->pp_x_steps = pp_null_x_steps;
640     pp_context->pp_y_steps = pp_null_y_steps;
641     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
642 }
643
/* Load & Save module: the walk is a single column; private_context is ignored. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
649
650 static int
651 pp_load_save_y_steps(void *private_context)
652 {
653     struct pp_load_save_context *pp_load_save_context = private_context;
654
655     return pp_load_save_context->dest_h / 8;
656 }
657
658 static int
659 pp_load_save_set_block_parameter(void *private_context, int x, int y)
660 {
661     ironlake_pp_inline_parameter.grf5.block_vertical_mask = 0xff;
662     ironlake_pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
663     ironlake_pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
664     ironlake_pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
665
666     return 0;
667 }
668
669 static void
670 ironlake_pp_nv12_load_save_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
671                                       unsigned short srcw, unsigned short srch,
672                                       unsigned short destw, unsigned short desth)
673 {
674     struct i965_driver_data *i965 = i965_driver_data(ctx);
675     struct i965_post_processing_context *pp_context = i965->pp_context;
676     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
677     struct object_surface *obj_surface;
678     struct i965_surface_state *ss;
679     dri_bo *bo;
680     int index, w, h;
681     int orig_w, orig_h;
682
683     /* surface */
684     obj_surface = SURFACE(surface);
685     orig_w = obj_surface->orig_width;
686     orig_h = obj_surface->orig_height;
687     w = obj_surface->width;
688     h = obj_surface->height;
689
690     dri_bo_unreference(obj_surface->pp_out_bo);
691     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
692                                           "intermediate surface",
693                                           SIZE_YUV420(w, h),
694                                           4096);
695     assert(obj_surface->pp_out_bo);
696     obj_surface->pp_out_width = obj_surface->width;
697     obj_surface->pp_out_height = obj_surface->height;
698     obj_surface->orig_pp_out_width = obj_surface->orig_width;
699     obj_surface->orig_pp_out_height = obj_surface->orig_height;
700
701     /* source Y surface index 1 */
702     index = 1;
703     pp_context->surfaces[index].s_bo = obj_surface->bo;
704     dri_bo_reference(pp_context->surfaces[index].s_bo);
705     bo = dri_bo_alloc(i965->intel.bufmgr, 
706                       "surface state", 
707                       sizeof(struct i965_surface_state), 
708                       4096);
709     assert(bo);
710     pp_context->surfaces[index].ss_bo = bo;
711     dri_bo_map(bo, True);
712     assert(bo->virtual);
713     ss = bo->virtual;
714     memset(ss, 0, sizeof(*ss));
715     ss->ss0.surface_type = I965_SURFACE_2D;
716     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
717     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
718     ss->ss2.width = orig_w / 4 - 1;
719     ss->ss2.height = orig_h - 1;
720     ss->ss3.pitch = w - 1;
721     dri_bo_emit_reloc(bo,
722                       I915_GEM_DOMAIN_RENDER, 
723                       0,
724                       0,
725                       offsetof(struct i965_surface_state, ss1),
726                       pp_context->surfaces[index].s_bo);
727     dri_bo_unmap(bo);
728
729     /* source UV surface index 2 */
730     index = 2;
731     pp_context->surfaces[index].s_bo = obj_surface->bo;
732     dri_bo_reference(pp_context->surfaces[index].s_bo);
733     bo = dri_bo_alloc(i965->intel.bufmgr, 
734                       "surface state", 
735                       sizeof(struct i965_surface_state), 
736                       4096);
737     assert(bo);
738     pp_context->surfaces[index].ss_bo = bo;
739     dri_bo_map(bo, True);
740     assert(bo->virtual);
741     ss = bo->virtual;
742     memset(ss, 0, sizeof(*ss));
743     ss->ss0.surface_type = I965_SURFACE_2D;
744     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
745     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
746     ss->ss2.width = orig_w / 4 - 1;
747     ss->ss2.height = orig_h / 2 - 1;
748     ss->ss3.pitch = w - 1;
749     dri_bo_emit_reloc(bo,
750                       I915_GEM_DOMAIN_RENDER, 
751                       0,
752                       w * h,
753                       offsetof(struct i965_surface_state, ss1),
754                       pp_context->surfaces[index].s_bo);
755     dri_bo_unmap(bo);
756
757     /* destination Y surface index 7 */
758     index = 7;
759     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
760     dri_bo_reference(pp_context->surfaces[index].s_bo);
761     bo = dri_bo_alloc(i965->intel.bufmgr, 
762                       "surface state", 
763                       sizeof(struct i965_surface_state), 
764                       4096);
765     assert(bo);
766     pp_context->surfaces[index].ss_bo = bo;
767     dri_bo_map(bo, True);
768     assert(bo->virtual);
769     ss = bo->virtual;
770     memset(ss, 0, sizeof(*ss));
771     ss->ss0.surface_type = I965_SURFACE_2D;
772     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
773     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
774     ss->ss2.width = orig_w / 4 - 1;
775     ss->ss2.height = orig_h - 1;
776     ss->ss3.pitch = w - 1;
777     dri_bo_emit_reloc(bo,
778                       I915_GEM_DOMAIN_RENDER, 
779                       I915_GEM_DOMAIN_RENDER,
780                       0,
781                       offsetof(struct i965_surface_state, ss1),
782                       pp_context->surfaces[index].s_bo);
783     dri_bo_unmap(bo);
784
785     /* destination UV surface index 8 */
786     index = 8;
787     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
788     dri_bo_reference(pp_context->surfaces[index].s_bo);
789     bo = dri_bo_alloc(i965->intel.bufmgr, 
790                       "surface state", 
791                       sizeof(struct i965_surface_state), 
792                       4096);
793     assert(bo);
794     pp_context->surfaces[index].ss_bo = bo;
795     dri_bo_map(bo, True);
796     assert(bo->virtual);
797     ss = bo->virtual;
798     memset(ss, 0, sizeof(*ss));
799     ss->ss0.surface_type = I965_SURFACE_2D;
800     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
801     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
802     ss->ss2.width = orig_w / 4 - 1;
803     ss->ss2.height = orig_h / 2 - 1;
804     ss->ss3.pitch = w - 1;
805     dri_bo_emit_reloc(bo,
806                       I915_GEM_DOMAIN_RENDER, 
807                       I915_GEM_DOMAIN_RENDER,
808                       w * h,
809                       offsetof(struct i965_surface_state, ss1),
810                       pp_context->surfaces[index].s_bo);
811     dri_bo_unmap(bo);
812
813     /* private function & data */
814     pp_context->pp_x_steps = pp_load_save_x_steps;
815     pp_context->pp_y_steps = pp_load_save_y_steps;
816     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
817     pp_load_save_context->dest_h = h;
818     pp_load_save_context->dest_w = w;
819
820     ironlake_pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
821     ironlake_pp_inline_parameter.grf5.number_blocks = w / 16;
822 }
823
/* Scaling module: the walk is a single column; private_context is ignored. */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
829
830 static int
831 pp_scaling_y_steps(void *private_context)
832 {
833     struct pp_scaling_context *pp_scaling_context = private_context;
834
835     return pp_scaling_context->dest_h / 8;
836 }
837
838 static int
839 pp_scaling_set_block_parameter(void *private_context, int x, int y)
840 {
841     float src_x_steping = ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step;
842     float src_y_steping = ironlake_pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
843
844     ironlake_pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16;
845     ironlake_pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8;
846     ironlake_pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
847     ironlake_pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
848     
849     return 0;
850 }
851
852 static void
853 ironlake_pp_nv12_scaling_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
854                                     unsigned short srcw, unsigned short srch,
855                                     unsigned short destw, unsigned short desth)
856 {
857     struct i965_driver_data *i965 = i965_driver_data(ctx);
858     struct i965_post_processing_context *pp_context = i965->pp_context;
859     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
860     struct object_surface *obj_surface;
861     struct i965_sampler_state *sampler_state;
862     struct i965_surface_state *ss;
863     dri_bo *bo;
864     int index;
865     int w, h;
866     int orig_w, orig_h;
867     int pp_out_w, pp_out_h;
868     int orig_pp_out_w, orig_pp_out_h;
869
870     /* surface */
871     obj_surface = SURFACE(surface);
872     orig_w = obj_surface->orig_width;
873     orig_h = obj_surface->orig_height;
874     w = obj_surface->width;
875     h = obj_surface->height;
876
877     orig_pp_out_w = destw;
878     orig_pp_out_h = desth;
879     pp_out_w = ALIGN(orig_pp_out_w, 16);
880     pp_out_h = ALIGN(orig_pp_out_h, 16);
881     dri_bo_unreference(obj_surface->pp_out_bo);
882     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
883                                           "intermediate surface",
884                                           SIZE_YUV420(pp_out_w, pp_out_h),
885                                           4096);
886     assert(obj_surface->pp_out_bo);
887     obj_surface->orig_pp_out_width = orig_pp_out_w;
888     obj_surface->orig_pp_out_height = orig_pp_out_h;
889     obj_surface->pp_out_width = pp_out_w;
890     obj_surface->pp_out_height = pp_out_h;
891
892     /* source Y surface index 1 */
893     index = 1;
894     pp_context->surfaces[index].s_bo = obj_surface->bo;
895     dri_bo_reference(pp_context->surfaces[index].s_bo);
896     bo = dri_bo_alloc(i965->intel.bufmgr, 
897                       "surface state", 
898                       sizeof(struct i965_surface_state), 
899                       4096);
900     assert(bo);
901     pp_context->surfaces[index].ss_bo = bo;
902     dri_bo_map(bo, True);
903     assert(bo->virtual);
904     ss = bo->virtual;
905     memset(ss, 0, sizeof(*ss));
906     ss->ss0.surface_type = I965_SURFACE_2D;
907     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
908     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
909     ss->ss2.width = orig_w - 1;
910     ss->ss2.height = orig_h - 1;
911     ss->ss3.pitch = w - 1;
912     dri_bo_emit_reloc(bo,
913                       I915_GEM_DOMAIN_RENDER, 
914                       0,
915                       0,
916                       offsetof(struct i965_surface_state, ss1),
917                       pp_context->surfaces[index].s_bo);
918     dri_bo_unmap(bo);
919
920     /* source UV surface index 2 */
921     index = 2;
922     pp_context->surfaces[index].s_bo = obj_surface->bo;
923     dri_bo_reference(pp_context->surfaces[index].s_bo);
924     bo = dri_bo_alloc(i965->intel.bufmgr, 
925                       "surface state", 
926                       sizeof(struct i965_surface_state), 
927                       4096);
928     assert(bo);
929     pp_context->surfaces[index].ss_bo = bo;
930     dri_bo_map(bo, True);
931     assert(bo->virtual);
932     ss = bo->virtual;
933     memset(ss, 0, sizeof(*ss));
934     ss->ss0.surface_type = I965_SURFACE_2D;
935     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
936     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
937     ss->ss2.width = orig_w / 2 - 1;
938     ss->ss2.height = orig_h / 2 - 1;
939     ss->ss3.pitch = w - 1;
940     dri_bo_emit_reloc(bo,
941                       I915_GEM_DOMAIN_RENDER, 
942                       0,
943                       w * h,
944                       offsetof(struct i965_surface_state, ss1),
945                       pp_context->surfaces[index].s_bo);
946     dri_bo_unmap(bo);
947
948     /* destination Y surface index 7 */
949     index = 7;
950     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
951     dri_bo_reference(pp_context->surfaces[index].s_bo);
952     bo = dri_bo_alloc(i965->intel.bufmgr, 
953                       "surface state", 
954                       sizeof(struct i965_surface_state), 
955                       4096);
956     assert(bo);
957     pp_context->surfaces[index].ss_bo = bo;
958     dri_bo_map(bo, True);
959     assert(bo->virtual);
960     ss = bo->virtual;
961     memset(ss, 0, sizeof(*ss));
962     ss->ss0.surface_type = I965_SURFACE_2D;
963     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
964     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
965     ss->ss2.width = pp_out_w / 4 - 1;
966     ss->ss2.height = pp_out_h - 1;
967     ss->ss3.pitch = pp_out_w - 1;
968     dri_bo_emit_reloc(bo,
969                       I915_GEM_DOMAIN_RENDER, 
970                       I915_GEM_DOMAIN_RENDER,
971                       0,
972                       offsetof(struct i965_surface_state, ss1),
973                       pp_context->surfaces[index].s_bo);
974     dri_bo_unmap(bo);
975
976     /* destination UV surface index 8 */
977     index = 8;
978     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
979     dri_bo_reference(pp_context->surfaces[index].s_bo);
980     bo = dri_bo_alloc(i965->intel.bufmgr, 
981                       "surface state", 
982                       sizeof(struct i965_surface_state), 
983                       4096);
984     assert(bo);
985     pp_context->surfaces[index].ss_bo = bo;
986     dri_bo_map(bo, True);
987     assert(bo->virtual);
988     ss = bo->virtual;
989     memset(ss, 0, sizeof(*ss));
990     ss->ss0.surface_type = I965_SURFACE_2D;
991     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
992     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + pp_out_w * pp_out_h;
993     ss->ss2.width = pp_out_w / 4 - 1;
994     ss->ss2.height = pp_out_h / 2 - 1;
995     ss->ss3.pitch = pp_out_w - 1;
996     dri_bo_emit_reloc(bo,
997                       I915_GEM_DOMAIN_RENDER, 
998                       I915_GEM_DOMAIN_RENDER,
999                       pp_out_w * pp_out_h,
1000                       offsetof(struct i965_surface_state, ss1),
1001                       pp_context->surfaces[index].s_bo);
1002     dri_bo_unmap(bo);
1003
1004     /* sampler state */
1005     dri_bo_map(pp_context->sampler_state_table.bo, True);
1006     assert(pp_context->sampler_state_table.bo->virtual);
1007     sampler_state = pp_context->sampler_state_table.bo->virtual;
1008
1009     /* SIMD16 Y index 1 */
1010     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
1011     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1012     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1013     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1014     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1015
1016     /* SIMD16 UV index 2 */
1017     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
1018     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1019     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1020     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1021     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1022
1023     dri_bo_unmap(pp_context->sampler_state_table.bo);
1024
1025     /* private function & data */
1026     pp_context->pp_x_steps = pp_scaling_x_steps;
1027     pp_context->pp_y_steps = pp_scaling_y_steps;
1028     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
1029
1030     pp_scaling_context->dest_w = pp_out_w;
1031     pp_scaling_context->dest_h = pp_out_h;
1032
1033     ironlake_pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) 1.0 / pp_out_h;
1034     ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) 1.0 / pp_out_w;
1035     ironlake_pp_inline_parameter.grf5.block_count_x = pp_out_w / 16;   /* 1 x N */
1036     ironlake_pp_inline_parameter.grf5.number_blocks = pp_out_w / 16;
1037     ironlake_pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1038     ironlake_pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1039 }
1040
1041 static int
1042 pp_avs_x_steps(void *private_context)
1043 {
1044     struct pp_avs_context *pp_avs_context = private_context;
1045
1046     return pp_avs_context->dest_w / 16;
1047 }
1048
/*
 * Number of vertical steps for the AVS kernel.  Always 1; the private
 * context is not consulted.
 */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1054
1055 static int
1056 pp_avs_set_block_parameter(void *private_context, int x, int y)
1057 {
1058     struct pp_avs_context *pp_avs_context = private_context;
1059     float src_x_steping, src_y_steping, video_step_delta;
1060     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1061
1062     if (tmp_w >= pp_avs_context->dest_w) {
1063         ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1064         ironlake_pp_inline_parameter.grf6.video_step_delta = 0;
1065         
1066         if (x == 0) {
1067             ironlake_pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2;
1068         } else {
1069             src_x_steping = ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1070             video_step_delta = ironlake_pp_inline_parameter.grf6.video_step_delta;
1071             ironlake_pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1072                 16 * 15 * video_step_delta / 2;
1073         }
1074     } else {
1075         int n0, n1, n2, nls_left, nls_right;
1076         int factor_a = 5, factor_b = 4;
1077         float f;
1078
1079         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1080         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1081         n2 = tmp_w / (16 * factor_a);
1082         nls_left = n0 + n2;
1083         nls_right = n1 + n2;
1084         f = (float) n2 * 16 / tmp_w;
1085         
1086         if (n0 < 5) {
1087             ironlake_pp_inline_parameter.grf6.video_step_delta = 0.0;
1088
1089             if (x == 0) {
1090                 ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1091                 ironlake_pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = 0.0;
1092             } else {
1093                 src_x_steping = ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1094                 video_step_delta = ironlake_pp_inline_parameter.grf6.video_step_delta;
1095                 ironlake_pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1096                     16 * 15 * video_step_delta / 2;
1097             }
1098         } else {
1099             if (x < nls_left) {
1100                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1101                 float a = f / (nls_left * 16 * factor_b);
1102                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1103                 
1104                 ironlake_pp_inline_parameter.grf6.video_step_delta = b;
1105
1106                 if (x == 0) {
1107                     ironlake_pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = 0.0;
1108                     ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step = a;
1109                 } else {
1110                     src_x_steping = ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1111                     video_step_delta = ironlake_pp_inline_parameter.grf6.video_step_delta;
1112                     ironlake_pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1113                         16 * 15 * video_step_delta / 2;
1114                     ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b;
1115                 }
1116             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1117                 /* scale the center linearly */
1118                 src_x_steping = ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1119                 video_step_delta = ironlake_pp_inline_parameter.grf6.video_step_delta;
1120                 ironlake_pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1121                     16 * 15 * video_step_delta / 2;
1122                 ironlake_pp_inline_parameter.grf6.video_step_delta = 0.0;
1123                 ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1124             } else {
1125                 float a = f / (nls_right * 16 * factor_b);
1126                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1127
1128                 src_x_steping = ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1129                 video_step_delta = ironlake_pp_inline_parameter.grf6.video_step_delta;
1130                 ironlake_pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1131                     16 * 15 * video_step_delta / 2;
1132                 ironlake_pp_inline_parameter.grf6.video_step_delta = -b;
1133
1134                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1135                     ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
1136                 else
1137                     ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16;
1138             }
1139         }
1140     }
1141
1142     src_y_steping = ironlake_pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
1143     ironlake_pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8;
1144     ironlake_pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1145     ironlake_pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
1146
1147     return 0;
1148 }
1149
1150 static void
1151 ironlake_pp_nv12_avs_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
1152                                 unsigned short srcw, unsigned short srch,
1153                                 unsigned short destw, unsigned short desth)
1154 {
1155     struct i965_driver_data *i965 = i965_driver_data(ctx);
1156     struct i965_post_processing_context *pp_context = i965->pp_context;
1157     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1158     struct object_surface *obj_surface;
1159     struct i965_surface_state *ss;
1160     struct i965_sampler_8x8 *sampler_8x8;
1161     struct i965_sampler_8x8_state *sampler_8x8_state;
1162     struct i965_surface_state2 *ss_8x8;
1163     dri_bo *bo;
1164     int index;
1165     int w, h;
1166     int orig_w, orig_h;
1167     int pp_out_w, pp_out_h;
1168     int orig_pp_out_w, orig_pp_out_h;
1169
1170     /* surface */
1171     obj_surface = SURFACE(surface);
1172     
1173     if (input == 1) {
1174         assert(obj_surface->pp_out_bo);
1175         orig_w = obj_surface->orig_pp_out_width;
1176         orig_h = obj_surface->orig_pp_out_height;
1177         w = obj_surface->pp_out_width;
1178         h = obj_surface->pp_out_height;
1179     } else {
1180         orig_w = obj_surface->orig_width;
1181         orig_h = obj_surface->orig_height;
1182         w = obj_surface->width;
1183         h = obj_surface->height;
1184     } 
1185     /* source Y surface index 1 */
1186     index = 1;
1187     pp_context->surfaces[index].s_bo = (input == 1 ? obj_surface->pp_out_bo : obj_surface->bo);
1188     dri_bo_reference(pp_context->surfaces[index].s_bo);
1189     bo = dri_bo_alloc(i965->intel.bufmgr, 
1190                       "Y surface state for sample_8x8", 
1191                       sizeof(struct i965_surface_state2), 
1192                       4096);
1193     assert(bo);
1194     pp_context->surfaces[index].ss_bo = bo;
1195     dri_bo_map(bo, True);
1196     assert(bo->virtual);
1197     ss_8x8 = bo->virtual;
1198     memset(ss_8x8, 0, sizeof(*ss_8x8));
1199     ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
1200     ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
1201     ss_8x8->ss1.width = orig_w - 1;
1202     ss_8x8->ss1.height = orig_h - 1;
1203     ss_8x8->ss2.half_pitch_for_chroma = 0;
1204     ss_8x8->ss2.pitch = w - 1;
1205     ss_8x8->ss2.interleave_chroma = 0;
1206     ss_8x8->ss2.surface_format = SURFACE_FORMAT_Y8_UNORM;
1207     ss_8x8->ss3.x_offset_for_cb = 0;
1208     ss_8x8->ss3.y_offset_for_cb = 0;
1209     dri_bo_emit_reloc(bo,
1210                       I915_GEM_DOMAIN_RENDER, 
1211                       0,
1212                       0,
1213                       offsetof(struct i965_surface_state2, ss0),
1214                       pp_context->surfaces[index].s_bo);
1215     dri_bo_unmap(bo);
1216
1217     /* source UV surface index 2 */
1218     index = 2;
1219     pp_context->surfaces[index].s_bo = (input == 1 ? obj_surface->pp_out_bo : obj_surface->bo);
1220     dri_bo_reference(pp_context->surfaces[index].s_bo);
1221     bo = dri_bo_alloc(i965->intel.bufmgr, 
1222                       "UV surface state for sample_8x8", 
1223                       sizeof(struct i965_surface_state2), 
1224                       4096);
1225     assert(bo);
1226     pp_context->surfaces[index].ss_bo = bo;
1227     dri_bo_map(bo, True);
1228     assert(bo->virtual);
1229     ss_8x8 = bo->virtual;
1230     memset(ss_8x8, 0, sizeof(*ss_8x8));
1231     ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset + w * h;
1232     ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
1233     ss_8x8->ss1.width = orig_w / 2 - 1;
1234     ss_8x8->ss1.height = orig_h / 2 - 1;
1235     ss_8x8->ss2.half_pitch_for_chroma = 0;
1236     ss_8x8->ss2.pitch = w - 1;
1237     ss_8x8->ss2.interleave_chroma = 0;
1238     ss_8x8->ss2.surface_format = SURFACE_FORMAT_R8B8_UNORM;
1239     ss_8x8->ss3.x_offset_for_cb = 0;
1240     ss_8x8->ss3.y_offset_for_cb = 0;
1241     dri_bo_emit_reloc(bo,
1242                       I915_GEM_DOMAIN_RENDER, 
1243                       0,
1244                       w * h,
1245                       offsetof(struct i965_surface_state2, ss0),
1246                       pp_context->surfaces[index].s_bo);
1247     dri_bo_unmap(bo);
1248
1249     orig_pp_out_w = destw;
1250     orig_pp_out_h = desth;
1251     pp_out_w = ALIGN(orig_pp_out_w, 16);
1252     pp_out_h = ALIGN(orig_pp_out_h, 16);
1253     dri_bo_unreference(obj_surface->pp_out_bo);
1254     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
1255                                           "intermediate surface",
1256                                           SIZE_YUV420(pp_out_w, pp_out_h),
1257                                           4096);
1258     assert(obj_surface->pp_out_bo);
1259     obj_surface->orig_pp_out_width = orig_pp_out_w;
1260     obj_surface->orig_pp_out_height = orig_pp_out_h;
1261     obj_surface->pp_out_width = pp_out_w;
1262     obj_surface->pp_out_height = pp_out_h;
1263
1264     /* destination Y surface index 7 */
1265     index = 7;
1266     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1267     dri_bo_reference(pp_context->surfaces[index].s_bo);
1268     bo = dri_bo_alloc(i965->intel.bufmgr, 
1269                       "surface state", 
1270                       sizeof(struct i965_surface_state), 
1271                       4096);
1272     assert(bo);
1273     pp_context->surfaces[index].ss_bo = bo;
1274     dri_bo_map(bo, True);
1275     assert(bo->virtual);
1276     ss = bo->virtual;
1277     memset(ss, 0, sizeof(*ss));
1278     ss->ss0.surface_type = I965_SURFACE_2D;
1279     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1280     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1281     ss->ss2.width = pp_out_w / 4 - 1;
1282     ss->ss2.height = pp_out_h - 1;
1283     ss->ss3.pitch = pp_out_w - 1;
1284     dri_bo_emit_reloc(bo,
1285                       I915_GEM_DOMAIN_RENDER, 
1286                       I915_GEM_DOMAIN_RENDER,
1287                       0,
1288                       offsetof(struct i965_surface_state, ss1),
1289                       pp_context->surfaces[index].s_bo);
1290     dri_bo_unmap(bo);
1291
1292     /* destination UV surface index 8 */
1293     index = 8;
1294     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1295     dri_bo_reference(pp_context->surfaces[index].s_bo);
1296     bo = dri_bo_alloc(i965->intel.bufmgr, 
1297                       "surface state", 
1298                       sizeof(struct i965_surface_state), 
1299                       4096);
1300     assert(bo);
1301     pp_context->surfaces[index].ss_bo = bo;
1302     dri_bo_map(bo, True);
1303     assert(bo->virtual);
1304     ss = bo->virtual;
1305     memset(ss, 0, sizeof(*ss));
1306     ss->ss0.surface_type = I965_SURFACE_2D;
1307     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1308     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + pp_out_w * pp_out_h;
1309     ss->ss2.width = pp_out_w / 4 - 1;
1310     ss->ss2.height = pp_out_h / 2 - 1;
1311     ss->ss3.pitch = pp_out_w - 1;
1312     dri_bo_emit_reloc(bo,
1313                       I915_GEM_DOMAIN_RENDER, 
1314                       I915_GEM_DOMAIN_RENDER,
1315                       pp_out_w * pp_out_h,
1316                       offsetof(struct i965_surface_state, ss1),
1317                       pp_context->surfaces[index].s_bo);
1318     dri_bo_unmap(bo);
1319     
1320     /* sampler 8x8 state */
1321     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1322     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1323     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1324     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1325     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1326     sampler_8x8_state->dw136.default_sharpness_level = 0;
1327     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1328     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1329     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1330     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1331
1332     /* sampler 8x8 */
1333     dri_bo_map(pp_context->sampler_state_table.bo, True);
1334     assert(pp_context->sampler_state_table.bo->virtual);
1335     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
1336     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1337
1338     /* sample_8x8 Y index 1 */
1339     index = 1;
1340     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1341     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1342     sampler_8x8[index].dw0.ief_bypass = 0;
1343     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1344     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1345     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1346     sampler_8x8[index].dw2.global_noise_estimation = 22;
1347     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1348     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1349     sampler_8x8[index].dw3.strong_edge_weight = 7;
1350     sampler_8x8[index].dw3.regular_weight = 2;
1351     sampler_8x8[index].dw3.non_edge_weight = 0;
1352     sampler_8x8[index].dw3.gain_factor = 40;
1353     sampler_8x8[index].dw4.steepness_boost = 0;
1354     sampler_8x8[index].dw4.steepness_threshold = 0;
1355     sampler_8x8[index].dw4.mr_boost = 0;
1356     sampler_8x8[index].dw4.mr_threshold = 5;
1357     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1358     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1359     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1360     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1361     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1362     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1363     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1364     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1365     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1366     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1367     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1368     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1369     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1370     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1371     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1372     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1373     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1374     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1375     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1376     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1377     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1378     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1379     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1380     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1381     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1382     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1383     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1384     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1385     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1386     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1387     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1388     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1389     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1390     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1391     sampler_8x8[index].dw13.limiter_boost = 0;
1392     sampler_8x8[index].dw13.minimum_limiter = 10;
1393     sampler_8x8[index].dw13.maximum_limiter = 11;
1394     sampler_8x8[index].dw14.clip_limiter = 130;
1395     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1396                       I915_GEM_DOMAIN_RENDER, 
1397                       0,
1398                       0,
1399                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1400                       pp_context->sampler_state_table.bo_8x8);
1401
1402     dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True);
1403     assert(pp_context->sampler_state_table.bo_8x8_uv->virtual);
1404     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1405     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual;
1406     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1407     sampler_8x8_state->dw136.default_sharpness_level = 0;
1408     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
1409     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1410     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1411     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv);
1412
1413     /* sample_8x8 UV index 2 */
1414     index = 2;
1415     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1416     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST;
1417     sampler_8x8[index].dw0.ief_bypass = 0;
1418     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1419     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1420     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5;
1421     sampler_8x8[index].dw2.global_noise_estimation = 22;
1422     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1423     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1424     sampler_8x8[index].dw3.strong_edge_weight = 7;
1425     sampler_8x8[index].dw3.regular_weight = 2;
1426     sampler_8x8[index].dw3.non_edge_weight = 0;
1427     sampler_8x8[index].dw3.gain_factor = 40;
1428     sampler_8x8[index].dw4.steepness_boost = 0;
1429     sampler_8x8[index].dw4.steepness_threshold = 0;
1430     sampler_8x8[index].dw4.mr_boost = 0;
1431     sampler_8x8[index].dw4.mr_threshold = 5;
1432     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1433     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1434     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1435     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1436     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1437     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1438     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1439     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1440     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1441     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1442     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1443     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1444     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1445     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1446     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1447     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1448     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1449     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1450     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1451     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1452     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1453     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1454     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1455     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1456     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1457     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1458     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1459     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1460     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1461     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1462     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1463     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1464     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1465     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1466     sampler_8x8[index].dw13.limiter_boost = 0;
1467     sampler_8x8[index].dw13.minimum_limiter = 10;
1468     sampler_8x8[index].dw13.maximum_limiter = 11;
1469     sampler_8x8[index].dw14.clip_limiter = 130;
1470     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1471                       I915_GEM_DOMAIN_RENDER, 
1472                       0,
1473                       0,
1474                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1475                       pp_context->sampler_state_table.bo_8x8_uv);
1476
1477     dri_bo_unmap(pp_context->sampler_state_table.bo);
1478
1479     /* private function & data */
1480     pp_context->pp_x_steps = pp_avs_x_steps;
1481     pp_context->pp_y_steps = pp_avs_y_steps;
1482     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
1483
1484     pp_avs_context->dest_w = pp_out_w;
1485     pp_avs_context->dest_h = pp_out_h;
1486     pp_avs_context->src_w = w;
1487     pp_avs_context->src_h = h;
1488
1489     ironlake_pp_static_parameter.grf4.r4_2.avs.nlas = 1;
1490     ironlake_pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) 1.0 / pp_out_h;
1491     ironlake_pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) 1.0 / pp_out_w;
1492     ironlake_pp_inline_parameter.grf5.block_count_x = 1;        /* M x 1 */
1493     ironlake_pp_inline_parameter.grf5.number_blocks = pp_out_h / 8;
1494     ironlake_pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1495     ironlake_pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1496     ironlake_pp_inline_parameter.grf6.video_step_delta = 0.0;
1497 }
1498
/*
 * Number of horizontal steps for the DNDI kernel.  Always 1; the private
 * context is not consulted.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1504
1505 static int
1506 pp_dndi_y_steps(void *private_context)
1507 {
1508     struct pp_dndi_context *pp_dndi_context = private_context;
1509
1510     return pp_dndi_context->dest_h / 4;
1511 }
1512
1513 static int
1514 pp_dndi_set_block_parameter(void *private_context, int x, int y)
1515 {
1516     ironlake_pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1517     ironlake_pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4;
1518
1519     return 0;
1520 }
1521
1522 static 
1523 void ironlake_pp_nv12_dndi_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
1524                                       unsigned short srcw, unsigned short srch,
1525                                       unsigned short destw, unsigned short desth)
1526 {
1527     struct i965_driver_data *i965 = i965_driver_data(ctx);
1528     struct i965_post_processing_context *pp_context = i965->pp_context;
1529     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
1530     struct object_surface *obj_surface;
1531     struct i965_surface_state *ss;
1532     struct i965_surface_state2 *ss_dndi;
1533     struct i965_sampler_dndi *sampler_dndi;
1534     dri_bo *bo;
1535     int index;
1536     int w, h;
1537     int orig_w, orig_h;
1538
1539     /* surface */
1540     obj_surface = SURFACE(surface);
1541     orig_w = obj_surface->orig_width;
1542     orig_h = obj_surface->orig_height;
1543     w = obj_surface->width;
1544     h = obj_surface->height;
1545
1546     if (pp_context->stmm.bo == NULL) {
1547         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
1548                                            "STMM surface",
1549                                            w * h,
1550                                            4096);
1551         assert(pp_context->stmm.bo);
1552     }
1553
1554     dri_bo_unreference(obj_surface->pp_out_bo);
1555     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
1556                                           "intermediate surface",
1557                                           SIZE_YUV420(w, h),
1558                                           4096);
1559     assert(obj_surface->pp_out_bo);
1560     obj_surface->orig_pp_out_width = orig_w;
1561     obj_surface->orig_pp_out_height = orig_h;
1562     obj_surface->pp_out_width = w;
1563     obj_surface->pp_out_height = h;
1564
1565     /* source UV surface index 2 */
1566     index = 2;
1567     pp_context->surfaces[index].s_bo = obj_surface->bo;
1568     dri_bo_reference(pp_context->surfaces[index].s_bo);
1569     bo = dri_bo_alloc(i965->intel.bufmgr, 
1570                       "surface state", 
1571                       sizeof(struct i965_surface_state), 
1572                       4096);
1573     assert(bo);
1574     pp_context->surfaces[index].ss_bo = bo;
1575     dri_bo_map(bo, True);
1576     assert(bo->virtual);
1577     ss = bo->virtual;
1578     memset(ss, 0, sizeof(*ss));
1579     ss->ss0.surface_type = I965_SURFACE_2D;
1580     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1581     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1582     ss->ss2.width = orig_w / 4 - 1;
1583     ss->ss2.height = orig_h / 2 - 1;
1584     ss->ss3.pitch = w - 1;
1585     dri_bo_emit_reloc(bo,
1586                       I915_GEM_DOMAIN_RENDER, 
1587                       0,
1588                       w * h,
1589                       offsetof(struct i965_surface_state, ss1),
1590                       pp_context->surfaces[index].s_bo);
1591     dri_bo_unmap(bo);
1592
1593     /* source YUV surface index 4 */
1594     index = 4;
1595     pp_context->surfaces[index].s_bo = obj_surface->bo;
1596     dri_bo_reference(pp_context->surfaces[index].s_bo);
1597     bo = dri_bo_alloc(i965->intel.bufmgr, 
1598                       "YUV surface state for deinterlace ", 
1599                       sizeof(struct i965_surface_state2), 
1600                       4096);
1601     assert(bo);
1602     pp_context->surfaces[index].ss_bo = bo;
1603     dri_bo_map(bo, True);
1604     assert(bo->virtual);
1605     ss_dndi = bo->virtual;
1606     memset(ss_dndi, 0, sizeof(*ss_dndi));
1607     ss_dndi->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
1608     ss_dndi->ss1.cbcr_pixel_offset_v_direction = 0;
1609     ss_dndi->ss1.width = w - 1;
1610     ss_dndi->ss1.height = h - 1;
1611     ss_dndi->ss1.cbcr_pixel_offset_v_direction = 1;
1612     ss_dndi->ss2.half_pitch_for_chroma = 0;
1613     ss_dndi->ss2.pitch = w - 1;
1614     ss_dndi->ss2.interleave_chroma = 1;
1615     ss_dndi->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
1616     ss_dndi->ss2.half_pitch_for_chroma = 0;
1617     ss_dndi->ss2.tiled_surface = 0;
1618     ss_dndi->ss3.x_offset_for_cb = 0;
1619     ss_dndi->ss3.y_offset_for_cb = h;
1620     dri_bo_emit_reloc(bo,
1621                       I915_GEM_DOMAIN_RENDER, 
1622                       0,
1623                       0,
1624                       offsetof(struct i965_surface_state2, ss0),
1625                       pp_context->surfaces[index].s_bo);
1626     dri_bo_unmap(bo);
1627
1628     /* source STMM surface index 20 */
1629     index = 20;
1630     pp_context->surfaces[index].s_bo = pp_context->stmm.bo;
1631     dri_bo_reference(pp_context->surfaces[index].s_bo);
1632     bo = dri_bo_alloc(i965->intel.bufmgr, 
1633                       "STMM surface state for deinterlace ", 
1634                       sizeof(struct i965_surface_state2), 
1635                       4096);
1636     assert(bo);
1637     pp_context->surfaces[index].ss_bo = bo;
1638     dri_bo_map(bo, True);
1639     assert(bo->virtual);
1640     ss = bo->virtual;
1641     memset(ss, 0, sizeof(*ss));
1642     ss->ss0.surface_type = I965_SURFACE_2D;
1643     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1644     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1645     ss->ss2.width = w - 1;
1646     ss->ss2.height = h - 1;
1647     ss->ss3.pitch = w - 1;
1648     dri_bo_emit_reloc(bo,
1649                       I915_GEM_DOMAIN_RENDER, 
1650                       I915_GEM_DOMAIN_RENDER,
1651                       0,
1652                       offsetof(struct i965_surface_state, ss1),
1653                       pp_context->surfaces[index].s_bo);
1654     dri_bo_unmap(bo);
1655
1656     /* destination Y surface index 7 */
1657     index = 7;
1658     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1659     dri_bo_reference(pp_context->surfaces[index].s_bo);
1660     bo = dri_bo_alloc(i965->intel.bufmgr, 
1661                       "surface state", 
1662                       sizeof(struct i965_surface_state), 
1663                       4096);
1664     assert(bo);
1665     pp_context->surfaces[index].ss_bo = bo;
1666     dri_bo_map(bo, True);
1667     assert(bo->virtual);
1668     ss = bo->virtual;
1669     memset(ss, 0, sizeof(*ss));
1670     ss->ss0.surface_type = I965_SURFACE_2D;
1671     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1672     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1673     ss->ss2.width = w / 4 - 1;
1674     ss->ss2.height = h - 1;
1675     ss->ss3.pitch = w - 1;
1676     dri_bo_emit_reloc(bo,
1677                       I915_GEM_DOMAIN_RENDER, 
1678                       I915_GEM_DOMAIN_RENDER,
1679                       0,
1680                       offsetof(struct i965_surface_state, ss1),
1681                       pp_context->surfaces[index].s_bo);
1682     dri_bo_unmap(bo);
1683
1684     /* destination UV surface index 8 */
1685     index = 8;
1686     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1687     dri_bo_reference(pp_context->surfaces[index].s_bo);
1688     bo = dri_bo_alloc(i965->intel.bufmgr, 
1689                       "surface state", 
1690                       sizeof(struct i965_surface_state), 
1691                       4096);
1692     assert(bo);
1693     pp_context->surfaces[index].ss_bo = bo;
1694     dri_bo_map(bo, True);
1695     assert(bo->virtual);
1696     ss = bo->virtual;
1697     memset(ss, 0, sizeof(*ss));
1698     ss->ss0.surface_type = I965_SURFACE_2D;
1699     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1700     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1701     ss->ss2.width = w / 4 - 1;
1702     ss->ss2.height = h / 2 - 1;
1703     ss->ss3.pitch = w - 1;
1704     dri_bo_emit_reloc(bo,
1705                       I915_GEM_DOMAIN_RENDER, 
1706                       I915_GEM_DOMAIN_RENDER,
1707                       w * h,
1708                       offsetof(struct i965_surface_state, ss1),
1709                       pp_context->surfaces[index].s_bo);
1710     dri_bo_unmap(bo);
1711
1712     /* sampler dndi */
1713     dri_bo_map(pp_context->sampler_state_table.bo, True);
1714     assert(pp_context->sampler_state_table.bo->virtual);
1715     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
1716     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
1717
1718     /* sample dndi index 1 */
1719     index = 0;
1720     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
1721     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
1722     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
1723     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
1724
1725     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
1726     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
1727     sampler_dndi[index].dw1.stmm_c2 = 0;
1728     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
1729     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
1730
1731     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
1732     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
1733     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
1734     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
1735
1736     sampler_dndi[index].dw3.maximum_stmm = 128;
1737     sampler_dndi[index].dw3.multipler_for_vecm = 2;
1738     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
1739     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
1740     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
1741
1742     sampler_dndi[index].dw4.sdi_delta = 8;
1743     sampler_dndi[index].dw4.sdi_threshold = 128;
1744     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
1745     sampler_dndi[index].dw4.stmm_shift_up = 0;
1746     sampler_dndi[index].dw4.stmm_shift_down = 0;
1747     sampler_dndi[index].dw4.minimum_stmm = 0;
1748
1749     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
1750     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
1751     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
1752     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
1753
1754     sampler_dndi[index].dw6.dn_enable = 1;
1755     sampler_dndi[index].dw6.di_enable = 1;
1756     sampler_dndi[index].dw6.di_partial = 0;
1757     sampler_dndi[index].dw6.dndi_top_first = 1;
1758     sampler_dndi[index].dw6.dndi_stream_id = 1;
1759     sampler_dndi[index].dw6.dndi_first_frame = 1;
1760     sampler_dndi[index].dw6.progressive_dn = 0;
1761     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
1762     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
1763     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
1764
1765     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
1766     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
1767     sampler_dndi[index].dw7.vdi_walker_enable = 0;
1768     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
1769
1770     dri_bo_unmap(pp_context->sampler_state_table.bo);
1771
1772     /* private function & data */
1773     pp_context->pp_x_steps = pp_dndi_x_steps;
1774     pp_context->pp_y_steps = pp_dndi_y_steps;
1775     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
1776
1777     ironlake_pp_static_parameter.grf1.statistics_surface_picth = w / 2;
1778     ironlake_pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
1779     ironlake_pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
1780     ironlake_pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
1781
1782     ironlake_pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
1783     ironlake_pp_inline_parameter.grf5.number_blocks = w / 16;
1784     ironlake_pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1785     ironlake_pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1786
1787     pp_dndi_context->dest_w = w;
1788     pp_dndi_context->dest_h = h;
1789 }
1790
1791 static void
1792 ironlake_pp_initialize(VADriverContextP ctx,
1793                        VASurfaceID surface,
1794                        int input,
1795                        short srcx,
1796                        short srcy,
1797                        unsigned short srcw,
1798                        unsigned short srch,
1799                        short destx,
1800                        short desty,
1801                        unsigned short destw,
1802                        unsigned short desth,
1803                        int pp_index)
1804 {
1805     struct i965_driver_data *i965 = i965_driver_data(ctx);
1806     struct i965_post_processing_context *pp_context = i965->pp_context;
1807     struct pp_module *pp_module;
1808     dri_bo *bo;
1809     int i;
1810
1811     dri_bo_unreference(pp_context->curbe.bo);
1812     bo = dri_bo_alloc(i965->intel.bufmgr,
1813                       "constant buffer",
1814                       4096, 
1815                       4096);
1816     assert(bo);
1817     pp_context->curbe.bo = bo;
1818
1819     dri_bo_unreference(pp_context->binding_table.bo);
1820     bo = dri_bo_alloc(i965->intel.bufmgr, 
1821                       "binding table",
1822                       sizeof(unsigned int), 
1823                       4096);
1824     assert(bo);
1825     pp_context->binding_table.bo = bo;
1826
1827     dri_bo_unreference(pp_context->idrt.bo);
1828     bo = dri_bo_alloc(i965->intel.bufmgr, 
1829                       "interface discriptor", 
1830                       sizeof(struct i965_interface_descriptor), 
1831                       4096);
1832     assert(bo);
1833     pp_context->idrt.bo = bo;
1834
1835     dri_bo_unreference(pp_context->sampler_state_table.bo);
1836     bo = dri_bo_alloc(i965->intel.bufmgr, 
1837                       "sampler state table", 
1838                       4096,
1839                       4096);
1840     assert(bo);
1841     dri_bo_map(bo, True);
1842     memset(bo->virtual, 0, bo->size);
1843     dri_bo_unmap(bo);
1844     pp_context->sampler_state_table.bo = bo;
1845
1846     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1847     bo = dri_bo_alloc(i965->intel.bufmgr, 
1848                       "sampler 8x8 state ",
1849                       4096,
1850                       4096);
1851     assert(bo);
1852     pp_context->sampler_state_table.bo_8x8 = bo;
1853
1854     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1855     bo = dri_bo_alloc(i965->intel.bufmgr, 
1856                       "sampler 8x8 state ",
1857                       4096,
1858                       4096);
1859     assert(bo);
1860     pp_context->sampler_state_table.bo_8x8_uv = bo;
1861
1862     dri_bo_unreference(pp_context->vfe_state.bo);
1863     bo = dri_bo_alloc(i965->intel.bufmgr, 
1864                       "vfe state", 
1865                       sizeof(struct i965_vfe_state), 
1866                       4096);
1867     assert(bo);
1868     pp_context->vfe_state.bo = bo;
1869     
1870     for (i = 0; i < MAX_PP_SURFACES; i++) {
1871         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
1872         pp_context->surfaces[i].ss_bo = NULL;
1873
1874         dri_bo_unreference(pp_context->surfaces[i].s_bo);
1875         pp_context->surfaces[i].s_bo = NULL;
1876     }
1877
1878     memset(&ironlake_pp_static_parameter, 0, sizeof(ironlake_pp_static_parameter));
1879     memset(&ironlake_pp_inline_parameter, 0, sizeof(ironlake_pp_inline_parameter));
1880     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1881     assert(pp_modules);
1882     pp_context->current_pp = pp_index;
1883     pp_module = &pp_modules[pp_index];
1884     
1885     if (pp_module->initialize)
1886         pp_module->initialize(ctx, surface, input, srcw, srch, destw, desth);
1887 }
1888
1889 static void
1890 i965_post_processing_internal(VADriverContextP ctx,
1891                               VASurfaceID surface,
1892                               int input,
1893                               short srcx,
1894                               short srcy,
1895                               unsigned short srcw,
1896                               unsigned short srch,
1897                               short destx,
1898                               short desty,
1899                               unsigned short destw,
1900                               unsigned short desth,
1901                               int pp_index)
1902 {
1903     ironlake_pp_initialize(ctx, surface, input,
1904                            srcx, srcy, srcw, srch,
1905                            destx, desty, destw, desth,
1906                            pp_index);
1907     ironlake_pp_states_setup(ctx);
1908     ironlake_pp_pipeline_setup(ctx);
1909 }
1910
1911 void
1912 i965_post_processing(VADriverContextP ctx,
1913                      VASurfaceID surface,
1914                      short srcx,
1915                      short srcy,
1916                      unsigned short srcw,
1917                      unsigned short srch,
1918                      short destx,
1919                      short desty,
1920                      unsigned short destw,
1921                      unsigned short desth,
1922                      unsigned int flag)
1923 {
1924     struct i965_driver_data *i965 = i965_driver_data(ctx);
1925
1926     if (IS_IRONLAKE(i965->intel.device_id)) {
1927         /* Currently only support post processing for NV12 surface */
1928         if (i965->render_state.interleaved_uv) {
1929             int input = 0;
1930
1931             if (flag & I965_PP_FLAG_DEINTERLACING) {
1932                 i965_post_processing_internal(ctx, surface, input,
1933                                               srcx, srcy, srcw, srch,
1934                                               destx, desty, destw, desth,
1935                                               PP_NV12_DNDI);
1936                 input = 1;
1937             }
1938
1939             if (flag & I965_PP_FLAG_AVS) {
1940                 i965_post_processing_internal(ctx, surface, input,
1941                                               srcx, srcy, srcw, srch,
1942                                               destx, desty, destw, desth,
1943                                               PP_NV12_AVS);
1944             }
1945         }
1946     }
1947 }       
1948
1949 Bool
1950 i965_post_processing_terminate(VADriverContextP ctx)
1951 {
1952     struct i965_driver_data *i965 = i965_driver_data(ctx);
1953     struct i965_post_processing_context *pp_context = i965->pp_context;
1954     int i;
1955
1956     if (pp_context) {
1957         dri_bo_unreference(pp_context->curbe.bo);
1958         pp_context->curbe.bo = NULL;
1959
1960         for (i = 0; i < MAX_PP_SURFACES; i++) {
1961             dri_bo_unreference(pp_context->surfaces[i].ss_bo);
1962             pp_context->surfaces[i].ss_bo = NULL;
1963
1964             dri_bo_unreference(pp_context->surfaces[i].s_bo);
1965             pp_context->surfaces[i].s_bo = NULL;
1966         }
1967
1968         dri_bo_unreference(pp_context->sampler_state_table.bo);
1969         pp_context->sampler_state_table.bo = NULL;
1970
1971         dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1972         pp_context->sampler_state_table.bo_8x8 = NULL;
1973
1974         dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1975         pp_context->sampler_state_table.bo_8x8_uv = NULL;
1976
1977         dri_bo_unreference(pp_context->binding_table.bo);
1978         pp_context->binding_table.bo = NULL;
1979
1980         dri_bo_unreference(pp_context->idrt.bo);
1981         pp_context->idrt.bo = NULL;
1982
1983         dri_bo_unreference(pp_context->vfe_state.bo);
1984         pp_context->vfe_state.bo = NULL;
1985
1986         dri_bo_unreference(pp_context->stmm.bo);
1987         pp_context->stmm.bo = NULL;
1988
1989         free(pp_context);
1990     }
1991
1992     i965->pp_context = NULL;
1993
1994     for (i = 0; i < NUM_PP_MODULES && pp_modules; i++) {
1995         struct pp_module *pp_module = &pp_modules[i];
1996
1997         dri_bo_unreference(pp_module->bo);
1998         pp_module->bo = NULL;
1999     }
2000
2001     return True;
2002 }
2003
2004 Bool
2005 i965_post_processing_init(VADriverContextP ctx)
2006 {
2007     struct i965_driver_data *i965 = i965_driver_data(ctx);
2008     struct i965_post_processing_context *pp_context = i965->pp_context;
2009     int i;
2010
2011     if (pp_context == NULL) {
2012         pp_context = calloc(1, sizeof(*pp_context));
2013         i965->pp_context = pp_context;
2014     }
2015
2016     pp_context->urb.size = URB_SIZE((&i965->intel));
2017     pp_context->urb.num_vfe_entries = 32;
2018     pp_context->urb.size_vfe_entry = 1;
2019     pp_context->urb.num_cs_entries = 1;
2020     pp_context->urb.size_cs_entry = 2;
2021     pp_context->urb.vfe_start = 0;
2022     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
2023         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
2024     assert(pp_context->urb.cs_start + 
2025            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
2026
2027     if (IS_IRONLAKE(i965->intel.device_id)) {
2028         pp_modules = pp_modules_gen5;
2029     }
2030
2031     for (i = 0; i < NUM_PP_MODULES && pp_modules; i++) {
2032         struct pp_module *pp_module = &pp_modules[i];
2033         dri_bo_unreference(pp_module->bo);
2034         pp_module->bo = dri_bo_alloc(i965->intel.bufmgr,
2035                                      pp_module->name,
2036                                      pp_module->size,
2037                                      4096);
2038         assert(pp_module->bo);
2039         dri_bo_subdata(pp_module->bo, 0, pp_module->size, pp_module->bin);
2040     }
2041
2042     return True;
2043 }