OSDN Git Service

intel-vaapi-driver 1.8.1.pre1
[android-x86/hardware-intel-common-vaapi.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41 #include "i965_yuv_coefs.h"
42 #include "intel_media.h"
43
44 #include "gen75_picture_process.h"
45
46 extern VAStatus
47 vpp_surface_convert(VADriverContextP ctx,
48                     struct object_surface *src_obj_surf,
49                     struct object_surface *dst_obj_surf);
50
/* Reads the has_vpp field of the driver context's codec_info. */
#define HAS_VPP(ctx) ((ctx)->codec_info->has_vpp)

/* Largest of the GEN6, GEN7 and GEN8 padded surface-state sizes. */
#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN8,\
                        MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))

/*
 * Offset of the index-th padded surface-state entry.  The argument is
 * parenthesized so that an expression argument such as `i + 1` is
 * multiplied as a whole instead of being split by operator precedence.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* Offset immediately after MAX_PP_SURFACES surface-state entries. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)

/*
 * NOTE(review): presumably the block geometry and x-offset alignment
 * expected by the GPU kernels -- their uses are outside this section.
 */
#define GPU_ASM_BLOCK_WIDTH         16
#define GPU_ASM_BLOCK_HEIGHT        8
#define GPU_ASM_X_OFFSET_ALIGNMENT  4

/* Driver-private status value; its consumers are not visible in this section. */
#define VA_STATUS_SUCCESS_1                     0xFFFFFFFE
64
65 static const uint32_t pp_null_gen5[][4] = {
66 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
67 };
68
69 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
70 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
71 };
72
73 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
74 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
75 };
76
77 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
78 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
79 };
80
81 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
82 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
83 };
84
85 static const uint32_t pp_nv12_scaling_gen5[][4] = {
86 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
87 };
88
89 static const uint32_t pp_nv12_avs_gen5[][4] = {
90 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
91 };
92
93 static const uint32_t pp_nv12_dndi_gen5[][4] = {
94 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
95 };
96
97 static const uint32_t pp_nv12_dn_gen5[][4] = {
98 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
99 };
100
101 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
102 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
103 };
104
105 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
106 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
107 };
108
109 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
110 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
111 };
112
113 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
114 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
115 };
116
117 static const uint32_t pp_pa_load_save_pa_gen5[][4] = {
118 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5"
119 };
120
121 static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
122 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
123 };
124
125 static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
126 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
127 };
128
129 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
130                                    const struct i965_surface *src_surface,
131                                    const VARectangle *src_rect,
132                                    struct i965_surface *dst_surface,
133                                    const VARectangle *dst_rect,
134                                    void *filter_param);
135 static VAStatus
136 pp_nv12_avs_initialize(VADriverContextP ctx,
137     struct i965_post_processing_context *pp_context,
138     const struct i965_surface *src_surface, const VARectangle *src_rect,
139     struct i965_surface *dst_surface, const VARectangle *dst_rect,
140     void *filter_param);
141 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
142                                            const struct i965_surface *src_surface,
143                                            const VARectangle *src_rect,
144                                            struct i965_surface *dst_surface,
145                                            const VARectangle *dst_rect,
146                                            void *filter_param);
147 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
148                                              const struct i965_surface *src_surface,
149                                              const VARectangle *src_rect,
150                                              struct i965_surface *dst_surface,
151                                              const VARectangle *dst_rect,
152                                              void *filter_param);
153 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
154                                                 const struct i965_surface *src_surface,
155                                                 const VARectangle *src_rect,
156                                                 struct i965_surface *dst_surface,
157                                                 const VARectangle *dst_rect,
158                                                 void *filter_param);
159 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
160                                         const struct i965_surface *src_surface,
161                                         const VARectangle *src_rect,
162                                         struct i965_surface *dst_surface,
163                                         const VARectangle *dst_rect,
164                                         void *filter_param);
165 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
166                                       const struct i965_surface *src_surface,
167                                       const VARectangle *src_rect,
168                                       struct i965_surface *dst_surface,
169                                       const VARectangle *dst_rect,
170                                       void *filter_param);
171
172 static struct pp_module pp_modules_gen5[] = {
173     {
174         {
175             "NULL module (for testing)",
176             PP_NULL,
177             pp_null_gen5,
178             sizeof(pp_null_gen5),
179             NULL,
180         },
181
182         pp_null_initialize,
183     },
184
185     {
186         {
187             "NV12_NV12",
188             PP_NV12_LOAD_SAVE_N12,
189             pp_nv12_load_save_nv12_gen5,
190             sizeof(pp_nv12_load_save_nv12_gen5),
191             NULL,
192         },
193
194         pp_plx_load_save_plx_initialize,
195     },
196
197     {
198         {
199             "NV12_PL3",
200             PP_NV12_LOAD_SAVE_PL3,
201             pp_nv12_load_save_pl3_gen5,
202             sizeof(pp_nv12_load_save_pl3_gen5),
203             NULL,
204         },
205
206         pp_plx_load_save_plx_initialize,
207     },
208
209     {
210         {
211             "PL3_NV12",
212             PP_PL3_LOAD_SAVE_N12,
213             pp_pl3_load_save_nv12_gen5,
214             sizeof(pp_pl3_load_save_nv12_gen5),
215             NULL,
216         },
217
218         pp_plx_load_save_plx_initialize,
219     },
220
221     {
222         {
223             "PL3_PL3",
224             PP_PL3_LOAD_SAVE_PL3,
225             pp_pl3_load_save_pl3_gen5,
226             sizeof(pp_pl3_load_save_pl3_gen5),
227             NULL,
228         },
229
230         pp_plx_load_save_plx_initialize
231     },
232
233     {
234         {
235             "NV12 Scaling module",
236             PP_NV12_SCALING,
237             pp_nv12_scaling_gen5,
238             sizeof(pp_nv12_scaling_gen5),
239             NULL,
240         },
241
242         pp_nv12_scaling_initialize,
243     },
244
245     {
246         {
247             "NV12 AVS module",
248             PP_NV12_AVS,
249             pp_nv12_avs_gen5,
250             sizeof(pp_nv12_avs_gen5),
251             NULL,
252         },
253
254         pp_nv12_avs_initialize,
255     },
256
257     {
258         {
259             "NV12 DNDI module",
260             PP_NV12_DNDI,
261             pp_nv12_dndi_gen5,
262             sizeof(pp_nv12_dndi_gen5),
263             NULL,
264         },
265
266         pp_nv12_dndi_initialize,
267     },
268
269     {
270         {
271             "NV12 DN module",
272             PP_NV12_DN,
273             pp_nv12_dn_gen5,
274             sizeof(pp_nv12_dn_gen5),
275             NULL,
276         },
277
278         pp_nv12_dn_initialize,
279     },
280
281     {
282         {
283             "NV12_PA module",
284             PP_NV12_LOAD_SAVE_PA,
285             pp_nv12_load_save_pa_gen5,
286             sizeof(pp_nv12_load_save_pa_gen5),
287             NULL,
288         },
289     
290         pp_plx_load_save_plx_initialize,
291     },
292
293     {
294         {
295             "PL3_PA module",
296             PP_PL3_LOAD_SAVE_PA,
297             pp_pl3_load_save_pa_gen5,
298             sizeof(pp_pl3_load_save_pa_gen5),
299             NULL,
300         },
301     
302         pp_plx_load_save_plx_initialize,
303     },
304
305     {
306         {
307             "PA_NV12 module",
308             PP_PA_LOAD_SAVE_NV12,
309             pp_pa_load_save_nv12_gen5,
310             sizeof(pp_pa_load_save_nv12_gen5),
311             NULL,
312         },
313     
314         pp_plx_load_save_plx_initialize,
315     },
316
317     {
318         {
319             "PA_PL3 module",
320             PP_PA_LOAD_SAVE_PL3,
321             pp_pa_load_save_pl3_gen5,
322             sizeof(pp_pa_load_save_pl3_gen5),
323             NULL,
324         },
325     
326         pp_plx_load_save_plx_initialize,
327     },
328
329     {
330         {
331             "PA_PA module",
332             PP_PA_LOAD_SAVE_PA,
333             pp_pa_load_save_pa_gen5,
334             sizeof(pp_pa_load_save_pa_gen5),
335             NULL,
336         },
337
338         pp_plx_load_save_plx_initialize,
339     },
340
341     {
342         {
343             "RGBX_NV12 module",
344             PP_RGBX_LOAD_SAVE_NV12,
345             pp_rgbx_load_save_nv12_gen5,
346             sizeof(pp_rgbx_load_save_nv12_gen5),
347             NULL,
348         },
349     
350         pp_plx_load_save_plx_initialize,
351     },
352             
353     {
354         {
355             "NV12_RGBX module",
356             PP_NV12_LOAD_SAVE_RGBX,
357             pp_nv12_load_save_rgbx_gen5,
358             sizeof(pp_nv12_load_save_rgbx_gen5),
359             NULL,
360         },
361     
362         pp_plx_load_save_plx_initialize,
363     },
364 };
365
366 static const uint32_t pp_null_gen6[][4] = {
367 #include "shaders/post_processing/gen5_6/null.g6b"
368 };
369
370 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
371 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
372 };
373
374 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
375 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
376 };
377
378 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
379 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
380 };
381
382 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
383 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
384 };
385
386 static const uint32_t pp_nv12_scaling_gen6[][4] = {
387 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
388 };
389
390 static const uint32_t pp_nv12_avs_gen6[][4] = {
391 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
392 };
393
394 static const uint32_t pp_nv12_dndi_gen6[][4] = {
395 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
396 };
397
398 static const uint32_t pp_nv12_dn_gen6[][4] = {
399 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
400 };
401
402 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
403 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
404 };
405
406 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
407 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
408 };
409
410 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
411 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
412 };
413
414 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
415 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
416 };
417
418 static const uint32_t pp_pa_load_save_pa_gen6[][4] = {
419 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g6b"
420 };
421
422 static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
423 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
424 };
425
426 static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
427 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
428 };
429
430 static struct pp_module pp_modules_gen6[] = {
431     {
432         {
433             "NULL module (for testing)",
434             PP_NULL,
435             pp_null_gen6,
436             sizeof(pp_null_gen6),
437             NULL,
438         },
439
440         pp_null_initialize,
441     },
442
443     {
444         {
445             "NV12_NV12",
446             PP_NV12_LOAD_SAVE_N12,
447             pp_nv12_load_save_nv12_gen6,
448             sizeof(pp_nv12_load_save_nv12_gen6),
449             NULL,
450         },
451
452         pp_plx_load_save_plx_initialize,
453     },
454
455     {
456         {
457             "NV12_PL3",
458             PP_NV12_LOAD_SAVE_PL3,
459             pp_nv12_load_save_pl3_gen6,
460             sizeof(pp_nv12_load_save_pl3_gen6),
461             NULL,
462         },
463         
464         pp_plx_load_save_plx_initialize,
465     },
466
467     {
468         {
469             "PL3_NV12",
470             PP_PL3_LOAD_SAVE_N12,
471             pp_pl3_load_save_nv12_gen6,
472             sizeof(pp_pl3_load_save_nv12_gen6),
473             NULL,
474         },
475
476         pp_plx_load_save_plx_initialize,
477     },
478
479     {
480         {
481             "PL3_PL3",
482             PP_PL3_LOAD_SAVE_PL3,
483             pp_pl3_load_save_pl3_gen6,
484             sizeof(pp_pl3_load_save_pl3_gen6),
485             NULL,
486         },
487
488         pp_plx_load_save_plx_initialize,
489     },
490
491     {
492         {
493             "NV12 Scaling module",
494             PP_NV12_SCALING,
495             pp_nv12_scaling_gen6,
496             sizeof(pp_nv12_scaling_gen6),
497             NULL,
498         },
499
500         gen6_nv12_scaling_initialize,
501     },
502
503     {
504         {
505             "NV12 AVS module",
506             PP_NV12_AVS,
507             pp_nv12_avs_gen6,
508             sizeof(pp_nv12_avs_gen6),
509             NULL,
510         },
511
512         pp_nv12_avs_initialize,
513     },
514
515     {
516         {
517             "NV12 DNDI module",
518             PP_NV12_DNDI,
519             pp_nv12_dndi_gen6,
520             sizeof(pp_nv12_dndi_gen6),
521             NULL,
522         },
523
524         pp_nv12_dndi_initialize,
525     },
526
527     {
528         {
529             "NV12 DN module",
530             PP_NV12_DN,
531             pp_nv12_dn_gen6,
532             sizeof(pp_nv12_dn_gen6),
533             NULL,
534         },
535
536         pp_nv12_dn_initialize,
537     },
538     {
539         {
540             "NV12_PA module",
541             PP_NV12_LOAD_SAVE_PA,
542             pp_nv12_load_save_pa_gen6,
543             sizeof(pp_nv12_load_save_pa_gen6),
544             NULL,
545         },
546     
547         pp_plx_load_save_plx_initialize,
548     },
549
550     {
551         {
552             "PL3_PA module",
553             PP_PL3_LOAD_SAVE_PA,
554             pp_pl3_load_save_pa_gen6,
555             sizeof(pp_pl3_load_save_pa_gen6),
556             NULL,
557         },
558     
559         pp_plx_load_save_plx_initialize,
560     },
561
562     {
563         {
564             "PA_NV12 module",
565             PP_PA_LOAD_SAVE_NV12,
566             pp_pa_load_save_nv12_gen6,
567             sizeof(pp_pa_load_save_nv12_gen6),
568             NULL,
569         },
570     
571         pp_plx_load_save_plx_initialize,
572     },
573
574     {
575         {
576             "PA_PL3 module",
577             PP_PA_LOAD_SAVE_PL3,
578             pp_pa_load_save_pl3_gen6,
579             sizeof(pp_pa_load_save_pl3_gen6),
580             NULL,
581         },
582     
583         pp_plx_load_save_plx_initialize,
584     },
585
586     {
587         {
588             "PA_PA module",
589             PP_PA_LOAD_SAVE_PA,
590             pp_pa_load_save_pa_gen6,
591             sizeof(pp_pa_load_save_pa_gen6),
592             NULL,
593         },
594
595         pp_plx_load_save_plx_initialize,
596     },
597
598     {
599         {
600             "RGBX_NV12 module",
601             PP_RGBX_LOAD_SAVE_NV12,
602             pp_rgbx_load_save_nv12_gen6,
603             sizeof(pp_rgbx_load_save_nv12_gen6),
604             NULL,
605         },
606     
607         pp_plx_load_save_plx_initialize,
608     },
609
610     {
611         {
612             "NV12_RGBX module",
613             PP_NV12_LOAD_SAVE_RGBX,
614             pp_nv12_load_save_rgbx_gen6,
615             sizeof(pp_nv12_load_save_rgbx_gen6),
616             NULL,
617         },
618     
619         pp_plx_load_save_plx_initialize,
620     },
621 };
622
623 static const uint32_t pp_null_gen7[][4] = {
624 };
625
626 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
627 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
628 };
629
630 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
631 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
632 };
633
634 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
635 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
636 };
637
638 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
639 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
640 };
641
642 static const uint32_t pp_nv12_scaling_gen7[][4] = {
643 #include "shaders/post_processing/gen7/avs.g7b"
644 };
645
646 static const uint32_t pp_nv12_avs_gen7[][4] = {
647 #include "shaders/post_processing/gen7/avs.g7b"
648 };
649
650 static const uint32_t pp_nv12_dndi_gen7[][4] = {
651 #include "shaders/post_processing/gen7/dndi.g7b"
652 };
653
654 static const uint32_t pp_nv12_dn_gen7[][4] = {
655 #include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
656 };
657 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
658 #include "shaders/post_processing/gen7/pl2_to_pa.g7b"
659 };
660 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
661 #include "shaders/post_processing/gen7/pl3_to_pa.g7b"
662 };
663 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
664 #include "shaders/post_processing/gen7/pa_to_pl2.g7b"
665 };
666 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
667 #include "shaders/post_processing/gen7/pa_to_pl3.g7b"
668 };
669 static const uint32_t pp_pa_load_save_pa_gen7[][4] = {
670 #include "shaders/post_processing/gen7/pa_to_pa.g7b"
671 };
672 static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
673 #include "shaders/post_processing/gen7/rgbx_to_nv12.g7b"
674 };
675 static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
676 #include "shaders/post_processing/gen7/pl2_to_rgbx.g7b"
677 };
678
679 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
680                                            const struct i965_surface *src_surface,
681                                            const VARectangle *src_rect,
682                                            struct i965_surface *dst_surface,
683                                            const VARectangle *dst_rect,
684                                            void *filter_param);
685 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
686                                              const struct i965_surface *src_surface,
687                                              const VARectangle *src_rect,
688                                              struct i965_surface *dst_surface,
689                                              const VARectangle *dst_rect,
690                                              void *filter_param);
691 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
692                                            const struct i965_surface *src_surface,
693                                            const VARectangle *src_rect,
694                                            struct i965_surface *dst_surface,
695                                            const VARectangle *dst_rect,
696                                            void *filter_param);
697
698 static struct pp_module pp_modules_gen7[] = {
699     {
700         {
701             "NULL module (for testing)",
702             PP_NULL,
703             pp_null_gen7,
704             sizeof(pp_null_gen7),
705             NULL,
706         },
707
708         pp_null_initialize,
709     },
710
711     {
712         {
713             "NV12_NV12",
714             PP_NV12_LOAD_SAVE_N12,
715             pp_nv12_load_save_nv12_gen7,
716             sizeof(pp_nv12_load_save_nv12_gen7),
717             NULL,
718         },
719
720         gen7_pp_plx_avs_initialize,
721     },
722
723     {
724         {
725             "NV12_PL3",
726             PP_NV12_LOAD_SAVE_PL3,
727             pp_nv12_load_save_pl3_gen7,
728             sizeof(pp_nv12_load_save_pl3_gen7),
729             NULL,
730         },
731         
732         gen7_pp_plx_avs_initialize,
733     },
734
735     {
736         {
737             "PL3_NV12",
738             PP_PL3_LOAD_SAVE_N12,
739             pp_pl3_load_save_nv12_gen7,
740             sizeof(pp_pl3_load_save_nv12_gen7),
741             NULL,
742         },
743
744         gen7_pp_plx_avs_initialize,
745     },
746
747     {
748         {
749             "PL3_PL3",
750             PP_PL3_LOAD_SAVE_PL3,
751             pp_pl3_load_save_pl3_gen7,
752             sizeof(pp_pl3_load_save_pl3_gen7),
753             NULL,
754         },
755
756         gen7_pp_plx_avs_initialize,
757     },
758
759     {
760         {
761             "NV12 Scaling module",
762             PP_NV12_SCALING,
763             pp_nv12_scaling_gen7,
764             sizeof(pp_nv12_scaling_gen7),
765             NULL,
766         },
767
768         gen7_pp_plx_avs_initialize,
769     },
770
771     {
772         {
773             "NV12 AVS module",
774             PP_NV12_AVS,
775             pp_nv12_avs_gen7,
776             sizeof(pp_nv12_avs_gen7),
777             NULL,
778         },
779
780         gen7_pp_plx_avs_initialize,
781     },
782
783     {
784         {
785             "NV12 DNDI module",
786             PP_NV12_DNDI,
787             pp_nv12_dndi_gen7,
788             sizeof(pp_nv12_dndi_gen7),
789             NULL,
790         },
791
792         gen7_pp_nv12_dndi_initialize,
793     },
794
795     {
796         {
797             "NV12 DN module",
798             PP_NV12_DN,
799             pp_nv12_dn_gen7,
800             sizeof(pp_nv12_dn_gen7),
801             NULL,
802         },
803
804         gen7_pp_nv12_dn_initialize,
805     },
806     {
807         {
808             "NV12_PA module",
809             PP_NV12_LOAD_SAVE_PA,
810             pp_nv12_load_save_pa_gen7,
811             sizeof(pp_nv12_load_save_pa_gen7),
812             NULL,
813         },
814     
815         gen7_pp_plx_avs_initialize,
816     },
817
818     {
819         {
820             "PL3_PA module",
821             PP_PL3_LOAD_SAVE_PA,
822             pp_pl3_load_save_pa_gen7,
823             sizeof(pp_pl3_load_save_pa_gen7),
824             NULL,
825         },
826     
827         gen7_pp_plx_avs_initialize,
828     },
829
830     {
831         {
832             "PA_NV12 module",
833             PP_PA_LOAD_SAVE_NV12,
834             pp_pa_load_save_nv12_gen7,
835             sizeof(pp_pa_load_save_nv12_gen7),
836             NULL,
837         },
838     
839         gen7_pp_plx_avs_initialize,
840     },
841
842     {
843         {
844             "PA_PL3 module",
845             PP_PA_LOAD_SAVE_PL3,
846             pp_pa_load_save_pl3_gen7,
847             sizeof(pp_pa_load_save_pl3_gen7),
848             NULL,
849         },
850     
851         gen7_pp_plx_avs_initialize,
852     },
853
854     {
855         {
856             "PA_PA module",
857             PP_PA_LOAD_SAVE_PA,
858             pp_pa_load_save_pa_gen7,
859             sizeof(pp_pa_load_save_pa_gen7),
860             NULL,
861         },
862
863         gen7_pp_plx_avs_initialize,
864     },
865
866     {
867         {
868             "RGBX_NV12 module",
869             PP_RGBX_LOAD_SAVE_NV12,
870             pp_rgbx_load_save_nv12_gen7,
871             sizeof(pp_rgbx_load_save_nv12_gen7),
872             NULL,
873         },
874     
875         gen7_pp_plx_avs_initialize,
876     },
877
878     {
879         {
880             "NV12_RGBX module",
881             PP_NV12_LOAD_SAVE_RGBX,
882             pp_nv12_load_save_rgbx_gen7,
883             sizeof(pp_nv12_load_save_rgbx_gen7),
884             NULL,
885         },
886     
887         gen7_pp_plx_avs_initialize,
888     },
889             
890 };
891
892 static const uint32_t pp_null_gen75[][4] = {
893 };
894
895 static const uint32_t pp_nv12_load_save_nv12_gen75[][4] = {
896 #include "shaders/post_processing/gen7/pl2_to_pl2.g75b"
897 };
898
899 static const uint32_t pp_nv12_load_save_pl3_gen75[][4] = {
900 #include "shaders/post_processing/gen7/pl2_to_pl3.g75b"
901 };
902
903 static const uint32_t pp_pl3_load_save_nv12_gen75[][4] = {
904 #include "shaders/post_processing/gen7/pl3_to_pl2.g75b"
905 };
906
907 static const uint32_t pp_pl3_load_save_pl3_gen75[][4] = {
908 #include "shaders/post_processing/gen7/pl3_to_pl3.g75b"
909 };
910
911 static const uint32_t pp_nv12_scaling_gen75[][4] = {
912 #include "shaders/post_processing/gen7/avs.g75b"
913 };
914
915 static const uint32_t pp_nv12_avs_gen75[][4] = {
916 #include "shaders/post_processing/gen7/avs.g75b"
917 };
918
919 static const uint32_t pp_nv12_dndi_gen75[][4] = {
920 // #include "shaders/post_processing/gen7/dndi.g75b"
921 };
922
923 static const uint32_t pp_nv12_dn_gen75[][4] = {
924 // #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
925 };
926 static const uint32_t pp_nv12_load_save_pa_gen75[][4] = {
927 #include "shaders/post_processing/gen7/pl2_to_pa.g75b"
928 };
929 static const uint32_t pp_pl3_load_save_pa_gen75[][4] = {
930 #include "shaders/post_processing/gen7/pl3_to_pa.g75b"
931 };
932 static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
933 #include "shaders/post_processing/gen7/pa_to_pl2.g75b"
934 };
935 static const uint32_t pp_pa_load_save_pl3_gen75[][4] = {
936 #include "shaders/post_processing/gen7/pa_to_pl3.g75b"
937 };
938 static const uint32_t pp_pa_load_save_pa_gen75[][4] = {
939 #include "shaders/post_processing/gen7/pa_to_pa.g75b"
940 };
941 static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = {
942 #include "shaders/post_processing/gen7/rgbx_to_nv12.g75b"
943 };
944 static const uint32_t pp_nv12_load_save_rgbx_gen75[][4] = {
945 #include "shaders/post_processing/gen7/pl2_to_rgbx.g75b"
946 };
947
948 static struct pp_module pp_modules_gen75[] = {
949     {
950         {
951             "NULL module (for testing)",
952             PP_NULL,
953             pp_null_gen75,
954             sizeof(pp_null_gen75),
955             NULL,
956         },
957
958         pp_null_initialize,
959     },
960
961     {
962         {
963             "NV12_NV12",
964             PP_NV12_LOAD_SAVE_N12,
965             pp_nv12_load_save_nv12_gen75,
966             sizeof(pp_nv12_load_save_nv12_gen75),
967             NULL,
968         },
969
970         gen7_pp_plx_avs_initialize,
971     },
972
973     {
974         {
975             "NV12_PL3",
976             PP_NV12_LOAD_SAVE_PL3,
977             pp_nv12_load_save_pl3_gen75,
978             sizeof(pp_nv12_load_save_pl3_gen75),
979             NULL,
980         },
981         
982         gen7_pp_plx_avs_initialize,
983     },
984
985     {
986         {
987             "PL3_NV12",
988             PP_PL3_LOAD_SAVE_N12,
989             pp_pl3_load_save_nv12_gen75,
990             sizeof(pp_pl3_load_save_nv12_gen75),
991             NULL,
992         },
993
994         gen7_pp_plx_avs_initialize,
995     },
996
997     {
998         {
999             "PL3_PL3",
1000             PP_PL3_LOAD_SAVE_PL3,
1001             pp_pl3_load_save_pl3_gen75,
1002             sizeof(pp_pl3_load_save_pl3_gen75),
1003             NULL,
1004         },
1005
1006         gen7_pp_plx_avs_initialize,
1007     },
1008
1009     {
1010         {
1011             "NV12 Scaling module",
1012             PP_NV12_SCALING,
1013             pp_nv12_scaling_gen75,
1014             sizeof(pp_nv12_scaling_gen75),
1015             NULL,
1016         },
1017
1018         gen7_pp_plx_avs_initialize,
1019     },
1020
1021     {
1022         {
1023             "NV12 AVS module",
1024             PP_NV12_AVS,
1025             pp_nv12_avs_gen75,
1026             sizeof(pp_nv12_avs_gen75),
1027             NULL,
1028         },
1029
1030         gen7_pp_plx_avs_initialize,
1031     },
1032
1033     {
1034         {
1035             "NV12 DNDI module",
1036             PP_NV12_DNDI,
1037             pp_nv12_dndi_gen75,
1038             sizeof(pp_nv12_dndi_gen75),
1039             NULL,
1040         },
1041
1042         gen7_pp_nv12_dn_initialize,
1043     },
1044
1045     {
1046         {
1047             "NV12 DN module",
1048             PP_NV12_DN,
1049             pp_nv12_dn_gen75,
1050             sizeof(pp_nv12_dn_gen75),
1051             NULL,
1052         },
1053
1054         gen7_pp_nv12_dn_initialize,
1055     },
1056
1057     {
1058         {
1059             "NV12_PA module",
1060             PP_NV12_LOAD_SAVE_PA,
1061             pp_nv12_load_save_pa_gen75,
1062             sizeof(pp_nv12_load_save_pa_gen75),
1063             NULL,
1064         },
1065     
1066         gen7_pp_plx_avs_initialize,
1067     },
1068
1069     {
1070         {
1071             "PL3_PA module",
1072             PP_PL3_LOAD_SAVE_PA,
1073             pp_pl3_load_save_pa_gen75,
1074             sizeof(pp_pl3_load_save_pa_gen75),
1075             NULL,
1076         },
1077     
1078         gen7_pp_plx_avs_initialize,
1079     },
1080
1081     {
1082         {
1083             "PA_NV12 module",
1084             PP_PA_LOAD_SAVE_NV12,
1085             pp_pa_load_save_nv12_gen75,
1086             sizeof(pp_pa_load_save_nv12_gen75),
1087             NULL,
1088         },
1089     
1090         gen7_pp_plx_avs_initialize,
1091     },
1092
1093     {
1094         {
1095             "PA_PL3 module",
1096             PP_PA_LOAD_SAVE_PL3,
1097             pp_pa_load_save_pl3_gen75,
1098             sizeof(pp_pa_load_save_pl3_gen75),
1099             NULL,
1100         },
1101     
1102         gen7_pp_plx_avs_initialize,
1103     },
1104
1105     {
1106         {
1107             "PA_PA module",
1108             PP_PA_LOAD_SAVE_PA,
1109             pp_pa_load_save_pa_gen75,
1110             sizeof(pp_pa_load_save_pa_gen75),
1111             NULL,
1112         },
1113
1114         gen7_pp_plx_avs_initialize,
1115     },
1116
1117     {
1118         {
1119             "RGBX_NV12 module",
1120             PP_RGBX_LOAD_SAVE_NV12,
1121             pp_rgbx_load_save_nv12_gen75,
1122             sizeof(pp_rgbx_load_save_nv12_gen75),
1123             NULL,
1124         },
1125     
1126         gen7_pp_plx_avs_initialize,
1127     },
1128
1129     {
1130         {
1131             "NV12_RGBX module",
1132             PP_NV12_LOAD_SAVE_RGBX,
1133             pp_nv12_load_save_rgbx_gen75,
1134             sizeof(pp_nv12_load_save_rgbx_gen75),
1135             NULL,
1136         },
1137     
1138         gen7_pp_plx_avs_initialize,
1139     },
1140             
1141 };
1142
1143 static void
1144 pp_dndi_frame_store_reset(DNDIFrameStore *fs)
1145 {
1146     fs->obj_surface = NULL;
1147     fs->surface_id = VA_INVALID_ID;
1148     fs->is_scratch_surface = 0;
1149 }
1150
1151 static inline void
1152 pp_dndi_frame_store_swap(DNDIFrameStore *fs1, DNDIFrameStore *fs2)
1153 {
1154     const DNDIFrameStore tmpfs = *fs1;
1155     *fs1 = *fs2;
1156     *fs2 = tmpfs;
1157 }
1158
1159 static inline void
1160 pp_dndi_frame_store_clear(DNDIFrameStore *fs, VADriverContextP ctx)
1161 {
1162     if (fs->obj_surface && fs->is_scratch_surface) {
1163         VASurfaceID va_surface = fs->obj_surface->base.id;
1164         i965_DestroySurfaces(ctx, &va_surface, 1);
1165     }
1166     pp_dndi_frame_store_reset(fs);
1167 }
1168
1169 static void
1170 pp_dndi_context_init(struct pp_dndi_context *dndi_ctx)
1171 {
1172     int i;
1173
1174     memset(dndi_ctx, 0, sizeof(*dndi_ctx));
1175     for (i = 0; i < ARRAY_ELEMS(dndi_ctx->frame_store); i++)
1176         pp_dndi_frame_store_reset(&dndi_ctx->frame_store[i]);
1177 }
1178
1179 static VAStatus
1180 pp_dndi_context_init_surface_params(struct pp_dndi_context *dndi_ctx,
1181     struct object_surface *obj_surface,
1182     const VAProcPipelineParameterBuffer *pipe_params,
1183     const VAProcFilterParameterBufferDeinterlacing *deint_params)
1184 {
1185     DNDIFrameStore *fs;
1186
1187     dndi_ctx->is_di_enabled = 1;
1188     dndi_ctx->is_di_adv_enabled = 0;
1189     dndi_ctx->is_first_frame = 0;
1190     dndi_ctx->is_second_field = 0;
1191
1192     /* Check whether we are deinterlacing the second field */
1193     if (dndi_ctx->is_di_enabled) {
1194         const unsigned int tff =
1195             !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD_FIRST);
1196         const unsigned int is_top_field =
1197             !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
1198
1199         if ((tff ^ is_top_field) != 0) {
1200             fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1201             if (fs->surface_id != obj_surface->base.id) {
1202                 WARN_ONCE("invalid surface provided for second field\n");
1203                 return VA_STATUS_ERROR_INVALID_PARAMETER;
1204             }
1205             dndi_ctx->is_second_field = 1;
1206         }
1207     }
1208
1209     /* Check whether we are deinterlacing the first frame */
1210     if (dndi_ctx->is_di_enabled) {
1211         switch (deint_params->algorithm) {
1212         case VAProcDeinterlacingBob:
1213             dndi_ctx->is_first_frame = 1;
1214             break;
1215         case VAProcDeinterlacingMotionAdaptive:
1216         case VAProcDeinterlacingMotionCompensated:
1217             fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1218             if (fs->surface_id == VA_INVALID_ID)
1219                 dndi_ctx->is_first_frame = 1;
1220             else if (dndi_ctx->is_second_field) {
1221                 /* At this stage, we have already deinterlaced the
1222                    first field successfully. So, the first frame flag
1223                    is trigerred if the previous field was deinterlaced
1224                    without reference frame */
1225                 fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS];
1226                 if (fs->surface_id == VA_INVALID_ID)
1227                     dndi_ctx->is_first_frame = 1;
1228             }
1229             else {
1230                 if (pipe_params->num_forward_references < 1 ||
1231                     pipe_params->forward_references[0] == VA_INVALID_ID) {
1232                     WARN_ONCE("A forward temporal reference is needed for Motion adaptive/compensated deinterlacing !!!\n");
1233                     return VA_STATUS_ERROR_INVALID_PARAMETER;
1234                 }
1235             }
1236             dndi_ctx->is_di_adv_enabled = 1;
1237             break;
1238         default:
1239             WARN_ONCE("unsupported deinterlacing algorithm (%d)\n",
1240                       deint_params->algorithm);
1241             return VA_STATUS_ERROR_UNSUPPORTED_FILTER;
1242         }
1243     }
1244     return VA_STATUS_SUCCESS;
1245 }
1246
1247 static VAStatus
1248 pp_dndi_context_ensure_surfaces_storage(VADriverContextP ctx,
1249     struct i965_post_processing_context *pp_context,
1250     struct object_surface *src_surface, struct object_surface *dst_surface)
1251 {
1252     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1253     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
1254     unsigned int src_fourcc, dst_fourcc;
1255     unsigned int src_sampling, dst_sampling;
1256     unsigned int src_tiling, dst_tiling;
1257     unsigned int i, swizzle;
1258     VAStatus status;
1259
1260     /* Determine input surface info. Always use NV12 Y-tiled */
1261     if (src_surface->bo) {
1262         src_fourcc = src_surface->fourcc;
1263         src_sampling = src_surface->subsampling;
1264         dri_bo_get_tiling(src_surface->bo, &src_tiling, &swizzle);
1265         src_tiling = !!src_tiling;
1266     }
1267     else {
1268         src_fourcc = VA_FOURCC_NV12;
1269         src_sampling = SUBSAMPLE_YUV420;
1270         src_tiling = 1;
1271         status = i965_check_alloc_surface_bo(ctx, src_surface,
1272             src_tiling, src_fourcc, src_sampling);
1273         if (status != VA_STATUS_SUCCESS)
1274             return status;
1275     }
1276
1277     /* Determine output surface info. Always use NV12 Y-tiled */
1278     if (dst_surface->bo) {
1279         dst_fourcc   = dst_surface->fourcc;
1280         dst_sampling = dst_surface->subsampling;
1281         dri_bo_get_tiling(dst_surface->bo, &dst_tiling, &swizzle);
1282         dst_tiling = !!dst_tiling;
1283     }
1284     else {
1285         dst_fourcc = VA_FOURCC_NV12;
1286         dst_sampling = SUBSAMPLE_YUV420;
1287         dst_tiling = 1;
1288         status = i965_check_alloc_surface_bo(ctx, dst_surface,
1289             dst_tiling, dst_fourcc, dst_sampling);
1290         if (status != VA_STATUS_SUCCESS)
1291             return status;
1292     }
1293
1294     /* Create pipeline surfaces */
1295     for (i = 0; i < ARRAY_ELEMS(dndi_ctx->frame_store); i ++) {
1296         struct object_surface *obj_surface;
1297         VASurfaceID new_surface;
1298         unsigned int width, height;
1299
1300         if (dndi_ctx->frame_store[i].obj_surface &&
1301             dndi_ctx->frame_store[i].obj_surface->bo)
1302             continue; // user allocated surface, not VPP internal
1303
1304         if (dndi_ctx->frame_store[i].obj_surface) {
1305             obj_surface = dndi_ctx->frame_store[i].obj_surface;
1306             dndi_ctx->frame_store[i].is_scratch_surface = 0;
1307         } else {
1308             if (i <= DNDI_FRAME_IN_STMM) {
1309                 width = src_surface->orig_width;
1310                 height = src_surface->orig_height;
1311             }
1312             else {
1313                 width = dst_surface->orig_width;
1314                 height = dst_surface->orig_height;
1315             }
1316
1317             status = i965_CreateSurfaces(ctx, width, height, VA_RT_FORMAT_YUV420,
1318                                          1, &new_surface);
1319             if (status != VA_STATUS_SUCCESS)
1320                 return status;
1321
1322             obj_surface = SURFACE(new_surface);
1323             assert(obj_surface != NULL);
1324             dndi_ctx->frame_store[i].is_scratch_surface = 1;
1325         }
1326
1327         if (i <= DNDI_FRAME_IN_PREVIOUS) {
1328             status = i965_check_alloc_surface_bo(ctx, obj_surface,
1329                 src_tiling, src_fourcc, src_sampling);
1330         }
1331         else if (i == DNDI_FRAME_IN_STMM || i == DNDI_FRAME_OUT_STMM) {
1332             status = i965_check_alloc_surface_bo(ctx, obj_surface,
1333                 1, VA_FOURCC_Y800, SUBSAMPLE_YUV400);
1334         }
1335         else if (i >= DNDI_FRAME_OUT_CURRENT) {
1336             status = i965_check_alloc_surface_bo(ctx, obj_surface,
1337                 dst_tiling, dst_fourcc, dst_sampling);
1338         }
1339         if (status != VA_STATUS_SUCCESS)
1340             return status;
1341
1342         dndi_ctx->frame_store[i].obj_surface = obj_surface;
1343     }
1344     return VA_STATUS_SUCCESS;
1345 }
1346
1347 static VAStatus
1348 pp_dndi_context_ensure_surfaces(VADriverContextP ctx,
1349     struct i965_post_processing_context *pp_context,
1350     struct object_surface *src_surface, struct object_surface *dst_surface)
1351 {
1352     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1353     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
1354     DNDIFrameStore *ifs, *ofs;
1355     bool is_new_frame = false;
1356
1357     /* Update the previous input surface */
1358     is_new_frame = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].surface_id !=
1359         src_surface->base.id;
1360     if (is_new_frame) {
1361         ifs = &dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS];
1362         ofs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1363         do {
1364             const VAProcPipelineParameterBuffer * const pipe_params =
1365                 pp_context->pipeline_param;
1366             struct object_surface *obj_surface;
1367
1368             if (pipe_params->num_forward_references < 1)
1369                 break;
1370             if (pipe_params->forward_references[0] == VA_INVALID_ID)
1371                 break;
1372
1373             obj_surface = SURFACE(pipe_params->forward_references[0]);
1374             if (!obj_surface || obj_surface->base.id == ifs->surface_id)
1375                 break;
1376
1377             pp_dndi_frame_store_clear(ifs, ctx);
1378             if (obj_surface->base.id == ofs->surface_id) {
1379                 *ifs = *ofs;
1380                 pp_dndi_frame_store_reset(ofs);
1381             }
1382             else {
1383                 ifs->obj_surface = obj_surface;
1384                 ifs->surface_id = obj_surface->base.id;
1385             }
1386         } while (0);
1387     }
1388
1389     /* Update the input surface */
1390     ifs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1391     pp_dndi_frame_store_clear(ifs, ctx);
1392     ifs->obj_surface = src_surface;
1393     ifs->surface_id = src_surface->base.id;
1394
1395     /* Update the Spatial Temporal Motion Measure (STMM) surfaces */
1396     if (is_new_frame)
1397         pp_dndi_frame_store_swap(&dndi_ctx->frame_store[DNDI_FRAME_IN_STMM],
1398             &dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM]);
1399
1400     /* Update the output surfaces */
1401     ofs = &dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT];
1402     if (dndi_ctx->is_di_adv_enabled && !dndi_ctx->is_first_frame) {
1403         pp_dndi_frame_store_swap(ofs,
1404             &dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS]);
1405         if (!dndi_ctx->is_second_field)
1406             ofs = &dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS];
1407     }
1408     pp_dndi_frame_store_clear(ofs, ctx);
1409     ofs->obj_surface = dst_surface;
1410     ofs->surface_id = dst_surface->base.id;
1411
1412     return VA_STATUS_SUCCESS;
1413 }
1414
1415 static int
1416 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
1417 {
1418     int fourcc;
1419
1420     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1421         struct object_image *obj_image = (struct object_image *)surface->base;
1422         fourcc = obj_image->image.format.fourcc;
1423     } else {
1424         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1425         fourcc = obj_surface->fourcc;
1426     }
1427
1428     return fourcc;
1429 }
1430
1431 static void
1432 pp_get_surface_size(VADriverContextP ctx, const struct i965_surface *surface, int *width, int *height)
1433 {
1434     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1435         struct object_image *obj_image = (struct object_image *)surface->base;
1436
1437         *width = obj_image->image.width;
1438         *height = obj_image->image.height;
1439     } else {
1440         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1441
1442         *width = obj_surface->orig_width;
1443         *height = obj_surface->orig_height;
1444     }
1445 }
1446
1447 static void
1448 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
1449 {
1450     switch (tiling) {
1451     case I915_TILING_NONE:
1452         ss->ss3.tiled_surface = 0;
1453         ss->ss3.tile_walk = 0;
1454         break;
1455     case I915_TILING_X:
1456         ss->ss3.tiled_surface = 1;
1457         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
1458         break;
1459     case I915_TILING_Y:
1460         ss->ss3.tiled_surface = 1;
1461         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
1462         break;
1463     }
1464 }
1465
1466 static void
1467 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
1468 {
1469     switch (tiling) {
1470     case I915_TILING_NONE:
1471         ss->ss2.tiled_surface = 0;
1472         ss->ss2.tile_walk = 0;
1473         break;
1474     case I915_TILING_X:
1475         ss->ss2.tiled_surface = 1;
1476         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1477         break;
1478     case I915_TILING_Y:
1479         ss->ss2.tiled_surface = 1;
1480         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1481         break;
1482     }
1483 }
1484
1485 static void
1486 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
1487 {
1488     switch (tiling) {
1489     case I915_TILING_NONE:
1490         ss->ss0.tiled_surface = 0;
1491         ss->ss0.tile_walk = 0;
1492         break;
1493     case I915_TILING_X:
1494         ss->ss0.tiled_surface = 1;
1495         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1496         break;
1497     case I915_TILING_Y:
1498         ss->ss0.tiled_surface = 1;
1499         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1500         break;
1501     }
1502 }
1503
1504 static void
1505 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
1506 {
1507     switch (tiling) {
1508     case I915_TILING_NONE:
1509         ss->ss2.tiled_surface = 0;
1510         ss->ss2.tile_walk = 0;
1511         break;
1512     case I915_TILING_X:
1513         ss->ss2.tiled_surface = 1;
1514         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1515         break;
1516     case I915_TILING_Y:
1517         ss->ss2.tiled_surface = 1;
1518         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1519         break;
1520     }
1521 }
1522
1523 static void
1524 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1525 {
1526     struct i965_interface_descriptor *desc;
1527     dri_bo *bo;
1528     int pp_index = pp_context->current_pp;
1529
1530     bo = pp_context->idrt.bo;
1531     dri_bo_map(bo, 1);
1532     assert(bo->virtual);
1533     desc = bo->virtual;
1534     memset(desc, 0, sizeof(*desc));
1535     desc->desc0.grf_reg_blocks = 10;
1536     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1537     desc->desc1.const_urb_entry_read_offset = 0;
1538     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
1539     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
1540     desc->desc2.sampler_count = 0;
1541     desc->desc3.binding_table_entry_count = 0;
1542     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
1543
1544     dri_bo_emit_reloc(bo,
1545                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1546                       desc->desc0.grf_reg_blocks,
1547                       offsetof(struct i965_interface_descriptor, desc0),
1548                       pp_context->pp_modules[pp_index].kernel.bo);
1549
1550     dri_bo_emit_reloc(bo,
1551                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1552                       desc->desc2.sampler_count << 2,
1553                       offsetof(struct i965_interface_descriptor, desc2),
1554                       pp_context->sampler_state_table.bo);
1555
1556     dri_bo_unmap(bo);
1557     pp_context->idrt.num_interface_descriptors++;
1558 }
1559
1560 static void
1561 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
1562 {
1563     struct i965_vfe_state *vfe_state;
1564     dri_bo *bo;
1565
1566     bo = pp_context->vfe_state.bo;
1567     dri_bo_map(bo, 1);
1568     assert(bo->virtual);
1569     vfe_state = bo->virtual;
1570     memset(vfe_state, 0, sizeof(*vfe_state));
1571     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
1572     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
1573     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
1574     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
1575     vfe_state->vfe1.children_present = 0;
1576     vfe_state->vfe2.interface_descriptor_base = 
1577         pp_context->idrt.bo->offset >> 4; /* reloc */
1578     dri_bo_emit_reloc(bo,
1579                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1580                       0,
1581                       offsetof(struct i965_vfe_state, vfe2),
1582                       pp_context->idrt.bo);
1583     dri_bo_unmap(bo);
1584 }
1585
1586 static void
1587 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
1588 {
1589     unsigned char *constant_buffer;
1590     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1591
1592     assert(sizeof(*pp_static_parameter) == 128);
1593     dri_bo_map(pp_context->curbe.bo, 1);
1594     assert(pp_context->curbe.bo->virtual);
1595     constant_buffer = pp_context->curbe.bo->virtual;
1596     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1597     dri_bo_unmap(pp_context->curbe.bo);
1598 }
1599
1600 static void
1601 ironlake_pp_states_setup(VADriverContextP ctx,
1602                          struct i965_post_processing_context *pp_context)
1603 {
1604     ironlake_pp_interface_descriptor_table(pp_context);
1605     ironlake_pp_vfe_state(pp_context);
1606     ironlake_pp_upload_constants(pp_context);
1607 }
1608
1609 static void
1610 ironlake_pp_pipeline_select(VADriverContextP ctx,
1611                             struct i965_post_processing_context *pp_context)
1612 {
1613     struct intel_batchbuffer *batch = pp_context->batch;
1614
1615     BEGIN_BATCH(batch, 1);
1616     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1617     ADVANCE_BATCH(batch);
1618 }
1619
1620 static void
1621 ironlake_pp_urb_layout(VADriverContextP ctx,
1622                        struct i965_post_processing_context *pp_context)
1623 {
1624     struct intel_batchbuffer *batch = pp_context->batch;
1625     unsigned int vfe_fence, cs_fence;
1626
1627     vfe_fence = pp_context->urb.cs_start;
1628     cs_fence = pp_context->urb.size;
1629
1630     BEGIN_BATCH(batch, 3);
1631     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1632     OUT_BATCH(batch, 0);
1633     OUT_BATCH(batch, 
1634               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
1635               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
1636     ADVANCE_BATCH(batch);
1637 }
1638
1639 static void
1640 ironlake_pp_state_base_address(VADriverContextP ctx,
1641                                struct i965_post_processing_context *pp_context)
1642 {
1643     struct intel_batchbuffer *batch = pp_context->batch;
1644
1645     BEGIN_BATCH(batch, 8);
1646     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1647     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1648     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1649     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1650     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1651     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1652     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1653     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1654     ADVANCE_BATCH(batch);
1655 }
1656
1657 static void
1658 ironlake_pp_state_pointers(VADriverContextP ctx,
1659                            struct i965_post_processing_context *pp_context)
1660 {
1661     struct intel_batchbuffer *batch = pp_context->batch;
1662
1663     BEGIN_BATCH(batch, 3);
1664     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1665     OUT_BATCH(batch, 0);
1666     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1667     ADVANCE_BATCH(batch);
1668 }
1669
1670 static void 
1671 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1672                           struct i965_post_processing_context *pp_context)
1673 {
1674     struct intel_batchbuffer *batch = pp_context->batch;
1675
1676     BEGIN_BATCH(batch, 2);
1677     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1678     OUT_BATCH(batch,
1679               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1680               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1681     ADVANCE_BATCH(batch);
1682 }
1683
1684 static void
1685 ironlake_pp_constant_buffer(VADriverContextP ctx,
1686                             struct i965_post_processing_context *pp_context)
1687 {
1688     struct intel_batchbuffer *batch = pp_context->batch;
1689
1690     BEGIN_BATCH(batch, 2);
1691     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1692     OUT_RELOC(batch, pp_context->curbe.bo,
1693               I915_GEM_DOMAIN_INSTRUCTION, 0,
1694               pp_context->urb.size_cs_entry - 1);
1695     ADVANCE_BATCH(batch);    
1696 }
1697
1698 static void
1699 ironlake_pp_object_walker(VADriverContextP ctx,
1700                           struct i965_post_processing_context *pp_context)
1701 {
1702     struct intel_batchbuffer *batch = pp_context->batch;
1703     int x, x_steps, y, y_steps;
1704     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1705
1706     x_steps = pp_context->pp_x_steps(pp_context->private_context);
1707     y_steps = pp_context->pp_y_steps(pp_context->private_context);
1708
1709     for (y = 0; y < y_steps; y++) {
1710         for (x = 0; x < x_steps; x++) {
1711             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1712                 BEGIN_BATCH(batch, 20);
1713                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1714                 OUT_BATCH(batch, 0);
1715                 OUT_BATCH(batch, 0); /* no indirect data */
1716                 OUT_BATCH(batch, 0);
1717
1718                 /* inline data grf 5-6 */
1719                 assert(sizeof(*pp_inline_parameter) == 64);
1720                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1721
1722                 ADVANCE_BATCH(batch);
1723             }
1724         }
1725     }
1726 }
1727
1728 static void
1729 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1730                            struct i965_post_processing_context *pp_context)
1731 {
1732     struct intel_batchbuffer *batch = pp_context->batch;
1733
1734     intel_batchbuffer_start_atomic(batch, 0x1000);
1735     intel_batchbuffer_emit_mi_flush(batch);
1736     ironlake_pp_pipeline_select(ctx, pp_context);
1737     ironlake_pp_state_base_address(ctx, pp_context);
1738     ironlake_pp_state_pointers(ctx, pp_context);
1739     ironlake_pp_urb_layout(ctx, pp_context);
1740     ironlake_pp_cs_urb_layout(ctx, pp_context);
1741     ironlake_pp_constant_buffer(ctx, pp_context);
1742     ironlake_pp_object_walker(ctx, pp_context);
1743     intel_batchbuffer_end_atomic(batch);
1744 }
1745
1746 // update u/v offset when the surface format are packed yuv
1747 static void i965_update_src_surface_static_parameter(
1748     VADriverContextP    ctx, 
1749     struct i965_post_processing_context *pp_context,
1750     const struct i965_surface *surface)
1751 {
1752     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1753     int fourcc = pp_get_surface_fourcc(ctx, surface);
1754
1755     switch (fourcc) {
1756     case VA_FOURCC_YUY2:
1757         pp_static_parameter->grf1.source_packed_u_offset = 1;
1758         pp_static_parameter->grf1.source_packed_v_offset = 3;
1759         break;
1760     case VA_FOURCC_UYVY:
1761         pp_static_parameter->grf1.source_packed_y_offset = 1;
1762         pp_static_parameter->grf1.source_packed_v_offset = 2;
1763         break;
1764     case VA_FOURCC_BGRX:
1765     case VA_FOURCC_BGRA:
1766         pp_static_parameter->grf1.source_rgb_layout = 0;
1767         break;
1768     case VA_FOURCC_RGBX:
1769     case VA_FOURCC_RGBA:
1770         pp_static_parameter->grf1.source_rgb_layout = 1;
1771         break;
1772     default:
1773         break;
1774     }
1775     
1776 }
1777
1778 static void i965_update_dst_surface_static_parameter(
1779     VADriverContextP    ctx, 
1780     struct i965_post_processing_context *pp_context,
1781     const struct i965_surface *surface)
1782 {
1783     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1784     int fourcc = pp_get_surface_fourcc(ctx, surface);
1785
1786     switch (fourcc) {
1787     case VA_FOURCC_YUY2:
1788         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1789         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1790         break;
1791     case VA_FOURCC_UYVY:
1792         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1793         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1794         break;
1795     case VA_FOURCC_BGRX:
1796     case VA_FOURCC_BGRA:
1797         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1798         break;
1799     case VA_FOURCC_RGBX:
1800     case VA_FOURCC_RGBA:
1801         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
1802         break;
1803     default:
1804         break;
1805     }
1806     
1807 }
1808
1809 static void
1810 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1811                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1812                           int width, int height, int pitch, int format, 
1813                           int index, int is_target)
1814 {
1815     struct i965_surface_state *ss;
1816     dri_bo *ss_bo;
1817     unsigned int tiling;
1818     unsigned int swizzle;
1819
1820     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1821     ss_bo = pp_context->surface_state_binding_table.bo;
1822     assert(ss_bo);
1823
1824     dri_bo_map(ss_bo, True);
1825     assert(ss_bo->virtual);
1826     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1827     memset(ss, 0, sizeof(*ss));
1828     ss->ss0.surface_type = I965_SURFACE_2D;
1829     ss->ss0.surface_format = format;
1830     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1831     ss->ss2.width = width - 1;
1832     ss->ss2.height = height - 1;
1833     ss->ss3.pitch = pitch - 1;
1834     pp_set_surface_tiling(ss, tiling);
1835     dri_bo_emit_reloc(ss_bo,
1836                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1837                       surf_bo_offset,
1838                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1839                       surf_bo);
1840     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1841     dri_bo_unmap(ss_bo);
1842 }
1843
1844 static void
1845 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1846                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1847                            int width, int height, int wpitch,
1848                            int xoffset, int yoffset,
1849                            int format, int interleave_chroma,
1850                            int index)
1851 {
1852     struct i965_surface_state2 *ss2;
1853     dri_bo *ss2_bo;
1854     unsigned int tiling;
1855     unsigned int swizzle;
1856
1857     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1858     ss2_bo = pp_context->surface_state_binding_table.bo;
1859     assert(ss2_bo);
1860
1861     dri_bo_map(ss2_bo, True);
1862     assert(ss2_bo->virtual);
1863     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1864     memset(ss2, 0, sizeof(*ss2));
1865     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1866     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1867     ss2->ss1.width = width - 1;
1868     ss2->ss1.height = height - 1;
1869     ss2->ss2.pitch = wpitch - 1;
1870     ss2->ss2.interleave_chroma = interleave_chroma;
1871     ss2->ss2.surface_format = format;
1872     ss2->ss3.x_offset_for_cb = xoffset;
1873     ss2->ss3.y_offset_for_cb = yoffset;
1874     pp_set_surface2_tiling(ss2, tiling);
1875     dri_bo_emit_reloc(ss2_bo,
1876                       I915_GEM_DOMAIN_RENDER, 0,
1877                       surf_bo_offset,
1878                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1879                       surf_bo);
1880     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1881     dri_bo_unmap(ss2_bo);
1882 }
1883
1884 static void
1885 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1886                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1887                           int width, int height, int pitch, int format, 
1888                           int index, int is_target)
1889 {
1890     struct i965_driver_data * const i965 = i965_driver_data(ctx);  
1891     struct gen7_surface_state *ss;
1892     dri_bo *ss_bo;
1893     unsigned int tiling;
1894     unsigned int swizzle;
1895
1896     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1897     ss_bo = pp_context->surface_state_binding_table.bo;
1898     assert(ss_bo);
1899
1900     dri_bo_map(ss_bo, True);
1901     assert(ss_bo->virtual);
1902     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1903     memset(ss, 0, sizeof(*ss));
1904     ss->ss0.surface_type = I965_SURFACE_2D;
1905     ss->ss0.surface_format = format;
1906     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1907     ss->ss2.width = width - 1;
1908     ss->ss2.height = height - 1;
1909     ss->ss3.pitch = pitch - 1;
1910     gen7_pp_set_surface_tiling(ss, tiling);
1911     if (IS_HASWELL(i965->intel.device_info))
1912         gen7_render_set_surface_scs(ss);
1913     dri_bo_emit_reloc(ss_bo,
1914                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1915                       surf_bo_offset,
1916                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1917                       surf_bo);
1918     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1919     dri_bo_unmap(ss_bo);
1920 }
1921
1922 static void
1923 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1924                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1925                            int width, int height, int wpitch,
1926                            int xoffset, int yoffset,
1927                            int format, int interleave_chroma,
1928                            int index)
1929 {
1930     struct gen7_surface_state2 *ss2;
1931     dri_bo *ss2_bo;
1932     unsigned int tiling;
1933     unsigned int swizzle;
1934
1935     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1936     ss2_bo = pp_context->surface_state_binding_table.bo;
1937     assert(ss2_bo);
1938
1939     dri_bo_map(ss2_bo, True);
1940     assert(ss2_bo->virtual);
1941     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1942     memset(ss2, 0, sizeof(*ss2));
1943     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1944     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1945     ss2->ss1.width = width - 1;
1946     ss2->ss1.height = height - 1;
1947     ss2->ss2.pitch = wpitch - 1;
1948     ss2->ss2.interleave_chroma = interleave_chroma;
1949     ss2->ss2.surface_format = format;
1950     ss2->ss3.x_offset_for_cb = xoffset;
1951     ss2->ss3.y_offset_for_cb = yoffset;
1952     gen7_pp_set_surface2_tiling(ss2, tiling);
1953     dri_bo_emit_reloc(ss2_bo,
1954                       I915_GEM_DOMAIN_RENDER, 0,
1955                       surf_bo_offset,
1956                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1957                       surf_bo);
1958     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1959     dri_bo_unmap(ss2_bo);
1960 }
1961
1962 static void 
1963 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1964                                 const struct i965_surface *surface, 
1965                                 int base_index, int is_target,
1966                                 int *width, int *height, int *pitch, int *offset)
1967 {
1968     struct object_surface *obj_surface;
1969     struct object_image *obj_image;
1970     dri_bo *bo;
1971     int fourcc = pp_get_surface_fourcc(ctx, surface);
1972     const int Y = 0;
1973     const int U = ((fourcc == VA_FOURCC_YV12) ||
1974                    (fourcc == VA_FOURCC_YV16))
1975                    ? 2 : 1;
1976     const int V = ((fourcc == VA_FOURCC_YV12) ||
1977                    (fourcc == VA_FOURCC_YV16))
1978                    ? 1 : 2;
1979     const int UV = 1;
1980     int interleaved_uv = fourcc == VA_FOURCC_NV12;
1981     int packed_yuv = (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY);
1982     int full_packed_format = (fourcc == VA_FOURCC_RGBA ||
1983                               fourcc == VA_FOURCC_RGBX ||
1984                               fourcc == VA_FOURCC_BGRA ||
1985                               fourcc == VA_FOURCC_BGRX);
1986     int scale_factor_of_1st_plane_width_in_byte = 1;
1987                               
1988     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1989         obj_surface = (struct object_surface *)surface->base;
1990         bo = obj_surface->bo;
1991         width[0] = obj_surface->orig_width;
1992         height[0] = obj_surface->orig_height;
1993         pitch[0] = obj_surface->width;
1994         offset[0] = 0;
1995
1996         if (full_packed_format) {
1997             scale_factor_of_1st_plane_width_in_byte = 4; 
1998         }
1999         else if (packed_yuv ) {
2000             scale_factor_of_1st_plane_width_in_byte =  2; 
2001         }
2002         else if (interleaved_uv) {
2003             width[1] = obj_surface->orig_width;
2004             height[1] = obj_surface->orig_height / 2;
2005             pitch[1] = obj_surface->width;
2006             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
2007         } else {
2008             width[1] = obj_surface->orig_width / 2;
2009             height[1] = obj_surface->orig_height / 2;
2010             pitch[1] = obj_surface->width / 2;
2011             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
2012             width[2] = obj_surface->orig_width / 2;
2013             height[2] = obj_surface->orig_height / 2;
2014             pitch[2] = obj_surface->width / 2;
2015             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
2016         }
2017     } else {
2018         obj_image = (struct object_image *)surface->base;
2019         bo = obj_image->bo;
2020         width[0] = obj_image->image.width;
2021         height[0] = obj_image->image.height;
2022         pitch[0] = obj_image->image.pitches[0];
2023         offset[0] = obj_image->image.offsets[0];
2024
2025         if (full_packed_format) {
2026             scale_factor_of_1st_plane_width_in_byte = 4;
2027         }
2028         else if (packed_yuv ) {
2029             scale_factor_of_1st_plane_width_in_byte = 2;
2030         }
2031         else if (interleaved_uv) {
2032             width[1] = obj_image->image.width;
2033             height[1] = obj_image->image.height / 2;
2034             pitch[1] = obj_image->image.pitches[1];
2035             offset[1] = obj_image->image.offsets[1];
2036         } else {
2037             width[1] = obj_image->image.width / 2;
2038             height[1] = obj_image->image.height / 2;
2039             pitch[1] = obj_image->image.pitches[1];
2040             offset[1] = obj_image->image.offsets[1];
2041             width[2] = obj_image->image.width / 2;
2042             height[2] = obj_image->image.height / 2;
2043             pitch[2] = obj_image->image.pitches[2];
2044             offset[2] = obj_image->image.offsets[2];
2045             if (fourcc == VA_FOURCC_YV16) {
2046                 width[1] = obj_image->image.width / 2;
2047                 height[1] = obj_image->image.height;
2048                 width[2] = obj_image->image.width / 2;
2049                 height[2] = obj_image->image.height;
2050             }
2051         }
2052     }
2053
2054     /* Y surface */
2055     i965_pp_set_surface_state(ctx, pp_context,
2056                               bo, offset[Y],
2057                               ALIGN(width[Y] *scale_factor_of_1st_plane_width_in_byte, 4) / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
2058                               base_index, is_target);
2059
2060     if (!packed_yuv && !full_packed_format) {
2061         if (interleaved_uv) {
2062             i965_pp_set_surface_state(ctx, pp_context,
2063                                       bo, offset[UV],
2064                                       ALIGN(width[UV], 4) / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
2065                                       base_index + 1, is_target);
2066         } else {
2067             /* U surface */
2068             i965_pp_set_surface_state(ctx, pp_context,
2069                                       bo, offset[U],
2070                                       ALIGN(width[U], 4) / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
2071                                       base_index + 1, is_target);
2072
2073             /* V surface */
2074             i965_pp_set_surface_state(ctx, pp_context,
2075                                       bo, offset[V],
2076                                       ALIGN(width[V], 4) / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
2077                                       base_index + 2, is_target);
2078         }
2079     }
2080
2081 }
2082
2083 static void 
2084 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2085                                      const struct i965_surface *surface, 
2086                                      int base_index, int is_target,
2087                                      const VARectangle *rect,
2088                                      int *width, int *height, int *pitch, int *offset)
2089 {
2090     struct object_surface *obj_surface;
2091     struct object_image *obj_image;
2092     dri_bo *bo;
2093     int fourcc = pp_get_surface_fourcc(ctx, surface);
2094     const i965_fourcc_info *fourcc_info = get_fourcc_info(fourcc);
2095
2096     if (fourcc_info == NULL)
2097         return;
2098
2099     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
2100         obj_surface = (struct object_surface *)surface->base;
2101         bo = obj_surface->bo;
2102         width[0] = MIN(rect->x + rect->width, obj_surface->orig_width);
2103         height[0] = MIN(rect->y + rect->height, obj_surface->orig_height);
2104         pitch[0] = obj_surface->width;
2105         offset[0] = 0;
2106
2107         if (fourcc_info->num_planes == 1 && is_target)
2108             width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
2109
2110         width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width);
2111         height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height);
2112         pitch[1] = obj_surface->cb_cr_pitch;
2113         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
2114
2115         width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width);
2116         height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height);
2117         pitch[2] = obj_surface->cb_cr_pitch;
2118         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
2119     } else {
2120         int U = 0, V = 0;
2121
2122         /* FIXME: add support for ARGB/ABGR image */
2123         obj_image = (struct object_image *)surface->base;
2124         bo = obj_image->bo;
2125         width[0] = MIN(rect->x + rect->width, obj_image->image.width);
2126         height[0] = MIN(rect->y + rect->height, obj_image->image.height);
2127         pitch[0] = obj_image->image.pitches[0];
2128         offset[0] = obj_image->image.offsets[0];
2129
2130         if (fourcc_info->num_planes == 1) {
2131             if (is_target)
2132                 width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
2133         } else if (fourcc_info->num_planes == 2) {
2134             U = 1, V = 1;
2135         } else {
2136             assert(fourcc_info->num_components == 3);
2137
2138             U = fourcc_info->components[1].plane;
2139             V = fourcc_info->components[2].plane;
2140             assert((U == 1 && V == 2) ||
2141                    (U == 2 && V == 1));
2142         }
2143
2144         /* Always set width/height although they aren't used for fourcc_info->num_planes == 1 */
2145         width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor);
2146         height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor);
2147         pitch[1] = obj_image->image.pitches[U];
2148         offset[1] = obj_image->image.offsets[U];
2149
2150         width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor);
2151         height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor);
2152         pitch[2] = obj_image->image.pitches[V];
2153         offset[2] = obj_image->image.offsets[V];
2154     }
2155
2156     if (is_target) {
2157         gen7_pp_set_surface_state(ctx, pp_context,
2158                                   bo, 0,
2159                                   ALIGN(width[0], 4) / 4, height[0], pitch[0],
2160                                   I965_SURFACEFORMAT_R8_UINT,
2161                                   base_index, 1);
2162
2163         if (fourcc_info->num_planes == 2) {
2164             gen7_pp_set_surface_state(ctx, pp_context,
2165                                       bo, offset[1],
2166                                       ALIGN(width[1], 2) / 2, height[1], pitch[1],
2167                                       I965_SURFACEFORMAT_R8G8_SINT,
2168                                       base_index + 1, 1);
2169         } else if (fourcc_info->num_planes == 3) {
2170             gen7_pp_set_surface_state(ctx, pp_context,
2171                                       bo, offset[1],
2172                                       ALIGN(width[1], 4) / 4, height[1], pitch[1],
2173                                       I965_SURFACEFORMAT_R8_SINT,
2174                                       base_index + 1, 1);
2175             gen7_pp_set_surface_state(ctx, pp_context,
2176                                       bo, offset[2],
2177                                       ALIGN(width[2], 4) / 4, height[2], pitch[2],
2178                                       I965_SURFACEFORMAT_R8_SINT,
2179                                       base_index + 2, 1);
2180         }
2181
2182         if (fourcc_info->format == I965_COLOR_RGB) {
2183             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2184             /* the format is MSB: X-B-G-R */
2185             pp_static_parameter->grf2.save_avs_rgb_swap = 0;
2186             if ((fourcc == VA_FOURCC_BGRA) ||
2187                 (fourcc == VA_FOURCC_BGRX)) {
2188                 /* It is stored as MSB: X-R-G-B */
2189                 pp_static_parameter->grf2.save_avs_rgb_swap = 1;
2190             }
2191         }
2192     } else {
2193         int format0 = SURFACE_FORMAT_Y8_UNORM;
2194
2195         switch (fourcc) {
2196         case VA_FOURCC_YUY2:
2197             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
2198             break;
2199
2200         case VA_FOURCC_UYVY:
2201             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
2202             break;
2203
2204         default:
2205             break;
2206         }
2207
2208         if (fourcc_info->format == I965_COLOR_RGB) {
2209             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2210             /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
2211             format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
2212             pp_static_parameter->grf2.src_avs_rgb_swap = 0;
2213             if ((fourcc == VA_FOURCC_BGRA) ||
2214                 (fourcc == VA_FOURCC_BGRX)) {
2215                 pp_static_parameter->grf2.src_avs_rgb_swap = 1;
2216             }
2217         }
2218
2219         gen7_pp_set_surface2_state(ctx, pp_context,
2220                                    bo, offset[0],
2221                                    width[0], height[0], pitch[0],
2222                                    0, 0,
2223                                    format0, 0,
2224                                    base_index);
2225
2226         if (fourcc_info->num_planes == 2) {
2227             gen7_pp_set_surface2_state(ctx, pp_context,
2228                                        bo, offset[1],
2229                                        width[1], height[1], pitch[1],
2230                                        0, 0,
2231                                        SURFACE_FORMAT_R8B8_UNORM, 0,
2232                                        base_index + 1);
2233         } else if (fourcc_info->num_planes == 3) {
2234             gen7_pp_set_surface2_state(ctx, pp_context,
2235                                        bo, offset[1],
2236                                        width[1], height[1], pitch[1],
2237                                        0, 0,
2238                                        SURFACE_FORMAT_R8_UNORM, 0,
2239                                        base_index + 1);
2240             gen7_pp_set_surface2_state(ctx, pp_context,
2241                                        bo, offset[2],
2242                                        width[2], height[2], pitch[2],
2243                                        0, 0,
2244                                        SURFACE_FORMAT_R8_UNORM, 0,
2245                                        base_index + 2);
2246         }
2247     }
2248 }
2249
/* Horizontal step count for the "null" module: always a single step. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context; /* the null module keeps no private state */

    return 1;
}
2255
/* Vertical step count for the "null" module: always a single step. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context; /* the null module keeps no private state */

    return 1;
}
2261
/* Per-block hook for the "null" module: nothing to program, returns 0. */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
2267
2268 static VAStatus
2269 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2270                    const struct i965_surface *src_surface,
2271                    const VARectangle *src_rect,
2272                    struct i965_surface *dst_surface,
2273                    const VARectangle *dst_rect,
2274                    void *filter_param)
2275 {
2276     /* private function & data */
2277     pp_context->pp_x_steps = pp_null_x_steps;
2278     pp_context->pp_y_steps = pp_null_y_steps;
2279     pp_context->private_context = NULL;
2280     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
2281
2282     dst_surface->flags = src_surface->flags;
2283
2284     return VA_STATUS_SUCCESS;
2285 }
2286
/* Horizontal step count for the load/save module: always a single step. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context; /* not needed for the horizontal count */

    return 1;
}
2292
2293 static int
2294 pp_load_save_y_steps(void *private_context)
2295 {
2296     struct pp_load_save_context *pp_load_save_context = private_context;
2297
2298     return pp_load_save_context->dest_h / 8;
2299 }
2300
2301 static int
2302 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2303 {
2304     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2305     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)pp_context->private_context;
2306
2307     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
2308     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
2309
2310     return 0;
2311 }
2312
2313 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
2314 {
2315     int i;
2316     /* x offset of dest surface must be dword aligned.
2317      * so we have to extend dst surface on left edge, and mask out pixels not interested
2318      */
2319     if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) {
2320         pp_context->block_horizontal_mask_left = 0;
2321         for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++)
2322         {
2323             pp_context->block_horizontal_mask_left |= 1<<i;
2324         }
2325     }
2326     else {
2327         pp_context->block_horizontal_mask_left = 0xffff;
2328     }
2329     
2330     int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; 
2331     if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){
2332         pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1;
2333     }
2334     else {
2335         pp_context->block_horizontal_mask_right = 0xffff;
2336     }
2337     
2338     if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){
2339         pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1;
2340     }
2341     else {
2342         pp_context->block_vertical_mask_bottom = 0xff;
2343     }
2344
2345 }
2346 static VAStatus
2347 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2348                                 const struct i965_surface *src_surface,
2349                                 const VARectangle *src_rect,
2350                                 struct i965_surface *dst_surface,
2351                                 const VARectangle *dst_rect,
2352                                 void *filter_param)
2353 {
2354     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->pp_load_save_context;
2355     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2356     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2357     int width[3], height[3], pitch[3], offset[3];
2358
2359     /* source surface */
2360     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
2361                                     width, height, pitch, offset);
2362
2363     /* destination surface */
2364     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
2365                                     width, height, pitch, offset);
2366
2367     /* private function & data */
2368     pp_context->pp_x_steps = pp_load_save_x_steps;
2369     pp_context->pp_y_steps = pp_load_save_y_steps;
2370     pp_context->private_context = &pp_context->pp_load_save_context;
2371     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
2372
2373     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;;
2374     pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
2375     pp_load_save_context->dest_y = dst_rect->y;
2376     pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
2377     pp_load_save_context->dest_w = ALIGN(dst_rect->width+dst_left_edge_extend, 16);
2378
2379     pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
2380     pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
2381
2382     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
2383     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
2384
2385     // update u/v offset for packed yuv
2386     i965_update_src_surface_static_parameter (ctx, pp_context, src_surface);
2387     i965_update_dst_surface_static_parameter (ctx, pp_context, dst_surface);
2388
2389     dst_surface->flags = src_surface->flags;
2390
2391     return VA_STATUS_SUCCESS;
2392 }
2393
/* Horizontal step count for the scaling module: always a single step. */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context; /* not needed for the horizontal count */

    return 1;
}
2399
2400 static int
2401 pp_scaling_y_steps(void *private_context)
2402 {
2403     struct pp_scaling_context *pp_scaling_context = private_context;
2404
2405     return pp_scaling_context->dest_h / 8;
2406 }
2407
2408 static int
2409 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2410 {
2411     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)pp_context->private_context;
2412     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2413     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2414     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2415     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2416
2417     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
2418     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
2419     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
2420     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
2421     
2422     return 0;
2423 }
2424
2425 static VAStatus
2426 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2427                            const struct i965_surface *src_surface,
2428                            const VARectangle *src_rect,
2429                            struct i965_surface *dst_surface,
2430                            const VARectangle *dst_rect,
2431                            void *filter_param)
2432 {
2433     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->pp_scaling_context;
2434     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2435     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2436     struct object_surface *obj_surface;
2437     struct i965_sampler_state *sampler_state;
2438     int in_w, in_h, in_wpitch, in_hpitch;
2439     int out_w, out_h, out_wpitch, out_hpitch;
2440
2441     /* source surface */
2442     obj_surface = (struct object_surface *)src_surface->base;
2443     in_w = obj_surface->orig_width;
2444     in_h = obj_surface->orig_height;
2445     in_wpitch = obj_surface->width;
2446     in_hpitch = obj_surface->height;
2447
2448     /* source Y surface index 1 */
2449     i965_pp_set_surface_state(ctx, pp_context,
2450                               obj_surface->bo, 0,
2451                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2452                               1, 0);
2453
2454     /* source UV surface index 2 */
2455     i965_pp_set_surface_state(ctx, pp_context,
2456                               obj_surface->bo, in_wpitch * in_hpitch,
2457                               ALIGN(in_w, 2) / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2458                               2, 0);
2459
2460     /* destination surface */
2461     obj_surface = (struct object_surface *)dst_surface->base;
2462     out_w = obj_surface->orig_width;
2463     out_h = obj_surface->orig_height;
2464     out_wpitch = obj_surface->width;
2465     out_hpitch = obj_surface->height;
2466
2467     /* destination Y surface index 7 */
2468     i965_pp_set_surface_state(ctx, pp_context,
2469                               obj_surface->bo, 0,
2470                               ALIGN(out_w, 4) / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2471                               7, 1);
2472
2473     /* destination UV surface index 8 */
2474     i965_pp_set_surface_state(ctx, pp_context,
2475                               obj_surface->bo, out_wpitch * out_hpitch,
2476                               ALIGN(out_w, 4) / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2477                               8, 1);
2478
2479     /* sampler state */
2480     dri_bo_map(pp_context->sampler_state_table.bo, True);
2481     assert(pp_context->sampler_state_table.bo->virtual);
2482     sampler_state = pp_context->sampler_state_table.bo->virtual;
2483
2484     /* SIMD16 Y index 1 */
2485     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
2486     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2487     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2488     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2489     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2490
2491     /* SIMD16 UV index 2 */
2492     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
2493     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2494     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2495     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2496     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2497
2498     dri_bo_unmap(pp_context->sampler_state_table.bo);
2499
2500     /* private function & data */
2501     pp_context->pp_x_steps = pp_scaling_x_steps;
2502     pp_context->pp_y_steps = pp_scaling_y_steps;
2503     pp_context->private_context = &pp_context->pp_scaling_context;
2504     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
2505
2506     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2507     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2508     pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
2509     pp_scaling_context->dest_y = dst_rect->y;
2510     pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2511     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
2512     pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2513     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
2514
2515     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2516
2517     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2518     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
2519     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
2520
2521     dst_surface->flags = src_surface->flags;
2522
2523     return VA_STATUS_SUCCESS;
2524 }
2525
2526 static int
2527 pp_avs_x_steps(void *private_context)
2528 {
2529     struct pp_avs_context *pp_avs_context = private_context;
2530
2531     return pp_avs_context->dest_w / 16;
2532 }
2533
/* Vertical step count for the AVS module: always a single step. */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context; /* not needed for the vertical count */

    return 1;
}
2539
2540 static int
2541 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2542 {
2543     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
2544     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2545     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2546     float src_x_steping, src_y_steping, video_step_delta;
2547     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
2548
2549     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
2550         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2551         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
2552     } else if (tmp_w >= pp_avs_context->dest_w) {
2553         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2554         pp_inline_parameter->grf6.video_step_delta = 0;
2555         
2556         if (x == 0) {
2557             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
2558                 pp_avs_context->src_normalized_x;
2559         } else {
2560             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2561             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2562             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2563                 16 * 15 * video_step_delta / 2;
2564         }
2565     } else {
2566         int n0, n1, n2, nls_left, nls_right;
2567         int factor_a = 5, factor_b = 4;
2568         float f;
2569
2570         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
2571         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
2572         n2 = tmp_w / (16 * factor_a);
2573         nls_left = n0 + n2;
2574         nls_right = n1 + n2;
2575         f = (float) n2 * 16 / tmp_w;
2576         
2577         if (n0 < 5) {
2578             pp_inline_parameter->grf6.video_step_delta = 0.0;
2579
2580             if (x == 0) {
2581                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
2582                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2583             } else {
2584                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2585                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2586                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2587                     16 * 15 * video_step_delta / 2;
2588             }
2589         } else {
2590             if (x < nls_left) {
2591                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
2592                 float a = f / (nls_left * 16 * factor_b);
2593                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
2594                 
2595                 pp_inline_parameter->grf6.video_step_delta = b;
2596
2597                 if (x == 0) {
2598                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2599                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
2600                 } else {
2601                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2602                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2603                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2604                         16 * 15 * video_step_delta / 2;
2605                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
2606                 }
2607             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
2608                 /* scale the center linearly */
2609                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2610                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2611                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2612                     16 * 15 * video_step_delta / 2;
2613                 pp_inline_parameter->grf6.video_step_delta = 0.0;
2614                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2615             } else {
2616                 float a = f / (nls_right * 16 * factor_b);
2617                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
2618
2619                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2620                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2621                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2622                     16 * 15 * video_step_delta / 2;
2623                 pp_inline_parameter->grf6.video_step_delta = -b;
2624
2625                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
2626                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
2627                 else
2628                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
2629             }
2630         }
2631     }
2632
2633     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2634     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
2635     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2636     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
2637
2638     return 0;
2639 }
2640
/*
 * AVS coefficient configuration named for gen5 (presumably Ironlake-class
 * hardware -- confirm against the code that selects it).
 *
 * Coefficients use 6 fractional bits, so the smallest representable step
 * (coeff_epsilon) is 1/64.  The filter has 16 phases with 8 luma and
 * 4 chroma taps.  coeff_range lists, per tap, the lowest and highest
 * coefficient value allowed for the horizontal (_h) and vertical (_v)
 * luma (y) and chroma (uv) filters.  The two center luma taps have a lower
 * bound of 0 in this table.
 */
2641 static const AVSConfig gen5_avs_config = {
2642     .coeff_frac_bits = 6,
2643     .coeff_epsilon = 1.0f / (1U << 6),
2644     .num_phases = 16,
2645     .num_luma_coeffs = 8,
2646     .num_chroma_coeffs = 4,
2647
2648     .coeff_range = {
2649         .lower_bound = {
2650             .y_k_h = { -0.25f, -0.5f, -1, 0, 0, -1, -0.5f, -0.25f },
2651             .y_k_v = { -0.25f, -0.5f, -1, 0, 0, -1, -0.5f, -0.25f },
2652             .uv_k_h = { -1, 0, 0, -1 },
2653             .uv_k_v = { -1, 0, 0, -1 },
2654         },
2655         .upper_bound = {
2656             .y_k_h = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2657             .y_k_v = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2658             .uv_k_h = { 1, 2, 2, 1 },
2659             .uv_k_v = { 1, 2, 2, 1 },
2660         },
2661     },
2662 };
2663
/*
 * AVS coefficient configuration named for gen6 (presumably Sandy Bridge-class
 * hardware and later -- confirm against the code that selects it).
 *
 * Same layout as gen5_avs_config: 6 fractional bits (epsilon 1/64),
 * 16 phases, 8 luma and 4 chroma taps.  The only difference in the values is
 * the lower bound of the two center luma taps, which is -2 here instead of 0.
 */
2664 static const AVSConfig gen6_avs_config = {
2665     .coeff_frac_bits = 6,
2666     .coeff_epsilon = 1.0f / (1U << 6),
2667     .num_phases = 16,
2668     .num_luma_coeffs = 8,
2669     .num_chroma_coeffs = 4,
2670
2671     .coeff_range = {
2672         .lower_bound = {
2673             .y_k_h = { -0.25f, -0.5f, -1, -2, -2, -1, -0.5f, -0.25f },
2674             .y_k_v = { -0.25f, -0.5f, -1, -2, -2, -1, -0.5f, -0.25f },
2675             .uv_k_h = { -1, 0, 0, -1 },
2676             .uv_k_v = { -1, 0, 0, -1 },
2677         },
2678         .upper_bound = {
2679             .y_k_h = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2680             .y_k_v = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2681             .uv_k_h = { 1, 2, 2, 1 },
2682             .uv_k_v = { 1, 2, 2, 1 },
2683         },
2684     },
2685 };
2686
2687 static VAStatus
2688 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2689                        const struct i965_surface *src_surface,
2690                        const VARectangle *src_rect,
2691                        struct i965_surface *dst_surface,
2692                        const VARectangle *dst_rect,
2693                        void *filter_param)
2694 {
2695     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
2696     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2697     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2698     struct object_surface *obj_surface;
2699     struct i965_sampler_8x8 *sampler_8x8;
2700     struct i965_sampler_8x8_state *sampler_8x8_state;
2701     int index;
2702     int in_w, in_h, in_wpitch, in_hpitch;
2703     int out_w, out_h, out_wpitch, out_hpitch;
2704     int i;
2705     AVSState * const avs = &pp_avs_context->state;
2706     float sx, sy;
2707
2708     const int nlas = (pp_context->filter_flags & VA_FILTER_SCALING_MASK) ==
2709         VA_FILTER_SCALING_NL_ANAMORPHIC;
2710
2711     /* surface */
2712     obj_surface = (struct object_surface *)src_surface->base;
2713     in_w = obj_surface->orig_width;
2714     in_h = obj_surface->orig_height;
2715     in_wpitch = obj_surface->width;
2716     in_hpitch = obj_surface->height;
2717
2718     /* source Y surface index 1 */
2719     i965_pp_set_surface2_state(ctx, pp_context,
2720                                obj_surface->bo, 0,
2721                                in_w, in_h, in_wpitch,
2722                                0, 0,
2723                                SURFACE_FORMAT_Y8_UNORM, 0,
2724                                1);
2725
2726     /* source UV surface index 2 */
2727     i965_pp_set_surface2_state(ctx, pp_context,
2728                                obj_surface->bo, in_wpitch * in_hpitch,
2729                                in_w / 2, in_h / 2, in_wpitch,
2730                                0, 0,
2731                                SURFACE_FORMAT_R8B8_UNORM, 0,
2732                                2);
2733
2734     /* destination surface */
2735     obj_surface = (struct object_surface *)dst_surface->base;
2736     out_w = obj_surface->orig_width;
2737     out_h = obj_surface->orig_height;
2738     out_wpitch = obj_surface->width;
2739     out_hpitch = obj_surface->height;
2740     assert(out_w <= out_wpitch && out_h <= out_hpitch);
2741
2742     /* destination Y surface index 7 */
2743     i965_pp_set_surface_state(ctx, pp_context,
2744                               obj_surface->bo, 0,
2745                               ALIGN(out_w, 4) / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2746                               7, 1);
2747
2748     /* destination UV surface index 8 */
2749     i965_pp_set_surface_state(ctx, pp_context,
2750                               obj_surface->bo, out_wpitch * out_hpitch,
2751                               ALIGN(out_w, 4) / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2752                               8, 1);
2753
2754     /* sampler 8x8 state */
2755     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2756     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2757     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2758     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2759     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2760
2761     sx = (float)dst_rect->width / src_rect->width;
2762     sy = (float)dst_rect->height / src_rect->height;
2763     avs_update_coefficients(avs, sx, sy, pp_context->filter_flags);
2764
2765     assert(avs->config->num_phases == 16);
2766     for (i = 0; i <= 16; i++) {
2767         const AVSCoeffs * const coeffs = &avs->coeffs[i];
2768
2769         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 =
2770             intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
2771         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 =
2772             intel_format_convert(coeffs->y_k_h[1], 1, 6, 1);
2773         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 =
2774             intel_format_convert(coeffs->y_k_h[2], 1, 6, 1);
2775         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 =
2776             intel_format_convert(coeffs->y_k_h[3], 1, 6, 1);
2777         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 =
2778             intel_format_convert(coeffs->y_k_h[4], 1, 6, 1);
2779         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 =
2780             intel_format_convert(coeffs->y_k_h[5], 1, 6, 1);
2781         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 =
2782             intel_format_convert(coeffs->y_k_h[6], 1, 6, 1);
2783         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 =
2784             intel_format_convert(coeffs->y_k_h[7], 1, 6, 1);
2785
2786         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 =
2787             intel_format_convert(coeffs->uv_k_h[0], 1, 6, 1);
2788         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 =
2789             intel_format_convert(coeffs->uv_k_h[1], 1, 6, 1);
2790         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 =
2791             intel_format_convert(coeffs->uv_k_h[2], 1, 6, 1);
2792         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 =
2793             intel_format_convert(coeffs->uv_k_h[3], 1, 6, 1);
2794
2795         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 =
2796             intel_format_convert(coeffs->y_k_v[0], 1, 6, 1);
2797         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 =
2798             intel_format_convert(coeffs->y_k_v[1], 1, 6, 1);
2799         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 =
2800             intel_format_convert(coeffs->y_k_v[2], 1, 6, 1);
2801         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 =
2802             intel_format_convert(coeffs->y_k_v[3], 1, 6, 1);
2803         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 =
2804             intel_format_convert(coeffs->y_k_v[4], 1, 6, 1);
2805         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 =
2806             intel_format_convert(coeffs->y_k_v[5], 1, 6, 1);
2807         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 =
2808             intel_format_convert(coeffs->y_k_v[6], 1, 6, 1);
2809         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 =
2810             intel_format_convert(coeffs->y_k_v[7], 1, 6, 1);
2811
2812         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 =
2813             intel_format_convert(coeffs->uv_k_v[0], 1, 6, 1);
2814         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 =
2815             intel_format_convert(coeffs->uv_k_v[1], 1, 6, 1);
2816         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 =
2817             intel_format_convert(coeffs->uv_k_v[2], 1, 6, 1);
2818         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 =
2819             intel_format_convert(coeffs->uv_k_v[3], 1, 6, 1);
2820     }
2821
2822     /* Adaptive filter for all channels (DW4.15) */
2823     sampler_8x8_state->coefficients[0].dw4.table_1x_filter_c1 = 1U << 7;
2824
2825     sampler_8x8_state->dw136.default_sharpness_level =
2826         -avs_is_needed(pp_context->filter_flags);
2827     sampler_8x8_state->dw137.ilk.bypass_y_adaptive_filtering = 1;
2828     sampler_8x8_state->dw137.ilk.bypass_x_adaptive_filtering = 1;
2829     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2830
2831     /* sampler 8x8 */
2832     dri_bo_map(pp_context->sampler_state_table.bo, True);
2833     assert(pp_context->sampler_state_table.bo->virtual);
2834     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
2835     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2836
2837     /* sample_8x8 Y index 1 */
2838     index = 1;
2839     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2840     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2841     sampler_8x8[index].dw0.ief_bypass = 1;
2842     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2843     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2844     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2845     sampler_8x8[index].dw2.global_noise_estimation = 22;
2846     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2847     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2848     sampler_8x8[index].dw3.strong_edge_weight = 7;
2849     sampler_8x8[index].dw3.regular_weight = 2;
2850     sampler_8x8[index].dw3.non_edge_weight = 0;
2851     sampler_8x8[index].dw3.gain_factor = 40;
2852     sampler_8x8[index].dw4.steepness_boost = 0;
2853     sampler_8x8[index].dw4.steepness_threshold = 0;
2854     sampler_8x8[index].dw4.mr_boost = 0;
2855     sampler_8x8[index].dw4.mr_threshold = 5;
2856     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2857     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2858     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2859     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2860     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2861     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2862     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2863     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2864     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2865     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2866     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2867     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2868     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2869     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2870     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2871     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2872     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2873     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2874     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2875     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2876     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2877     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2878     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2879     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2880     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2881     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2882     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2883     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2884     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2885     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2886     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2887     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2888     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2889     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2890     sampler_8x8[index].dw13.limiter_boost = 0;
2891     sampler_8x8[index].dw13.minimum_limiter = 10;
2892     sampler_8x8[index].dw13.maximum_limiter = 11;
2893     sampler_8x8[index].dw14.clip_limiter = 130;
2894     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2895                       I915_GEM_DOMAIN_RENDER, 
2896                       0,
2897                       0,
2898                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2899                       pp_context->sampler_state_table.bo_8x8);
2900
2901     /* sample_8x8 UV index 2 */
2902     index = 2;
2903     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2904     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2905     sampler_8x8[index].dw0.ief_bypass = 1;
2906     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2907     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2908     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2909     sampler_8x8[index].dw2.global_noise_estimation = 22;
2910     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2911     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2912     sampler_8x8[index].dw3.strong_edge_weight = 7;
2913     sampler_8x8[index].dw3.regular_weight = 2;
2914     sampler_8x8[index].dw3.non_edge_weight = 0;
2915     sampler_8x8[index].dw3.gain_factor = 40;
2916     sampler_8x8[index].dw4.steepness_boost = 0;
2917     sampler_8x8[index].dw4.steepness_threshold = 0;
2918     sampler_8x8[index].dw4.mr_boost = 0;
2919     sampler_8x8[index].dw4.mr_threshold = 5;
2920     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2921     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2922     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2923     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2924     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2925     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2926     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2927     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2928     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2929     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2930     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2931     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2932     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2933     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2934     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2935     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2936     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2937     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2938     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2939     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2940     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2941     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2942     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2943     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2944     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2945     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2946     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2947     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2948     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2949     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2950     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2951     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2952     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2953     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2954     sampler_8x8[index].dw13.limiter_boost = 0;
2955     sampler_8x8[index].dw13.minimum_limiter = 10;
2956     sampler_8x8[index].dw13.maximum_limiter = 11;
2957     sampler_8x8[index].dw14.clip_limiter = 130;
2958     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2959                       I915_GEM_DOMAIN_RENDER, 
2960                       0,
2961                       0,
2962                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2963                       pp_context->sampler_state_table.bo_8x8);
2964
2965     dri_bo_unmap(pp_context->sampler_state_table.bo);
2966
2967     /* private function & data */
2968     pp_context->pp_x_steps = pp_avs_x_steps;
2969     pp_context->pp_y_steps = pp_avs_y_steps;
2970     pp_context->private_context = &pp_context->pp_avs_context;
2971     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2972
2973     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2974     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2975     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
2976     pp_avs_context->dest_y = dst_rect->y;
2977     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2978     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
2979     pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2980     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2981     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
2982     pp_avs_context->src_h = src_rect->height;
2983
2984     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2985     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2986
2987     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2988     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2989     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2990     pp_inline_parameter->grf6.video_step_delta = 0.0;
2991
2992     dst_surface->flags = src_surface->flags;
2993
2994     return VA_STATUS_SUCCESS;
2995 }
2996
2997 static VAStatus
2998 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2999                              const struct i965_surface *src_surface,
3000                              const VARectangle *src_rect,
3001                              struct i965_surface *dst_surface,
3002                              const VARectangle *dst_rect,
3003                              void *filter_param)
3004 {
3005     return pp_nv12_avs_initialize(ctx, pp_context,
3006                                   src_surface,
3007                                   src_rect,
3008                                   dst_surface,
3009                                   dst_rect,
3010                                   filter_param);
3011 }
3012
3013 static int
3014 gen7_pp_avs_x_steps(void *private_context)
3015 {
3016     struct pp_avs_context *pp_avs_context = private_context;
3017
3018     return pp_avs_context->dest_w / 16;
3019 }
3020
3021 static int
3022 gen7_pp_avs_y_steps(void *private_context)
3023 {
3024     struct pp_avs_context *pp_avs_context = private_context;
3025
3026     return pp_avs_context->dest_h / 16;
3027 }
3028
3029 static int
3030 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3031 {
3032     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
3033     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3034
3035     pp_inline_parameter->grf9.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
3036     pp_inline_parameter->grf9.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
3037     pp_inline_parameter->grf9.constant_0 = 0xffffffff;
3038     pp_inline_parameter->grf9.sampler_load_main_video_x_scaling_step = pp_avs_context->horiz_range / pp_avs_context->src_w;
3039
3040     return 0;
3041 }
3042
3043 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx, 
3044                                               struct i965_post_processing_context *pp_context,
3045                                               const struct i965_surface *surface)
3046 {
3047     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3048     int fourcc = pp_get_surface_fourcc(ctx, surface);
3049     
3050     if (fourcc == VA_FOURCC_YUY2) {
3051         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3052         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3053         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3054     } else if (fourcc == VA_FOURCC_UYVY) {
3055         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
3056         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
3057         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
3058     }
3059 }
3060
3061 static VAStatus
3062 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3063                            const struct i965_surface *src_surface,
3064                            const VARectangle *src_rect,
3065                            struct i965_surface *dst_surface,
3066                            const VARectangle *dst_rect,
3067                            void *filter_param)
3068 {
3069     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
3070     struct i965_driver_data *i965 = i965_driver_data(ctx);
3071     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3072     struct gen7_sampler_8x8 *sampler_8x8;
3073     struct i965_sampler_8x8_state *sampler_8x8_state;
3074     int index, i;
3075     int width[3], height[3], pitch[3], offset[3];
3076     int src_width, src_height;
3077     AVSState * const avs = &pp_avs_context->state;
3078     float sx, sy;
3079     const float * yuv_to_rgb_coefs;
3080     size_t yuv_to_rgb_coefs_size;
3081
3082     /* source surface */
3083     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
3084                                          src_rect,
3085                                          width, height, pitch, offset);
3086     src_width = width[0];
3087     src_height = height[0];
3088
3089     /* destination surface */
3090     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
3091                                          dst_rect,
3092                                          width, height, pitch, offset);
3093
3094     /* sampler 8x8 state */
3095     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
3096     assert(pp_context->sampler_state_table.bo_8x8->virtual);
3097     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
3098     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
3099     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
3100
3101     sx = (float)dst_rect->width / src_rect->width;
3102     sy = (float)dst_rect->height / src_rect->height;
3103     avs_update_coefficients(avs, sx, sy, pp_context->filter_flags);
3104
3105     assert(avs->config->num_phases == 16);
3106     for (i = 0; i <= 16; i++) {
3107         const AVSCoeffs * const coeffs = &avs->coeffs[i];
3108
3109         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 =
3110             intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
3111         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 =
3112             intel_format_convert(coeffs->y_k_h[1], 1, 6, 1);
3113         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 =
3114             intel_format_convert(coeffs->y_k_h[2], 1, 6, 1);
3115         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 =
3116             intel_format_convert(coeffs->y_k_h[3], 1, 6, 1);
3117         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 =
3118             intel_format_convert(coeffs->y_k_h[4], 1, 6, 1);
3119         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 =
3120             intel_format_convert(coeffs->y_k_h[5], 1, 6, 1);
3121         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 =
3122             intel_format_convert(coeffs->y_k_h[6], 1, 6, 1);
3123         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 =
3124             intel_format_convert(coeffs->y_k_h[7], 1, 6, 1);
3125
3126         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 =
3127             intel_format_convert(coeffs->uv_k_h[0], 1, 6, 1);
3128         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 =
3129             intel_format_convert(coeffs->uv_k_h[1], 1, 6, 1);
3130         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 =
3131             intel_format_convert(coeffs->uv_k_h[2], 1, 6, 1);
3132         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 =
3133             intel_format_convert(coeffs->uv_k_h[3], 1, 6, 1);
3134
3135         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 =
3136             intel_format_convert(coeffs->y_k_v[0], 1, 6, 1);
3137         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 =
3138             intel_format_convert(coeffs->y_k_v[1], 1, 6, 1);
3139         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 =
3140             intel_format_convert(coeffs->y_k_v[2], 1, 6, 1);
3141         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 =
3142             intel_format_convert(coeffs->y_k_v[3], 1, 6, 1);
3143         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 =
3144             intel_format_convert(coeffs->y_k_v[4], 1, 6, 1);
3145         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 =
3146             intel_format_convert(coeffs->y_k_v[5], 1, 6, 1);
3147         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 =
3148             intel_format_convert(coeffs->y_k_v[6], 1, 6, 1);
3149         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 =
3150             intel_format_convert(coeffs->y_k_v[7], 1, 6, 1);
3151
3152         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 =
3153             intel_format_convert(coeffs->uv_k_v[0], 1, 6, 1);
3154         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 =
3155             intel_format_convert(coeffs->uv_k_v[1], 1, 6, 1);
3156         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 =
3157             intel_format_convert(coeffs->uv_k_v[2], 1, 6, 1);
3158         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 =
3159             intel_format_convert(coeffs->uv_k_v[3], 1, 6, 1);
3160     }
3161
3162     sampler_8x8_state->dw136.default_sharpness_level =
3163         -avs_is_needed(pp_context->filter_flags);
3164     if (IS_HASWELL(i965->intel.device_info)) {
3165         sampler_8x8_state->dw137.hsw.adaptive_filter_for_all_channel = 1;
3166         sampler_8x8_state->dw137.hsw.bypass_y_adaptive_filtering = 1;
3167         sampler_8x8_state->dw137.hsw.bypass_x_adaptive_filtering = 1;
3168     }
3169     else {
3170         sampler_8x8_state->coefficients[0].dw4.table_1x_filter_c1 = 1U << 7;
3171         sampler_8x8_state->dw137.ilk.bypass_y_adaptive_filtering = 1;
3172         sampler_8x8_state->dw137.ilk.bypass_x_adaptive_filtering = 1;
3173     }
3174     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
3175
3176     /* sampler 8x8 */
3177     dri_bo_map(pp_context->sampler_state_table.bo, True);
3178     assert(pp_context->sampler_state_table.bo->virtual);
3179     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
3180     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
3181
3182     /* sample_8x8 Y index 4 */
3183     index = 4;
3184     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3185     sampler_8x8[index].dw0.global_noise_estimation = 255;
3186     sampler_8x8[index].dw0.ief_bypass = 1;
3187
3188     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3189
3190     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3191     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3192     sampler_8x8[index].dw2.r5x_coefficient = 9;
3193     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3194     sampler_8x8[index].dw2.r5c_coefficient = 3;
3195
3196     sampler_8x8[index].dw3.r3x_coefficient = 27;
3197     sampler_8x8[index].dw3.r3c_coefficient = 5;
3198     sampler_8x8[index].dw3.gain_factor = 40;
3199     sampler_8x8[index].dw3.non_edge_weight = 1;
3200     sampler_8x8[index].dw3.regular_weight = 2;
3201     sampler_8x8[index].dw3.strong_edge_weight = 7;
3202     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3203
3204     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3205                       I915_GEM_DOMAIN_RENDER, 
3206                       0,
3207                       0,
3208                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3209                       pp_context->sampler_state_table.bo_8x8);
3210
3211     /* sample_8x8 UV index 8 */
3212     index = 8;
3213     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3214     sampler_8x8[index].dw0.disable_8x8_filter = 0;
3215     sampler_8x8[index].dw0.global_noise_estimation = 255;
3216     sampler_8x8[index].dw0.ief_bypass = 1;
3217     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3218     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3219     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3220     sampler_8x8[index].dw2.r5x_coefficient = 9;
3221     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3222     sampler_8x8[index].dw2.r5c_coefficient = 3;
3223     sampler_8x8[index].dw3.r3x_coefficient = 27;
3224     sampler_8x8[index].dw3.r3c_coefficient = 5;
3225     sampler_8x8[index].dw3.gain_factor = 40;
3226     sampler_8x8[index].dw3.non_edge_weight = 1;
3227     sampler_8x8[index].dw3.regular_weight = 2;
3228     sampler_8x8[index].dw3.strong_edge_weight = 7;
3229     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3230
3231     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3232                       I915_GEM_DOMAIN_RENDER, 
3233                       0,
3234                       0,
3235                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3236                       pp_context->sampler_state_table.bo_8x8);
3237
3238     /* sampler_8x8 V, index 12 */
3239     index = 12;
3240     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3241     sampler_8x8[index].dw0.disable_8x8_filter = 0;
3242     sampler_8x8[index].dw0.global_noise_estimation = 255;
3243     sampler_8x8[index].dw0.ief_bypass = 1;
3244     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3245     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3246     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3247     sampler_8x8[index].dw2.r5x_coefficient = 9;
3248     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3249     sampler_8x8[index].dw2.r5c_coefficient = 3;
3250     sampler_8x8[index].dw3.r3x_coefficient = 27;
3251     sampler_8x8[index].dw3.r3c_coefficient = 5;
3252     sampler_8x8[index].dw3.gain_factor = 40;
3253     sampler_8x8[index].dw3.non_edge_weight = 1;
3254     sampler_8x8[index].dw3.regular_weight = 2;
3255     sampler_8x8[index].dw3.strong_edge_weight = 7;
3256     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3257
3258     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3259                       I915_GEM_DOMAIN_RENDER, 
3260                       0,
3261                       0,
3262                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3263                       pp_context->sampler_state_table.bo_8x8);
3264
3265     dri_bo_unmap(pp_context->sampler_state_table.bo);
3266
3267     /* private function & data */
3268     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
3269     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
3270     pp_context->private_context = &pp_context->pp_avs_context;
3271     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
3272
3273     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
3274     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
3275     pp_avs_context->dest_y = dst_rect->y;
3276     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
3277     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
3278     pp_avs_context->src_w = src_rect->width;
3279     pp_avs_context->src_h = src_rect->height;
3280     pp_avs_context->horiz_range = (float)src_rect->width / src_width;
3281
3282     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
3283     dw = MAX(dw, dst_rect->width + dst_left_edge_extend);
3284
3285     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3286     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
3287     if (IS_HASWELL(i965->intel.device_info))
3288         pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */
3289
3290     if (pp_static_parameter->grf2.avs_wa_enable) {
3291         int src_fourcc = pp_get_surface_fourcc(ctx, src_surface);
3292         if ((src_fourcc == VA_FOURCC_RGBA) ||
3293             (src_fourcc == VA_FOURCC_RGBX) ||
3294             (src_fourcc == VA_FOURCC_BGRA) ||
3295             (src_fourcc == VA_FOURCC_BGRX)) {
3296             pp_static_parameter->grf2.avs_wa_enable = 0;
3297         }
3298     }
3299         
3300     pp_static_parameter->grf2.avs_wa_width = src_width;
3301     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * src_width);
3302     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * src_width);
3303     pp_static_parameter->grf2.alpha = 255;
3304
3305     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
3306     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height;
3307     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
3308         (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
3309     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
3310         (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
3311
3312     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
3313
3314     yuv_to_rgb_coefs = i915_color_standard_to_coefs (i915_filter_to_color_standard (src_surface->flags &
3315                                                                                     VA_SRC_COLOR_MASK),
3316                                                      &yuv_to_rgb_coefs_size);
3317     memcpy(&pp_static_parameter->grf7, yuv_to_rgb_coefs, yuv_to_rgb_coefs_size);
3318
3319     dst_surface->flags = src_surface->flags;
3320
3321     return VA_STATUS_SUCCESS;
3322 }
3323
/*
 * x-step callback used by the DNDI path (installed as
 * pp_context->pp_x_steps by pp_nv12_dndi_initialize()).
 *
 * The private context is not consulted; the result is always 1.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;      /* intentionally unused */

    return 1;
}
3329
3330 static int
3331 pp_dndi_y_steps(void *private_context)
3332 {
3333     struct pp_dndi_context *pp_dndi_context = private_context;
3334
3335     return pp_dndi_context->dest_h / 4;
3336 }
3337
3338 static int
3339 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3340 {
3341     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3342
3343     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3344     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3345
3346     return 0;
3347 }
3348
3349 static VAStatus
3350 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3351                         const struct i965_surface *src_surface,
3352                         const VARectangle *src_rect,
3353                         struct i965_surface *dst_surface,
3354                         const VARectangle *dst_rect,
3355                         void *filter_param)
3356 {
3357     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
3358     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3359     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3360     const VAProcPipelineParameterBuffer * const pipe_params =
3361         pp_context->pipeline_param;
3362     const VAProcFilterParameterBufferDeinterlacing * const deint_params =
3363         filter_param;
3364     struct object_surface * const src_obj_surface = (struct object_surface *)
3365         src_surface->base;
3366     struct object_surface * const dst_obj_surface = (struct object_surface *)
3367         dst_surface->base;
3368     struct object_surface *obj_surface;
3369     struct i965_sampler_dndi *sampler_dndi;
3370     int index, dndi_top_first;
3371     int w, h, orig_w, orig_h;
3372     VAStatus status;
3373
3374     status = pp_dndi_context_init_surface_params(dndi_ctx, src_obj_surface,
3375         pipe_params, deint_params);
3376     if (status != VA_STATUS_SUCCESS)
3377         return status;
3378
3379     status = pp_dndi_context_ensure_surfaces(ctx, pp_context,
3380         src_obj_surface, dst_obj_surface);
3381     if (status != VA_STATUS_SUCCESS)
3382         return status;
3383
3384     status = pp_dndi_context_ensure_surfaces_storage(ctx, pp_context,
3385         src_obj_surface, dst_obj_surface);
3386     if (status != VA_STATUS_SUCCESS)
3387         return status;
3388
3389     /* Current input surface (index = 4) */
3390     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].obj_surface;
3391     i965_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3392         obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3393         0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 4);
3394
3395     /* Previous input surface (index = 5) */
3396     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS].obj_surface;
3397     i965_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3398         obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3399         0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 5);
3400
3401     /* STMM input surface (index = 6) */
3402     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_STMM].obj_surface;
3403     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3404         obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3405         I965_SURFACEFORMAT_R8_UNORM, 6, 1);
3406
3407     /* Previous output surfaces (index = { 7, 8 }) */
3408     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS].obj_surface;
3409     w = obj_surface->width;
3410     h = obj_surface->height;
3411     orig_w = obj_surface->orig_width;
3412     orig_h = obj_surface->orig_height;
3413
3414     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3415         ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 7, 1);
3416     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3417         ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 8, 1);
3418
3419     /* Current output surfaces (index = { 10, 11 }) */
3420     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT].obj_surface;
3421     w = obj_surface->width;
3422     h = obj_surface->height;
3423     orig_w = obj_surface->orig_width;
3424     orig_h = obj_surface->orig_height;
3425
3426     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3427         ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 10, 1);
3428     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3429         ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 11, 1);
3430
3431     /* STMM output surface (index = 20) */
3432     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM].obj_surface;
3433     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3434         obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3435         I965_SURFACEFORMAT_R8_UNORM, 20, 1);
3436
3437     dndi_top_first = !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
3438
3439     /* sampler dndi */
3440     dri_bo_map(pp_context->sampler_state_table.bo, True);
3441     assert(pp_context->sampler_state_table.bo->virtual);
3442     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3443     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3444
3445     /* sample dndi index 1 */
3446     index = 0;
3447     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3448     sampler_dndi[index].dw0.denoise_history_delta = 7;          // 0-15, default is 8
3449     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
3450     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3451
3452     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3453     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3454     sampler_dndi[index].dw1.stmm_c2 = 1;
3455     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3456     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3457
3458     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
3459     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 1;    // 0-15
3460     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3461     sampler_dndi[index].dw2.good_neighbor_threshold = 12;                // 0-63
3462
3463     sampler_dndi[index].dw3.maximum_stmm = 150;
3464     sampler_dndi[index].dw3.multipler_for_vecm = 30;
3465     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3466     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3467     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3468
3469     sampler_dndi[index].dw4.sdi_delta = 5;
3470     sampler_dndi[index].dw4.sdi_threshold = 100;
3471     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3472     sampler_dndi[index].dw4.stmm_shift_up = 1;
3473     sampler_dndi[index].dw4.stmm_shift_down = 3;
3474     sampler_dndi[index].dw4.minimum_stmm = 118;
3475
3476     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3477     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3478     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3479     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
3480
3481     sampler_dndi[index].dw6.dn_enable = 1;
3482     sampler_dndi[index].dw6.di_enable = 1;
3483     sampler_dndi[index].dw6.di_partial = 0;
3484     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3485     sampler_dndi[index].dw6.dndi_stream_id = 0;
3486     sampler_dndi[index].dw6.dndi_first_frame = dndi_ctx->is_first_frame;
3487     sampler_dndi[index].dw6.progressive_dn = 0;
3488     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3489     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3490     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3491
3492     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3493     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3494     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3495     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3496
3497     dri_bo_unmap(pp_context->sampler_state_table.bo);
3498
3499     /* private function & data */
3500     pp_context->pp_x_steps = pp_dndi_x_steps;
3501     pp_context->pp_y_steps = pp_dndi_y_steps;
3502     pp_context->private_context = dndi_ctx;
3503     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
3504
3505     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3506     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
3507     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
3508     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
3509
3510     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3511     pp_inline_parameter->grf5.number_blocks = w / 16;
3512     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3513     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3514
3515     dndi_ctx->dest_w = w;
3516     dndi_ctx->dest_h = h;
3517
3518     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3519     return VA_STATUS_SUCCESS;
3520 }
3521
/*
 * x-step callback used by the DN (denoise) path (installed as
 * pp_context->pp_x_steps by pp_nv12_dn_initialize()).
 *
 * The private context is not consulted; the result is always 1.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;      /* intentionally unused */

    return 1;
}
3527
3528 static int
3529 pp_dn_y_steps(void *private_context)
3530 {
3531     struct pp_dn_context *pp_dn_context = private_context;
3532
3533     return pp_dn_context->dest_h / 8;
3534 }
3535
3536 static int
3537 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3538 {
3539     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3540
3541     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3542     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
3543
3544     return 0;
3545 }
3546
3547 static VAStatus
3548 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3549                       const struct i965_surface *src_surface,
3550                       const VARectangle *src_rect,
3551                       struct i965_surface *dst_surface,
3552                       const VARectangle *dst_rect,
3553                       void *filter_param)
3554 {
3555     struct i965_driver_data *i965 = i965_driver_data(ctx);
3556     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
3557     struct object_surface *obj_surface;
3558     struct i965_sampler_dndi *sampler_dndi;
3559     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3560     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3561     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3562     int index;
3563     int w, h;
3564     int orig_w, orig_h;
3565     int dn_strength = 15;
3566     int dndi_top_first = 1;
3567     int dn_progressive = 0;
3568
3569     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3570         dndi_top_first = 1;
3571         dn_progressive = 1;
3572     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3573         dndi_top_first = 1;
3574         dn_progressive = 0;
3575     } else {
3576         dndi_top_first = 0;
3577         dn_progressive = 0;
3578     }
3579
3580     if (dn_filter_param) {
3581         float value = dn_filter_param->value;
3582         
3583         if (value > 1.0)
3584             value = 1.0;
3585         
3586         if (value < 0.0)
3587             value = 0.0;
3588
3589         dn_strength = (int)(value * 31.0F);
3590     }
3591
3592     /* surface */
3593     obj_surface = (struct object_surface *)src_surface->base;
3594     orig_w = obj_surface->orig_width;
3595     orig_h = obj_surface->orig_height;
3596     w = obj_surface->width;
3597     h = obj_surface->height;
3598
3599     if (pp_dn_context->stmm_bo == NULL) {
3600         pp_dn_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
3601                                               "STMM surface",
3602                                               w * h,
3603                                               4096);
3604         assert(pp_dn_context->stmm_bo);
3605     }
3606
3607     /* source UV surface index 2 */
3608     i965_pp_set_surface_state(ctx, pp_context,
3609                               obj_surface->bo, w * h,
3610                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3611                               2, 0);
3612
3613     /* source YUV surface index 4 */
3614     i965_pp_set_surface2_state(ctx, pp_context,
3615                                obj_surface->bo, 0,
3616                                orig_w, orig_h, w,
3617                                0, h,
3618                                SURFACE_FORMAT_PLANAR_420_8, 1,
3619                                4);
3620
3621     /* source STMM surface index 20 */
3622     i965_pp_set_surface_state(ctx, pp_context,
3623                               pp_dn_context->stmm_bo, 0,
3624                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3625                               20, 1);
3626
3627     /* destination surface */
3628     obj_surface = (struct object_surface *)dst_surface->base;
3629     orig_w = obj_surface->orig_width;
3630     orig_h = obj_surface->orig_height;
3631     w = obj_surface->width;
3632     h = obj_surface->height;
3633
3634     /* destination Y surface index 7 */
3635     i965_pp_set_surface_state(ctx, pp_context,
3636                               obj_surface->bo, 0,
3637                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3638                               7, 1);
3639
3640     /* destination UV surface index 8 */
3641     i965_pp_set_surface_state(ctx, pp_context,
3642                               obj_surface->bo, w * h,
3643                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3644                               8, 1);
3645     /* sampler dn */
3646     dri_bo_map(pp_context->sampler_state_table.bo, True);
3647     assert(pp_context->sampler_state_table.bo->virtual);
3648     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3649     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3650
3651     /* sample dndi index 1 */
3652     index = 0;
3653     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3654     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
3655     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3656     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3657
3658     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3659     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3660     sampler_dndi[index].dw1.stmm_c2 = 0;
3661     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3662     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3663
3664     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3665     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
3666     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3667     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
3668
3669     sampler_dndi[index].dw3.maximum_stmm = 128;
3670     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3671     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3672     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3673     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3674
3675     sampler_dndi[index].dw4.sdi_delta = 8;
3676     sampler_dndi[index].dw4.sdi_threshold = 128;
3677     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3678     sampler_dndi[index].dw4.stmm_shift_up = 0;
3679     sampler_dndi[index].dw4.stmm_shift_down = 0;
3680     sampler_dndi[index].dw4.minimum_stmm = 0;
3681
3682     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3683     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3684     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3685     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3686
3687     sampler_dndi[index].dw6.dn_enable = 1;
3688     sampler_dndi[index].dw6.di_enable = 0;
3689     sampler_dndi[index].dw6.di_partial = 0;
3690     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3691     sampler_dndi[index].dw6.dndi_stream_id = 1;
3692     sampler_dndi[index].dw6.dndi_first_frame = 1;
3693     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
3694     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3695     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3696     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3697
3698     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3699     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3700     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3701     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3702
3703     dri_bo_unmap(pp_context->sampler_state_table.bo);
3704
3705     /* private function & data */
3706     pp_context->pp_x_steps = pp_dn_x_steps;
3707     pp_context->pp_y_steps = pp_dn_y_steps;
3708     pp_context->private_context = &pp_context->pp_dn_context;
3709     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
3710
3711     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3712     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
3713     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
3714     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
3715
3716     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3717     pp_inline_parameter->grf5.number_blocks = w / 16;
3718     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3719     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3720
3721     pp_dn_context->dest_w = w;
3722     pp_dn_context->dest_h = h;
3723
3724     dst_surface->flags = src_surface->flags;
3725     
3726     return VA_STATUS_SUCCESS;
3727 }
3728
3729 static int
3730 gen7_pp_dndi_x_steps(void *private_context)
3731 {
3732     struct pp_dndi_context *pp_dndi_context = private_context;
3733
3734     return pp_dndi_context->dest_w / 16;
3735 }
3736
3737 static int
3738 gen7_pp_dndi_y_steps(void *private_context)
3739 {
3740     struct pp_dndi_context *pp_dndi_context = private_context;
3741
3742     return pp_dndi_context->dest_h / 4;
3743 }
3744
3745 static int
3746 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3747 {
3748     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3749
3750     pp_inline_parameter->grf9.destination_block_horizontal_origin = x * 16;
3751     pp_inline_parameter->grf9.destination_block_vertical_origin = y * 4;
3752
3753     return 0;
3754 }
3755
3756 static VAStatus
3757 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3758                              const struct i965_surface *src_surface,
3759                              const VARectangle *src_rect,
3760                              struct i965_surface *dst_surface,
3761                              const VARectangle *dst_rect,
3762                              void *filter_param)
3763 {
3764     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
3765     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3766     const VAProcPipelineParameterBuffer * const pipe_params =
3767         pp_context->pipeline_param;
3768     const VAProcFilterParameterBufferDeinterlacing * const deint_params =
3769         filter_param;
3770     struct object_surface * const src_obj_surface = (struct object_surface *)
3771         src_surface->base;
3772     struct object_surface * const dst_obj_surface = (struct object_surface *)
3773         dst_surface->base;
3774     struct object_surface *obj_surface;
3775     struct gen7_sampler_dndi *sampler_dndi;
3776     int index, dndi_top_first;
3777     int w, h, orig_w, orig_h;
3778     VAStatus status;
3779
3780     status = pp_dndi_context_init_surface_params(dndi_ctx, src_obj_surface,
3781         pipe_params, deint_params);
3782     if (status != VA_STATUS_SUCCESS)
3783         return status;
3784
3785     status = pp_dndi_context_ensure_surfaces(ctx, pp_context,
3786         src_obj_surface, dst_obj_surface);
3787     if (status != VA_STATUS_SUCCESS)
3788         return status;
3789
3790     status = pp_dndi_context_ensure_surfaces_storage(ctx, pp_context,
3791         src_obj_surface, dst_obj_surface);
3792     if (status != VA_STATUS_SUCCESS)
3793         return status;
3794
3795     /* Current input surface (index = 3) */
3796     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].obj_surface;
3797     gen7_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3798         obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3799         0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 3);
3800
3801     /* Previous input surface (index = 4) */
3802     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS].obj_surface;
3803     gen7_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3804         obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3805         0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 4);
3806
3807     /* STMM input surface (index = 5) */
3808     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_STMM].obj_surface;
3809     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3810         obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3811         I965_SURFACEFORMAT_R8_UNORM, 5, 1);
3812
3813     /* Previous output surfaces (index = { 27, 28 }) */
3814     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS].obj_surface;
3815     w = obj_surface->width;
3816     h = obj_surface->height;
3817     orig_w = obj_surface->orig_width;
3818     orig_h = obj_surface->orig_height;
3819
3820     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3821         ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 27, 1);
3822     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3823         ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 28, 1);
3824
3825     /* Current output surfaces (index = { 30, 31 }) */
3826     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT].obj_surface;
3827     w = obj_surface->width;
3828     h = obj_surface->height;
3829     orig_w = obj_surface->orig_width;
3830     orig_h = obj_surface->orig_height;
3831
3832     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3833         ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 30, 1);
3834     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3835         ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 31, 1);
3836
3837     /* STMM output surface (index = 33) */
3838     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM].obj_surface;
3839     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3840         obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3841         I965_SURFACEFORMAT_R8_UNORM, 33, 1);
3842
3843     dndi_top_first = !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
3844
3845     /* sampler dndi */
3846     dri_bo_map(pp_context->sampler_state_table.bo, True);
3847     assert(pp_context->sampler_state_table.bo->virtual);
3848     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3849     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3850
3851     /* sample dndi index 0 */
3852     index = 0;
3853     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3854     sampler_dndi[index].dw0.dnmh_delt = 7;
3855     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
3856     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
3857     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
3858     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3859
3860     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3861     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3862     sampler_dndi[index].dw1.stmm_c2 = 2;
3863     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3864     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3865
3866     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
3867     sampler_dndi[index].dw2.bne_edge_th = 1;
3868     sampler_dndi[index].dw2.smooth_mv_th = 0;
3869     sampler_dndi[index].dw2.sad_tight_th = 5;
3870     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
3871     sampler_dndi[index].dw2.good_neighbor_th = 12;
3872
3873     sampler_dndi[index].dw3.maximum_stmm = 150;
3874     sampler_dndi[index].dw3.multipler_for_vecm = 30;
3875     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3876     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3877     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3878
3879     sampler_dndi[index].dw4.sdi_delta = 5;
3880     sampler_dndi[index].dw4.sdi_threshold = 100;
3881     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3882     sampler_dndi[index].dw4.stmm_shift_up = 1;
3883     sampler_dndi[index].dw4.stmm_shift_down = 3;
3884     sampler_dndi[index].dw4.minimum_stmm = 118;
3885
3886     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3887     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3888     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3889     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
3890     sampler_dndi[index].dw6.dn_enable = 0;
3891     sampler_dndi[index].dw6.di_enable = 1;
3892     sampler_dndi[index].dw6.di_partial = 0;
3893     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3894     sampler_dndi[index].dw6.dndi_stream_id = 1;
3895     sampler_dndi[index].dw6.dndi_first_frame = dndi_ctx->is_first_frame;
3896     sampler_dndi[index].dw6.progressive_dn = 0;
3897     sampler_dndi[index].dw6.mcdi_enable =
3898         (deint_params->algorithm == VAProcDeinterlacingMotionCompensated);
3899     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3900     sampler_dndi[index].dw6.cat_th1 = 0;
3901     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3902     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3903
3904     sampler_dndi[index].dw7.sad_tha = 5;
3905     sampler_dndi[index].dw7.sad_thb = 10;
3906     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3907     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
3908     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3909     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3910     sampler_dndi[index].dw7.neighborpixel_th = 10;
3911     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3912
3913     dri_bo_unmap(pp_context->sampler_state_table.bo);
3914
3915     /* private function & data */
3916     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3917     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3918     pp_context->private_context = dndi_ctx;
3919     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3920
3921     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3922     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3923     pp_static_parameter->grf1.di_top_field_first = 0;
3924     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3925
3926     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3927     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3928     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3929
3930     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3931     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3932
3933     dndi_ctx->dest_w = w;
3934     dndi_ctx->dest_h = h;
3935
3936     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3937     return VA_STATUS_SUCCESS;
3938 }
3939
3940 static int
3941 gen7_pp_dn_x_steps(void *private_context)
3942 {
3943     struct pp_dn_context *pp_dn_context = private_context;
3944
3945     return pp_dn_context->dest_w / 16;
3946 }
3947
3948 static int
3949 gen7_pp_dn_y_steps(void *private_context)
3950 {
3951     struct pp_dn_context *pp_dn_context = private_context;
3952
3953     return pp_dn_context->dest_h / 4;
3954 }
3955
3956 static int
3957 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3958 {
3959     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3960
3961     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3962     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3963
3964     return 0;
3965 }
3966
3967 static VAStatus
3968 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3969                            const struct i965_surface *src_surface,
3970                            const VARectangle *src_rect,
3971                            struct i965_surface *dst_surface,
3972                            const VARectangle *dst_rect,
3973                            void *filter_param)
3974 {
3975     struct i965_driver_data *i965 = i965_driver_data(ctx);
3976     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
3977     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3978     struct object_surface *obj_surface;
3979     struct gen7_sampler_dndi *sampler_dn;
3980     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3981     int index;
3982     int w, h;
3983     int orig_w, orig_h;
3984     int dn_strength = 15;
3985     int dndi_top_first = 1;
3986     int dn_progressive = 0;
3987
3988     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3989         dndi_top_first = 1;
3990         dn_progressive = 1;
3991     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3992         dndi_top_first = 1;
3993         dn_progressive = 0;
3994     } else {
3995         dndi_top_first = 0;
3996         dn_progressive = 0;
3997     }
3998
3999     if (dn_filter_param) {
4000         float value = dn_filter_param->value;
4001         
4002         if (value > 1.0)
4003             value = 1.0;
4004         
4005         if (value < 0.0)
4006             value = 0.0;
4007
4008         dn_strength = (int)(value * 31.0F);
4009     }
4010
4011     /* surface */
4012     obj_surface = (struct object_surface *)src_surface->base;
4013     orig_w = obj_surface->orig_width;
4014     orig_h = obj_surface->orig_height;
4015     w = obj_surface->width;
4016     h = obj_surface->height;
4017
4018     if (pp_dn_context->stmm_bo == NULL) {
4019         pp_dn_context->stmm_bo= dri_bo_alloc(i965->intel.bufmgr,
4020                                              "STMM surface",
4021                                              w * h,
4022                                              4096);
4023         assert(pp_dn_context->stmm_bo);
4024     }
4025
4026     /* source UV surface index 1 */
4027     gen7_pp_set_surface_state(ctx, pp_context,
4028                               obj_surface->bo, w * h,
4029                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4030                               1, 0);
4031
4032     /* source YUV surface index 3 */
4033     gen7_pp_set_surface2_state(ctx, pp_context,
4034                                obj_surface->bo, 0,
4035                                orig_w, orig_h, w,
4036                                0, h,
4037                                SURFACE_FORMAT_PLANAR_420_8, 1,
4038                                3);
4039
4040     /* source (temporal reference) YUV surface index 4 */
4041     gen7_pp_set_surface2_state(ctx, pp_context,
4042                                obj_surface->bo, 0,
4043                                orig_w, orig_h, w,
4044                                0, h,
4045                                SURFACE_FORMAT_PLANAR_420_8, 1,
4046                                4);
4047
4048     /* STMM / History Statistics input surface, index 5 */
4049     gen7_pp_set_surface_state(ctx, pp_context,
4050                               pp_dn_context->stmm_bo, 0,
4051                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4052                               33, 1);
4053
4054     /* destination surface */
4055     obj_surface = (struct object_surface *)dst_surface->base;
4056     orig_w = obj_surface->orig_width;
4057     orig_h = obj_surface->orig_height;
4058     w = obj_surface->width;
4059     h = obj_surface->height;
4060
4061     /* destination Y surface index 24 */
4062     gen7_pp_set_surface_state(ctx, pp_context,
4063                               obj_surface->bo, 0,
4064                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4065                               24, 1);
4066
4067     /* destination UV surface index 25 */
4068     gen7_pp_set_surface_state(ctx, pp_context,
4069                               obj_surface->bo, w * h,
4070                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4071                               25, 1);
4072
4073     /* sampler dn */
4074     dri_bo_map(pp_context->sampler_state_table.bo, True);
4075     assert(pp_context->sampler_state_table.bo->virtual);
4076     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
4077     sampler_dn = pp_context->sampler_state_table.bo->virtual;
4078
4079     /* sample dn index 1 */
4080     index = 0;
4081     sampler_dn[index].dw0.denoise_asd_threshold = 0;
4082     sampler_dn[index].dw0.dnmh_delt = 8;
4083     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
4084     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
4085     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
4086     sampler_dn[index].dw0.denoise_stad_threshold = 0;
4087
4088     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
4089     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
4090     sampler_dn[index].dw1.stmm_c2 = 0;
4091     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
4092     sampler_dn[index].dw1.temporal_difference_threshold = 16;
4093
4094     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
4095     sampler_dn[index].dw2.bne_edge_th = 1;
4096     sampler_dn[index].dw2.smooth_mv_th = 0;
4097     sampler_dn[index].dw2.sad_tight_th = 5;
4098     sampler_dn[index].dw2.cat_slope_minus1 = 9;
4099     sampler_dn[index].dw2.good_neighbor_th = 4;
4100
4101     sampler_dn[index].dw3.maximum_stmm = 128;
4102     sampler_dn[index].dw3.multipler_for_vecm = 2;
4103     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
4104     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
4105     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
4106
4107     sampler_dn[index].dw4.sdi_delta = 8;
4108     sampler_dn[index].dw4.sdi_threshold = 128;
4109     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
4110     sampler_dn[index].dw4.stmm_shift_up = 0;
4111     sampler_dn[index].dw4.stmm_shift_down = 0;
4112     sampler_dn[index].dw4.minimum_stmm = 0;
4113
4114     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
4115     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
4116     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
4117     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
4118
4119     sampler_dn[index].dw6.dn_enable = 1;
4120     sampler_dn[index].dw6.di_enable = 0;
4121     sampler_dn[index].dw6.di_partial = 0;
4122     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
4123     sampler_dn[index].dw6.dndi_stream_id = 1;
4124     sampler_dn[index].dw6.dndi_first_frame = 1;
4125     sampler_dn[index].dw6.progressive_dn = dn_progressive;
4126     sampler_dn[index].dw6.mcdi_enable = 0;
4127     sampler_dn[index].dw6.fmd_tear_threshold = 32;
4128     sampler_dn[index].dw6.cat_th1 = 0;
4129     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
4130     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
4131
4132     sampler_dn[index].dw7.sad_tha = 5;
4133     sampler_dn[index].dw7.sad_thb = 10;
4134     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
4135     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
4136     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
4137     sampler_dn[index].dw7.vdi_walker_enable = 0;
4138     sampler_dn[index].dw7.neighborpixel_th = 10;
4139     sampler_dn[index].dw7.column_width_minus1 = w / 16;
4140
4141     dri_bo_unmap(pp_context->sampler_state_table.bo);
4142
4143     /* private function & data */
4144     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
4145     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
4146     pp_context->private_context = &pp_context->pp_dn_context;
4147     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
4148
4149     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
4150     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
4151     pp_static_parameter->grf1.di_top_field_first = 0;
4152     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
4153
4154     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
4155     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
4156     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
4157
4158     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
4159     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
4160
4161     pp_dn_context->dest_w = w;
4162     pp_dn_context->dest_h = h;
4163
4164     dst_surface->flags = src_surface->flags;
4165
4166     return VA_STATUS_SUCCESS;
4167 }
4168
4169 static VAStatus
4170 ironlake_pp_initialize(
4171     VADriverContextP ctx,
4172     struct i965_post_processing_context *pp_context,
4173     const struct i965_surface *src_surface,
4174     const VARectangle *src_rect,
4175     struct i965_surface *dst_surface,
4176     const VARectangle *dst_rect,
4177     int pp_index,
4178     void *filter_param
4179 )
4180 {
4181     VAStatus va_status;
4182     struct i965_driver_data *i965 = i965_driver_data(ctx);
4183     struct pp_module *pp_module;
4184     dri_bo *bo;
4185     int static_param_size, inline_param_size;
4186
4187     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4188     bo = dri_bo_alloc(i965->intel.bufmgr,
4189                       "surface state & binding table",
4190                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4191                       4096);
4192     assert(bo);
4193     pp_context->surface_state_binding_table.bo = bo;
4194
4195     dri_bo_unreference(pp_context->curbe.bo);
4196     bo = dri_bo_alloc(i965->intel.bufmgr,
4197                       "constant buffer",
4198                       4096, 
4199                       4096);
4200     assert(bo);
4201     pp_context->curbe.bo = bo;
4202
4203     dri_bo_unreference(pp_context->idrt.bo);
4204     bo = dri_bo_alloc(i965->intel.bufmgr, 
4205                       "interface discriptor", 
4206                       sizeof(struct i965_interface_descriptor), 
4207                       4096);
4208     assert(bo);
4209     pp_context->idrt.bo = bo;
4210     pp_context->idrt.num_interface_descriptors = 0;
4211
4212     dri_bo_unreference(pp_context->sampler_state_table.bo);
4213     bo = dri_bo_alloc(i965->intel.bufmgr, 
4214                       "sampler state table", 
4215                       4096,
4216                       4096);
4217     assert(bo);
4218     dri_bo_map(bo, True);
4219     memset(bo->virtual, 0, bo->size);
4220     dri_bo_unmap(bo);
4221     pp_context->sampler_state_table.bo = bo;
4222
4223     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4224     bo = dri_bo_alloc(i965->intel.bufmgr, 
4225                       "sampler 8x8 state ",
4226                       4096,
4227                       4096);
4228     assert(bo);
4229     pp_context->sampler_state_table.bo_8x8 = bo;
4230
4231     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4232     bo = dri_bo_alloc(i965->intel.bufmgr, 
4233                       "sampler 8x8 state ",
4234                       4096,
4235                       4096);
4236     assert(bo);
4237     pp_context->sampler_state_table.bo_8x8_uv = bo;
4238
4239     dri_bo_unreference(pp_context->vfe_state.bo);
4240     bo = dri_bo_alloc(i965->intel.bufmgr, 
4241                       "vfe state", 
4242                       sizeof(struct i965_vfe_state), 
4243                       4096);
4244     assert(bo);
4245     pp_context->vfe_state.bo = bo;
4246
4247     static_param_size = sizeof(struct pp_static_parameter);
4248     inline_param_size = sizeof(struct pp_inline_parameter);
4249
4250     memset(pp_context->pp_static_parameter, 0, static_param_size);
4251     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4252     
4253     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4254     pp_context->current_pp = pp_index;
4255     pp_module = &pp_context->pp_modules[pp_index];
4256     
4257     if (pp_module->initialize)
4258         va_status = pp_module->initialize(ctx, pp_context,
4259                                           src_surface,
4260                                           src_rect,
4261                                           dst_surface,
4262                                           dst_rect,
4263                                           filter_param);
4264     else
4265         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4266
4267     return va_status;
4268 }
4269
4270 static VAStatus
4271 ironlake_post_processing(
4272     VADriverContextP   ctx,
4273     struct i965_post_processing_context *pp_context,
4274     const struct i965_surface *src_surface,
4275     const VARectangle *src_rect,
4276     struct i965_surface *dst_surface,
4277     const VARectangle *dst_rect,
4278     int                pp_index,
4279     void *filter_param
4280 )
4281 {
4282     VAStatus va_status;
4283
4284     va_status = ironlake_pp_initialize(ctx, pp_context,
4285                                        src_surface,
4286                                        src_rect,
4287                                        dst_surface,
4288                                        dst_rect,
4289                                        pp_index,
4290                                        filter_param);
4291
4292     if (va_status == VA_STATUS_SUCCESS) {
4293         ironlake_pp_states_setup(ctx, pp_context);
4294         ironlake_pp_pipeline_setup(ctx, pp_context);
4295     }
4296
4297     return va_status;
4298 }
4299
4300 static VAStatus
4301 gen6_pp_initialize(
4302     VADriverContextP ctx,
4303     struct i965_post_processing_context *pp_context,
4304     const struct i965_surface *src_surface,
4305     const VARectangle *src_rect,
4306     struct i965_surface *dst_surface,
4307     const VARectangle *dst_rect,
4308     int pp_index,
4309     void *filter_param
4310 )
4311 {
4312     VAStatus va_status;
4313     struct i965_driver_data *i965 = i965_driver_data(ctx);
4314     struct pp_module *pp_module;
4315     dri_bo *bo;
4316     int static_param_size, inline_param_size;
4317
4318     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4319     bo = dri_bo_alloc(i965->intel.bufmgr,
4320                       "surface state & binding table",
4321                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4322                       4096);
4323     assert(bo);
4324     pp_context->surface_state_binding_table.bo = bo;
4325
4326     dri_bo_unreference(pp_context->curbe.bo);
4327     bo = dri_bo_alloc(i965->intel.bufmgr,
4328                       "constant buffer",
4329                       4096, 
4330                       4096);
4331     assert(bo);
4332     pp_context->curbe.bo = bo;
4333
4334     dri_bo_unreference(pp_context->idrt.bo);
4335     bo = dri_bo_alloc(i965->intel.bufmgr, 
4336                       "interface discriptor", 
4337                       sizeof(struct gen6_interface_descriptor_data), 
4338                       4096);
4339     assert(bo);
4340     pp_context->idrt.bo = bo;
4341     pp_context->idrt.num_interface_descriptors = 0;
4342
4343     dri_bo_unreference(pp_context->sampler_state_table.bo);
4344     bo = dri_bo_alloc(i965->intel.bufmgr, 
4345                       "sampler state table", 
4346                       4096,
4347                       4096);
4348     assert(bo);
4349     dri_bo_map(bo, True);
4350     memset(bo->virtual, 0, bo->size);
4351     dri_bo_unmap(bo);
4352     pp_context->sampler_state_table.bo = bo;
4353
4354     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4355     bo = dri_bo_alloc(i965->intel.bufmgr, 
4356                       "sampler 8x8 state ",
4357                       4096,
4358                       4096);
4359     assert(bo);
4360     pp_context->sampler_state_table.bo_8x8 = bo;
4361
4362     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4363     bo = dri_bo_alloc(i965->intel.bufmgr, 
4364                       "sampler 8x8 state ",
4365                       4096,
4366                       4096);
4367     assert(bo);
4368     pp_context->sampler_state_table.bo_8x8_uv = bo;
4369
4370     dri_bo_unreference(pp_context->vfe_state.bo);
4371     bo = dri_bo_alloc(i965->intel.bufmgr, 
4372                       "vfe state", 
4373                       sizeof(struct i965_vfe_state), 
4374                       4096);
4375     assert(bo);
4376     pp_context->vfe_state.bo = bo;
4377     
4378     if (IS_GEN7(i965->intel.device_info)) {
4379         static_param_size = sizeof(struct gen7_pp_static_parameter);
4380         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
4381     } else {
4382         static_param_size = sizeof(struct pp_static_parameter);
4383         inline_param_size = sizeof(struct pp_inline_parameter);
4384     }
4385
4386     memset(pp_context->pp_static_parameter, 0, static_param_size);
4387     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4388
4389     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4390     pp_context->current_pp = pp_index;
4391     pp_module = &pp_context->pp_modules[pp_index];
4392     
4393     if (pp_module->initialize)
4394         va_status = pp_module->initialize(ctx, pp_context,
4395                                           src_surface,
4396                                           src_rect,
4397                                           dst_surface,
4398                                           dst_rect,
4399                                           filter_param);
4400     else
4401         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4402
4403     calculate_boundary_block_mask(pp_context, dst_rect);
4404
4405     return va_status;
4406 }
4407
4408
4409 static void
4410 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
4411                                    struct i965_post_processing_context *pp_context)
4412 {
4413     struct i965_driver_data *i965 = i965_driver_data(ctx);
4414     struct gen6_interface_descriptor_data *desc;
4415     dri_bo *bo;
4416     int pp_index = pp_context->current_pp;
4417
4418     bo = pp_context->idrt.bo;
4419     dri_bo_map(bo, True);
4420     assert(bo->virtual);
4421     desc = bo->virtual;
4422     memset(desc, 0, sizeof(*desc));
4423     desc->desc0.kernel_start_pointer = 
4424         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
4425     desc->desc1.single_program_flow = 1;
4426     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
4427     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
4428     desc->desc2.sampler_state_pointer = 
4429         pp_context->sampler_state_table.bo->offset >> 5;
4430     desc->desc3.binding_table_entry_count = 0;
4431     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
4432     desc->desc4.constant_urb_entry_read_offset = 0;
4433
4434     if (IS_GEN7(i965->intel.device_info))
4435         desc->desc4.constant_urb_entry_read_length = 8; /* grf 1-8 */
4436     else
4437         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
4438
4439     dri_bo_emit_reloc(bo,
4440                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4441                       0,
4442                       offsetof(struct gen6_interface_descriptor_data, desc0),
4443                       pp_context->pp_modules[pp_index].kernel.bo);
4444
4445     dri_bo_emit_reloc(bo,
4446                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4447                       desc->desc2.sampler_count << 2,
4448                       offsetof(struct gen6_interface_descriptor_data, desc2),
4449                       pp_context->sampler_state_table.bo);
4450
4451     dri_bo_unmap(bo);
4452     pp_context->idrt.num_interface_descriptors++;
4453 }
4454
4455 static void
4456 gen6_pp_upload_constants(VADriverContextP ctx,
4457                          struct i965_post_processing_context *pp_context)
4458 {
4459     struct i965_driver_data *i965 = i965_driver_data(ctx);
4460     unsigned char *constant_buffer;
4461     int param_size;
4462
4463     assert(sizeof(struct pp_static_parameter) == 128);
4464     assert(sizeof(struct gen7_pp_static_parameter) == 256);
4465
4466     if (IS_GEN7(i965->intel.device_info))
4467         param_size = sizeof(struct gen7_pp_static_parameter);
4468     else
4469         param_size = sizeof(struct pp_static_parameter);
4470
4471     dri_bo_map(pp_context->curbe.bo, 1);
4472     assert(pp_context->curbe.bo->virtual);
4473     constant_buffer = pp_context->curbe.bo->virtual;
4474     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
4475     dri_bo_unmap(pp_context->curbe.bo);
4476 }
4477
4478 static void
4479 gen6_pp_states_setup(VADriverContextP ctx,
4480                      struct i965_post_processing_context *pp_context)
4481 {
4482     gen6_pp_interface_descriptor_table(ctx, pp_context);
4483     gen6_pp_upload_constants(ctx, pp_context);
4484 }
4485
4486 static void
4487 gen6_pp_pipeline_select(VADriverContextP ctx,
4488                         struct i965_post_processing_context *pp_context)
4489 {
4490     struct intel_batchbuffer *batch = pp_context->batch;
4491
4492     BEGIN_BATCH(batch, 1);
4493     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
4494     ADVANCE_BATCH(batch);
4495 }
4496
4497 static void
4498 gen6_pp_state_base_address(VADriverContextP ctx,
4499                            struct i965_post_processing_context *pp_context)
4500 {
4501     struct intel_batchbuffer *batch = pp_context->batch;
4502
4503     BEGIN_BATCH(batch, 10);
4504     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
4505     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4506     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
4507     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4508     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4509     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4510     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4511     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4512     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4513     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4514     ADVANCE_BATCH(batch);
4515 }
4516
4517 static void
4518 gen6_pp_vfe_state(VADriverContextP ctx,
4519                   struct i965_post_processing_context *pp_context)
4520 {
4521     struct intel_batchbuffer *batch = pp_context->batch;
4522
4523     BEGIN_BATCH(batch, 8);
4524     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
4525     OUT_BATCH(batch, 0);
4526     OUT_BATCH(batch,
4527               (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 |
4528               pp_context->vfe_gpu_state.num_urb_entries << 8);
4529     OUT_BATCH(batch, 0);
4530     OUT_BATCH(batch,
4531               (pp_context->vfe_gpu_state.urb_entry_size) << 16 |  
4532                 /* URB Entry Allocation Size, in 256 bits unit */
4533               (pp_context->vfe_gpu_state.curbe_allocation_size));
4534                 /* CURBE Allocation Size, in 256 bits unit */
4535     OUT_BATCH(batch, 0);
4536     OUT_BATCH(batch, 0);
4537     OUT_BATCH(batch, 0);
4538     ADVANCE_BATCH(batch);
4539 }
4540
4541 static void
4542 gen6_pp_curbe_load(VADriverContextP ctx,
4543                    struct i965_post_processing_context *pp_context)
4544 {
4545     struct intel_batchbuffer *batch = pp_context->batch;
4546     struct i965_driver_data *i965 = i965_driver_data(ctx);
4547     int param_size;
4548
4549     if (IS_GEN7(i965->intel.device_info))
4550         param_size = sizeof(struct gen7_pp_static_parameter);
4551     else
4552         param_size = sizeof(struct pp_static_parameter);
4553
4554     BEGIN_BATCH(batch, 4);
4555     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
4556     OUT_BATCH(batch, 0);
4557     OUT_BATCH(batch,
4558               param_size);
4559     OUT_RELOC(batch, 
4560               pp_context->curbe.bo,
4561               I915_GEM_DOMAIN_INSTRUCTION, 0,
4562               0);
4563     ADVANCE_BATCH(batch);
4564 }
4565
4566 static void
4567 gen6_interface_descriptor_load(VADriverContextP ctx,
4568                                struct i965_post_processing_context *pp_context)
4569 {
4570     struct intel_batchbuffer *batch = pp_context->batch;
4571
4572     BEGIN_BATCH(batch, 4);
4573     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
4574     OUT_BATCH(batch, 0);
4575     OUT_BATCH(batch,
4576               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
4577     OUT_RELOC(batch, 
4578               pp_context->idrt.bo,
4579               I915_GEM_DOMAIN_INSTRUCTION, 0,
4580               0);
4581     ADVANCE_BATCH(batch);
4582 }
4583
4584 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) 
4585 {
4586     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
4587
4588     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
4589     pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
4590     // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle
4591     pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
4592     pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4593     pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
4594
4595     /* 1 x N */
4596     if (x_steps == 1) {
4597         if (y == y_steps-1) {
4598             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
4599         }
4600         else {
4601             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
4602         }
4603     }
4604
4605     /* M x 1 */
4606     if (y_steps == 1) {
4607         if (x == 0) { // all blocks in this group are on the left edge
4608             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
4609             pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left; 
4610         }
4611         else if (x == x_steps-1) {
4612             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
4613             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
4614         }
4615         else {
4616             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
4617             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4618             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
4619         }
4620     }
4621
4622 }
4623
4624 static void
4625 gen6_pp_object_walker(VADriverContextP ctx,
4626                       struct i965_post_processing_context *pp_context)
4627 {
4628     struct i965_driver_data *i965 = i965_driver_data(ctx);
4629     struct intel_batchbuffer *batch = pp_context->batch;
4630     int x, x_steps, y, y_steps;
4631     int param_size, command_length_in_dws;
4632     dri_bo *command_buffer;
4633     unsigned int *command_ptr;
4634
4635     if (IS_GEN7(i965->intel.device_info))
4636         param_size = sizeof(struct gen7_pp_inline_parameter);
4637     else
4638         param_size = sizeof(struct pp_inline_parameter);
4639
4640     x_steps = pp_context->pp_x_steps(pp_context->private_context);
4641     y_steps = pp_context->pp_y_steps(pp_context->private_context);
4642     command_length_in_dws = 6 + (param_size >> 2);
4643     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
4644                                   "command objects buffer",
4645                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
4646                                   4096);
4647
4648     dri_bo_map(command_buffer, 1);
4649     command_ptr = command_buffer->virtual;
4650
4651     for (y = 0; y < y_steps; y++) {
4652         for (x = 0; x < x_steps; x++) {
4653             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
4654                 // some common block parameter update goes here, apply to all pp functions
4655                 if (IS_GEN6(i965->intel.device_info))
4656                     update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
4657                 
4658                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
4659                 *command_ptr++ = 0;
4660                 *command_ptr++ = 0;
4661                 *command_ptr++ = 0;
4662                 *command_ptr++ = 0;
4663                 *command_ptr++ = 0;
4664                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
4665                 command_ptr += (param_size >> 2);
4666             }
4667         }
4668     }
4669
4670     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
4671         *command_ptr++ = 0;
4672
4673     *command_ptr = MI_BATCH_BUFFER_END;
4674
4675     dri_bo_unmap(command_buffer);
4676
4677     BEGIN_BATCH(batch, 2);
4678     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
4679     OUT_RELOC(batch, command_buffer,
4680               I915_GEM_DOMAIN_COMMAND, 0,
4681               0);
4682     ADVANCE_BATCH(batch);
4683
4684     dri_bo_unreference(command_buffer);
4685
4686     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
4687      * will cause control to pass back to ring buffer 
4688      */
4689     intel_batchbuffer_end_atomic(batch);
4690     intel_batchbuffer_flush(batch);
4691     intel_batchbuffer_start_atomic(batch, 0x1000);
4692 }
4693
4694 static void
4695 gen6_pp_pipeline_setup(VADriverContextP ctx,
4696                        struct i965_post_processing_context *pp_context)
4697 {
4698     struct intel_batchbuffer *batch = pp_context->batch;
4699
4700     intel_batchbuffer_start_atomic(batch, 0x1000);
4701     intel_batchbuffer_emit_mi_flush(batch);
4702     gen6_pp_pipeline_select(ctx, pp_context);
4703     gen6_pp_state_base_address(ctx, pp_context);
4704     gen6_pp_vfe_state(ctx, pp_context);
4705     gen6_pp_curbe_load(ctx, pp_context);
4706     gen6_interface_descriptor_load(ctx, pp_context);
4707     gen6_pp_object_walker(ctx, pp_context);
4708     intel_batchbuffer_end_atomic(batch);
4709 }
4710
4711 static VAStatus
4712 gen6_post_processing(
4713     VADriverContextP ctx,
4714     struct i965_post_processing_context *pp_context,
4715     const struct i965_surface *src_surface,
4716     const VARectangle *src_rect,
4717     struct i965_surface *dst_surface,
4718     const VARectangle *dst_rect,
4719     int pp_index,
4720     void *filter_param
4721 )
4722 {
4723     VAStatus va_status;
4724     
4725     va_status = gen6_pp_initialize(ctx, pp_context,
4726                                    src_surface,
4727                                    src_rect,
4728                                    dst_surface,
4729                                    dst_rect,
4730                                    pp_index,
4731                                    filter_param);
4732
4733     if (va_status == VA_STATUS_SUCCESS) {
4734         gen6_pp_states_setup(ctx, pp_context);
4735         gen6_pp_pipeline_setup(ctx, pp_context);
4736     }
4737
4738     if (va_status == VA_STATUS_SUCCESS_1)
4739         va_status = VA_STATUS_SUCCESS;
4740
4741     return va_status;
4742 }
4743
4744 static VAStatus
4745 i965_post_processing_internal(
4746     VADriverContextP   ctx,
4747     struct i965_post_processing_context *pp_context,
4748     const struct i965_surface *src_surface,
4749     const VARectangle *src_rect,
4750     struct i965_surface *dst_surface,
4751     const VARectangle *dst_rect,
4752     int                pp_index,
4753     void *filter_param
4754 )
4755 {
4756     VAStatus va_status;
4757
4758     if (pp_context && pp_context->intel_post_processing) {
4759         va_status = (pp_context->intel_post_processing)(ctx, pp_context,
4760                           src_surface, src_rect,
4761                           dst_surface, dst_rect,
4762                           pp_index, filter_param);
4763     } else {
4764         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4765     }
4766
4767     return va_status;
4768 }
4769
/*
 * Split a packed 0xAARRGGBB value into Y, U, V and alpha bytes.
 *
 * The conversion uses integer coefficients scaled by 1000, with an offset
 * of 16 on luma and 128 on both chroma components. Division truncates
 * toward zero. Alpha is passed through unchanged.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int alpha = (argb >> 24) & 0xff;
    const int red   = (argb >> 16) & 0xff;
    const int green = (argb >> 8) & 0xff;
    const int blue  = argb & 0xff;

    const int luma_milli = 257 * red + 504 * green + 98 * blue;
    const int cr_milli   = 439 * red - 368 * green - 71 * blue;
    const int cb_milli   = -148 * red - 291 * green + 439 * blue;

    *y = luma_milli / 1000 + 16;
    *v = cr_milli / 1000 + 128;
    *u = cb_milli / 1000 + 128;
    *a = alpha;
}
4786
4787 static void 
4788 i965_vpp_clear_surface(VADriverContextP ctx,
4789                        struct i965_post_processing_context *pp_context,
4790                        struct object_surface *obj_surface,
4791                        unsigned int color)
4792 {
4793     struct i965_driver_data *i965 = i965_driver_data(ctx);
4794     struct intel_batchbuffer *batch = pp_context->batch;
4795     unsigned int blt_cmd, br13;
4796     unsigned int tiling = 0, swizzle = 0;
4797     int pitch;
4798     unsigned char y, u, v, a = 0;
4799     int region_width, region_height;
4800
4801     /* Currently only support NV12 surface */
4802     if (!obj_surface || obj_surface->fourcc != VA_FOURCC_NV12)
4803         return;
4804
4805     rgb_to_yuv(color, &y, &u, &v, &a);
4806
4807     if (a == 0)
4808         return;
4809
4810     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4811     blt_cmd = XY_COLOR_BLT_CMD;
4812     pitch = obj_surface->width;
4813
4814     if (tiling != I915_TILING_NONE) {
4815         assert(tiling == I915_TILING_Y);
4816         // blt_cmd |= XY_COLOR_BLT_DST_TILED;
4817         // pitch >>= 2;
4818     }
4819
4820     br13 = 0xf0 << 16;
4821     br13 |= BR13_8;
4822     br13 |= pitch;
4823
4824     if (IS_IRONLAKE(i965->intel.device_info)) {
4825         intel_batchbuffer_start_atomic(batch, 48);
4826         BEGIN_BATCH(batch, 12);
4827     } else {
4828         /* Will double-check the command if the new chipset is added */
4829         intel_batchbuffer_start_atomic_blt(batch, 48);
4830         BEGIN_BLT_BATCH(batch, 12);
4831     }
4832
4833     region_width = obj_surface->width;
4834     region_height = obj_surface->height;
4835
4836     OUT_BATCH(batch, blt_cmd);
4837     OUT_BATCH(batch, br13);
4838     OUT_BATCH(batch,
4839               0 << 16 |
4840               0);
4841     OUT_BATCH(batch,
4842               region_height << 16 |
4843               region_width);
4844     OUT_RELOC(batch, obj_surface->bo, 
4845               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4846               0);
4847     OUT_BATCH(batch, y);
4848
4849     br13 = 0xf0 << 16;
4850     br13 |= BR13_565;
4851     br13 |= pitch;
4852
4853     region_width = obj_surface->width / 2;
4854     region_height = obj_surface->height / 2;
4855
4856     if (tiling == I915_TILING_Y) {
4857         region_height = ALIGN(obj_surface->height / 2, 32);
4858     }
4859
4860     OUT_BATCH(batch, blt_cmd);
4861     OUT_BATCH(batch, br13);
4862     OUT_BATCH(batch,
4863               0 << 16 |
4864               0);
4865     OUT_BATCH(batch,
4866               region_height << 16 |
4867               region_width);
4868     OUT_RELOC(batch, obj_surface->bo, 
4869               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4870               obj_surface->width * obj_surface->y_cb_offset);
4871     OUT_BATCH(batch, v << 8 | u);
4872
4873     ADVANCE_BATCH(batch);
4874     intel_batchbuffer_end_atomic(batch);
4875 }
4876
4877 VAStatus
4878 i965_scaling_processing(
4879     VADriverContextP   ctx,
4880     struct object_surface *src_surface_obj,
4881     const VARectangle *src_rect,
4882     struct object_surface *dst_surface_obj,
4883     const VARectangle *dst_rect,
4884     unsigned int       va_flags)
4885 {
4886     VAStatus va_status = VA_STATUS_SUCCESS;
4887     struct i965_driver_data *i965 = i965_driver_data(ctx);
4888  
4889     assert(src_surface_obj->fourcc == VA_FOURCC_NV12);
4890     assert(dst_surface_obj->fourcc == VA_FOURCC_NV12);
4891
4892     if (HAS_VPP(i965)) {
4893         struct i965_surface src_surface;
4894         struct i965_surface dst_surface;
4895         struct i965_post_processing_context *pp_context;
4896         unsigned int filter_flags;
4897
4898          _i965LockMutex(&i965->pp_mutex);
4899
4900          src_surface.base = (struct object_base *)src_surface_obj;
4901          src_surface.type = I965_SURFACE_TYPE_SURFACE;
4902          src_surface.flags = I965_SURFACE_FLAG_FRAME;
4903          dst_surface.base = (struct object_base *)dst_surface_obj;
4904          dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4905          dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4906
4907          pp_context = i965->pp_context;
4908          filter_flags = pp_context->filter_flags;
4909          pp_context->filter_flags = va_flags;
4910
4911          va_status = i965_post_processing_internal(ctx, pp_context,
4912              &src_surface, src_rect, &dst_surface, dst_rect,
4913              avs_is_needed(va_flags) ? PP_NV12_AVS : PP_NV12_SCALING, NULL);
4914
4915          pp_context->filter_flags = filter_flags;
4916
4917          _i965UnlockMutex(&i965->pp_mutex);
4918     }
4919
4920     return va_status;
4921 }
4922
4923 VASurfaceID
4924 i965_post_processing(
4925     VADriverContextP   ctx,
4926     struct object_surface *obj_surface,
4927     const VARectangle *src_rect,
4928     const VARectangle *dst_rect,
4929     unsigned int       va_flags,
4930     int               *has_done_scaling,
4931     VARectangle *calibrated_rect
4932 )
4933 {
4934     struct i965_driver_data *i965 = i965_driver_data(ctx);
4935     VASurfaceID out_surface_id = VA_INVALID_ID;
4936     VASurfaceID tmp_id = VA_INVALID_ID;
4937     
4938     *has_done_scaling = 0;
4939
4940     if (HAS_VPP(i965)) {
4941         VAStatus status;
4942         struct i965_surface src_surface;
4943         struct i965_surface dst_surface;
4944         struct i965_post_processing_context *pp_context;
4945
4946         /* Currently only support post processing for NV12 surface */
4947         if (obj_surface->fourcc != VA_FOURCC_NV12)
4948             return out_surface_id;
4949
4950         _i965LockMutex(&i965->pp_mutex);
4951
4952         pp_context = i965->pp_context;
4953         pp_context->filter_flags = va_flags;
4954         if (avs_is_needed(va_flags)) {
4955             VARectangle tmp_dst_rect;
4956
4957             if (out_surface_id != VA_INVALID_ID)
4958                 tmp_id = out_surface_id;
4959
4960             tmp_dst_rect.x = 0;
4961             tmp_dst_rect.y = 0;
4962             tmp_dst_rect.width = dst_rect->width;
4963             tmp_dst_rect.height = dst_rect->height;
4964             src_surface.base = (struct object_base *)obj_surface;
4965             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4966             src_surface.flags = I965_SURFACE_FLAG_FRAME;
4967
4968             status = i965_CreateSurfaces(ctx,
4969                                          dst_rect->width,
4970                                          dst_rect->height,
4971                                          VA_RT_FORMAT_YUV420,
4972                                          1,
4973                                          &out_surface_id);
4974             assert(status == VA_STATUS_SUCCESS);
4975             obj_surface = SURFACE(out_surface_id);
4976             assert(obj_surface);
4977             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4978             i965_vpp_clear_surface(ctx, pp_context, obj_surface, 0);
4979
4980             dst_surface.base = (struct object_base *)obj_surface;
4981             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4982             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4983
4984             i965_post_processing_internal(ctx, pp_context,
4985                                           &src_surface,
4986                                           src_rect,
4987                                           &dst_surface,
4988                                           &tmp_dst_rect,
4989                                           PP_NV12_AVS,
4990                                           NULL);
4991
4992             if (tmp_id != VA_INVALID_ID)
4993                 i965_DestroySurfaces(ctx, &tmp_id, 1);
4994                 
4995             *has_done_scaling = 1;
4996             calibrated_rect->x = 0;
4997             calibrated_rect->y = 0;
4998             calibrated_rect->width = dst_rect->width;
4999             calibrated_rect->height = dst_rect->height;
5000         }
5001
5002         _i965UnlockMutex(&i965->pp_mutex);
5003     }
5004
5005     return out_surface_id;
5006 }       
5007
5008 static VAStatus
5009 i965_image_pl2_processing(VADriverContextP ctx,
5010                           const struct i965_surface *src_surface,
5011                           const VARectangle *src_rect,
5012                           struct i965_surface *dst_surface,
5013                           const VARectangle *dst_rect);
5014
5015 static VAStatus
5016 i965_image_plx_nv12_plx_processing(VADriverContextP ctx,
5017                                    VAStatus (*i965_image_plx_nv12_processing)(
5018                                        VADriverContextP,
5019                                        const struct i965_surface *,
5020                                        const VARectangle *,
5021                                        struct i965_surface *,
5022                                        const VARectangle *),
5023                                    const struct i965_surface *src_surface,
5024                                    const VARectangle *src_rect,
5025                                    struct i965_surface *dst_surface,
5026                                    const VARectangle *dst_rect)
5027 {
5028     struct i965_driver_data *i965 = i965_driver_data(ctx);
5029     VAStatus status;
5030     VASurfaceID tmp_surface_id = VA_INVALID_SURFACE;
5031     struct object_surface *obj_surface = NULL;
5032     struct i965_surface tmp_surface;
5033     int width, height;
5034
5035     pp_get_surface_size(ctx, dst_surface, &width, &height);
5036     status = i965_CreateSurfaces(ctx,
5037                                  width,
5038                                  height,
5039                                  VA_RT_FORMAT_YUV420,
5040                                  1,
5041                                  &tmp_surface_id);
5042     assert(status == VA_STATUS_SUCCESS);
5043     obj_surface = SURFACE(tmp_surface_id);
5044     assert(obj_surface);
5045     i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5046
5047     tmp_surface.base = (struct object_base *)obj_surface;
5048     tmp_surface.type = I965_SURFACE_TYPE_SURFACE;
5049     tmp_surface.flags = I965_SURFACE_FLAG_FRAME;
5050
5051     status = i965_image_plx_nv12_processing(ctx,
5052                                             src_surface,
5053                                             src_rect,
5054                                             &tmp_surface,
5055                                             dst_rect);
5056
5057     if (status == VA_STATUS_SUCCESS)
5058         status = i965_image_pl2_processing(ctx,
5059                                            &tmp_surface,
5060                                            dst_rect,
5061                                            dst_surface,
5062                                            dst_rect);
5063
5064     i965_DestroySurfaces(ctx,
5065                          &tmp_surface_id,
5066                          1);
5067
5068     return status;
5069 }
5070
5071
5072 static VAStatus
5073 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
5074                                const struct i965_surface *src_surface,
5075                                const VARectangle *src_rect,
5076                                struct i965_surface *dst_surface,
5077                                const VARectangle *dst_rect)
5078 {
5079     struct i965_driver_data *i965 = i965_driver_data(ctx);
5080     struct i965_post_processing_context *pp_context = i965->pp_context;
5081     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5082     VAStatus vaStatus;
5083
5084     switch (fourcc) {
5085     case VA_FOURCC_NV12:
5086         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5087                                                  src_surface,
5088                                                  src_rect,
5089                                                  dst_surface,
5090                                                  dst_rect,
5091                                                  PP_RGBX_LOAD_SAVE_NV12,
5092                                                  NULL);
5093         intel_batchbuffer_flush(pp_context->batch);
5094         break;
5095
5096     default:
5097         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5098                                                       i965_image_pl1_rgbx_processing,
5099                                                       src_surface,
5100                                                       src_rect,
5101                                                       dst_surface,
5102                                                       dst_rect);
5103         break;
5104     }
5105
5106     return vaStatus;
5107 }
5108
5109 static VAStatus
5110 i965_image_pl3_processing(VADriverContextP ctx,
5111                           const struct i965_surface *src_surface,
5112                           const VARectangle *src_rect,
5113                           struct i965_surface *dst_surface,
5114                           const VARectangle *dst_rect)
5115 {
5116     struct i965_driver_data *i965 = i965_driver_data(ctx);
5117     struct i965_post_processing_context *pp_context = i965->pp_context;
5118     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5119     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5120
5121     switch (fourcc) {
5122     case VA_FOURCC_NV12:
5123         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5124                                                  src_surface,
5125                                                  src_rect,
5126                                                  dst_surface,
5127                                                  dst_rect,
5128                                                  PP_PL3_LOAD_SAVE_N12,
5129                                                  NULL);
5130         intel_batchbuffer_flush(pp_context->batch);
5131         break;
5132
5133     case VA_FOURCC_IMC1:
5134     case VA_FOURCC_IMC3:
5135     case VA_FOURCC_YV12:
5136     case VA_FOURCC_I420:
5137         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5138                                                  src_surface,
5139                                                  src_rect,
5140                                                  dst_surface,
5141                                                  dst_rect,
5142                                                  PP_PL3_LOAD_SAVE_PL3,
5143                                                  NULL);
5144         intel_batchbuffer_flush(pp_context->batch);
5145         break;
5146
5147     case VA_FOURCC_YUY2:
5148     case VA_FOURCC_UYVY:
5149         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5150                                                  src_surface,
5151                                                  src_rect,
5152                                                  dst_surface,
5153                                                  dst_rect,
5154                                                  PP_PL3_LOAD_SAVE_PA,
5155                                                  NULL);
5156         intel_batchbuffer_flush(pp_context->batch);
5157         break;
5158
5159     default:
5160         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5161                                                       i965_image_pl3_processing,
5162                                                       src_surface,
5163                                                       src_rect,
5164                                                       dst_surface,
5165                                                       dst_rect);
5166         break;
5167     }
5168
5169     return vaStatus;
5170 }
5171
5172 static VAStatus
5173 i965_image_pl2_processing(VADriverContextP ctx,
5174                           const struct i965_surface *src_surface,
5175                           const VARectangle *src_rect,
5176                           struct i965_surface *dst_surface,
5177                           const VARectangle *dst_rect)
5178 {
5179     struct i965_driver_data *i965 = i965_driver_data(ctx);
5180     struct i965_post_processing_context *pp_context = i965->pp_context;
5181     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5182     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5183
5184     switch (fourcc) {
5185     case VA_FOURCC_NV12:
5186         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5187                                                  src_surface,
5188                                                  src_rect,
5189                                                  dst_surface,
5190                                                  dst_rect,
5191                                                  PP_NV12_LOAD_SAVE_N12,
5192                                                  NULL);
5193         break;
5194
5195     case VA_FOURCC_IMC1:
5196     case VA_FOURCC_IMC3:
5197     case VA_FOURCC_YV12:
5198     case VA_FOURCC_I420:
5199         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5200                                                  src_surface,
5201                                                  src_rect,
5202                                                  dst_surface,
5203                                                  dst_rect,
5204                                                  PP_NV12_LOAD_SAVE_PL3,
5205                                                  NULL);
5206         break;
5207
5208     case VA_FOURCC_YUY2:
5209     case VA_FOURCC_UYVY:
5210         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5211                                                  src_surface,
5212                                                  src_rect,
5213                                                  dst_surface,
5214                                                  dst_rect,
5215                                                  PP_NV12_LOAD_SAVE_PA,
5216                                                  NULL);
5217         break;
5218
5219     case VA_FOURCC_BGRX:
5220     case VA_FOURCC_BGRA:
5221     case VA_FOURCC_RGBX:
5222     case VA_FOURCC_RGBA:
5223         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5224                                                  src_surface,
5225                                                  src_rect,
5226                                                  dst_surface,
5227                                                  dst_rect,
5228                                                  PP_NV12_LOAD_SAVE_RGBX,
5229                                                  NULL);
5230         break;
5231
5232     default:
5233         return VA_STATUS_ERROR_UNIMPLEMENTED;
5234     }
5235
5236     intel_batchbuffer_flush(pp_context->batch);
5237
5238     return vaStatus;
5239 }
5240
5241 static VAStatus
5242 i965_image_pl1_processing(VADriverContextP ctx,
5243                           const struct i965_surface *src_surface,
5244                           const VARectangle *src_rect,
5245                           struct i965_surface *dst_surface,
5246                           const VARectangle *dst_rect)
5247 {
5248     struct i965_driver_data *i965 = i965_driver_data(ctx);
5249     struct i965_post_processing_context *pp_context = i965->pp_context;
5250     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5251     VAStatus vaStatus;
5252
5253     switch (fourcc) {
5254     case VA_FOURCC_NV12:
5255         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5256                                                  src_surface,
5257                                                  src_rect,
5258                                                  dst_surface,
5259                                                  dst_rect,
5260                                                  PP_PA_LOAD_SAVE_NV12,
5261                                                  NULL);
5262         intel_batchbuffer_flush(pp_context->batch);
5263         break;
5264
5265     case VA_FOURCC_YV12:
5266         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5267                                                  src_surface,
5268                                                  src_rect,
5269                                                  dst_surface,
5270                                                  dst_rect,
5271                                                  PP_PA_LOAD_SAVE_PL3,
5272                                                  NULL);
5273         intel_batchbuffer_flush(pp_context->batch);
5274         break;
5275
5276     case VA_FOURCC_YUY2:
5277     case VA_FOURCC_UYVY:
5278         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5279                                                  src_surface,
5280                                                  src_rect,
5281                                                  dst_surface,
5282                                                  dst_rect,
5283                                                  PP_PA_LOAD_SAVE_PA,
5284                                                  NULL);
5285         intel_batchbuffer_flush(pp_context->batch);
5286         break;
5287
5288     default:
5289         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5290                                                       i965_image_pl1_processing,
5291                                                       src_surface,
5292                                                       src_rect,
5293                                                       dst_surface,
5294                                                       dst_rect);
5295         break;
5296     }
5297
5298     return vaStatus;
5299 }
5300
5301 // it only support NV12 and P010 for vebox proc ctx
5302 static struct object_surface *derive_surface(VADriverContextP ctx,
5303                                              struct object_image *obj_image,
5304                                              struct object_surface *obj_surface)
5305 {
5306     VAImage * const image = &obj_image->image;
5307
5308     memset((void *)obj_surface, 0, sizeof(*obj_surface));
5309     obj_surface->fourcc = image->format.fourcc;
5310     obj_surface->orig_width = image->width;
5311     obj_surface->orig_height = image->height;
5312     obj_surface->width = image->pitches[0];
5313     obj_surface->height = image->height;
5314     obj_surface->y_cb_offset = image->offsets[1] / obj_surface->width;
5315     obj_surface->y_cr_offset = obj_surface->y_cb_offset;
5316     obj_surface->bo = obj_image->bo;
5317     obj_surface->subsampling = SUBSAMPLE_YUV420;
5318
5319     return obj_surface;
5320 }
5321
5322 static VAStatus
5323 vebox_processing_simple(VADriverContextP ctx,
5324                         struct i965_post_processing_context *pp_context,
5325                         struct object_surface *src_obj_surface,
5326                         struct object_surface *dst_obj_surface,
5327                         const VARectangle *rect)
5328 {
5329     struct i965_driver_data *i965 = i965_driver_data(ctx);
5330     VAProcPipelineParameterBuffer pipeline_param;
5331     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
5332
5333     if(pp_context->vebox_proc_ctx == NULL) {
5334         pp_context->vebox_proc_ctx = gen75_vebox_context_init(ctx);
5335     }
5336
5337     memset((void *)&pipeline_param, 0, sizeof(pipeline_param));
5338     pipeline_param.surface_region = rect;
5339     pipeline_param.output_region = rect;
5340     pipeline_param.filter_flags = 0;
5341     pipeline_param.num_filters  = 0;
5342
5343     pp_context->vebox_proc_ctx->pipeline_param = &pipeline_param;
5344     pp_context->vebox_proc_ctx->surface_input_object = src_obj_surface;
5345     pp_context->vebox_proc_ctx->surface_output_object = dst_obj_surface;
5346
5347     if (IS_GEN9(i965->intel.device_info))
5348         status = gen9_vebox_process_picture(ctx, pp_context->vebox_proc_ctx);
5349
5350     return status;
5351 }
5352
5353 static VAStatus
5354 i965_image_p010_processing(VADriverContextP ctx,
5355                           const struct i965_surface *src_surface,
5356                           const VARectangle *src_rect,
5357                           struct i965_surface *dst_surface,
5358                           const VARectangle *dst_rect)
5359 {
5360 #define HAS_VPP_P010(ctx)        ((ctx)->codec_info->has_vpp_p010 && \
5361                                      (ctx)->intel.has_bsd)
5362
5363     struct i965_driver_data *i965 = i965_driver_data(ctx);
5364     struct i965_post_processing_context *pp_context = i965->pp_context;
5365     struct object_surface *src_obj_surface = NULL, *dst_obj_surface = NULL;
5366     struct object_surface tmp_src_obj_surface, tmp_dst_obj_surface;
5367     struct object_surface *tmp_surface = NULL;
5368     VASurfaceID tmp_surface_id[3], out_surface_id = VA_INVALID_ID;
5369     int num_tmp_surfaces = 0;
5370     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5371     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5372     int vpp_post = 0;
5373
5374     if(HAS_VPP_P010(i965)) {
5375         vpp_post = 0;
5376         switch(fourcc) {
5377         case VA_FOURCC_NV12:
5378             if(src_rect->x != dst_rect->x ||
5379                 src_rect->y != dst_rect->y ||
5380                 src_rect->width != dst_rect->width ||
5381                 src_rect->height != dst_rect->height) {
5382                 vpp_post = 1;
5383             }
5384             break;
5385         case VA_FOURCC_P010:
5386             // don't support scaling while the fourcc of dst_surface is P010
5387             if(src_rect->x != dst_rect->x ||
5388                 src_rect->y != dst_rect->y ||
5389                 src_rect->width != dst_rect->width ||
5390                 src_rect->height != dst_rect->height) {
5391                 vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5392                 goto EXIT;
5393             }
5394             break;
5395         default:
5396             vpp_post = 1;
5397             break;
5398         }
5399
5400         if(src_surface->type == I965_SURFACE_TYPE_IMAGE) {
5401             src_obj_surface = derive_surface(ctx, (struct object_image *)src_surface->base,
5402                                              &tmp_src_obj_surface);
5403         }
5404         else
5405             src_obj_surface = (struct object_surface *)src_surface->base;
5406
5407         if(src_obj_surface == NULL) {
5408             vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED;
5409             goto EXIT;
5410         }
5411
5412         if(vpp_post == 1) {
5413             vaStatus = i965_CreateSurfaces(ctx,
5414                                          src_obj_surface->orig_width,
5415                                          src_obj_surface->orig_height,
5416                                          VA_RT_FORMAT_YUV420,
5417                                          1,
5418                                          &out_surface_id);
5419             assert(vaStatus == VA_STATUS_SUCCESS);
5420             tmp_surface_id[num_tmp_surfaces++] = out_surface_id;
5421             tmp_surface = SURFACE(out_surface_id);
5422             assert(tmp_surface);
5423             i965_check_alloc_surface_bo(ctx, tmp_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5424         }
5425
5426         if(tmp_surface != NULL)
5427             dst_obj_surface = tmp_surface;
5428         else {
5429             if(dst_surface->type == I965_SURFACE_TYPE_IMAGE) {
5430                 dst_obj_surface = derive_surface(ctx, (struct object_image *)dst_surface->base,
5431                                                  &tmp_dst_obj_surface);
5432             }
5433             else
5434                 dst_obj_surface = (struct object_surface *)dst_surface->base;
5435         }
5436
5437         if(dst_obj_surface == NULL) {
5438             vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED;
5439             goto EXIT;
5440         }
5441
5442         vaStatus = vebox_processing_simple(ctx,
5443                                          pp_context,
5444                                          src_obj_surface,
5445                                          dst_obj_surface,
5446                                          src_rect);
5447         if(vaStatus != VA_STATUS_SUCCESS)
5448             goto EXIT;
5449
5450         if(vpp_post == 1) {
5451             struct i965_surface src_surface_new;
5452
5453             if(tmp_surface != NULL){
5454                 src_surface_new.base = (struct object_base *)tmp_surface;
5455                 src_surface_new.type = I965_SURFACE_TYPE_SURFACE;
5456                 src_surface_new.flags = I965_SURFACE_FLAG_FRAME;
5457             }
5458             else
5459                 memcpy((void *)&src_surface_new, (void *)src_surface, sizeof(src_surface_new));
5460
5461             vaStatus = i965_image_pl2_processing(ctx,
5462                                                &src_surface_new,
5463                                                src_rect,
5464                                                dst_surface,
5465                                                dst_rect);
5466         }
5467     }
5468
5469 EXIT:
5470     if(num_tmp_surfaces)
5471         i965_DestroySurfaces(ctx,
5472                              tmp_surface_id,
5473                              num_tmp_surfaces);
5474
5475     return vaStatus;
5476 }
5477
5478 VAStatus
5479 i965_image_processing(VADriverContextP ctx,
5480                       const struct i965_surface *src_surface,
5481                       const VARectangle *src_rect,
5482                       struct i965_surface *dst_surface,
5483                       const VARectangle *dst_rect)
5484 {
5485     struct i965_driver_data *i965 = i965_driver_data(ctx);
5486     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
5487
5488     if (HAS_VPP(i965)) {
5489         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
5490
5491         _i965LockMutex(&i965->pp_mutex);
5492
5493         switch (fourcc) {
5494         case VA_FOURCC_YV12:
5495         case VA_FOURCC_I420:
5496         case VA_FOURCC_IMC1:
5497         case VA_FOURCC_IMC3:
5498         case VA_FOURCC_422H:
5499         case VA_FOURCC_422V:
5500         case VA_FOURCC_411P:
5501         case VA_FOURCC_444P:
5502         case VA_FOURCC_YV16:
5503             status = i965_image_pl3_processing(ctx,
5504                                                src_surface,
5505                                                src_rect,
5506                                                dst_surface,
5507                                                dst_rect);
5508             break;
5509
5510         case  VA_FOURCC_NV12:
5511             status = i965_image_pl2_processing(ctx,
5512                                                src_surface,
5513                                                src_rect,
5514                                                dst_surface,
5515                                                dst_rect);
5516             break;
5517         case VA_FOURCC_YUY2:
5518         case VA_FOURCC_UYVY:
5519             status = i965_image_pl1_processing(ctx,
5520                                                src_surface,
5521                                                src_rect,
5522                                                dst_surface,
5523                                                dst_rect);
5524             break;
5525         case VA_FOURCC_BGRA:
5526         case VA_FOURCC_BGRX:
5527         case VA_FOURCC_RGBA:
5528         case VA_FOURCC_RGBX:
5529             status = i965_image_pl1_rgbx_processing(ctx,
5530                                                src_surface,
5531                                                src_rect,
5532                                                dst_surface,
5533                                                dst_rect);
5534             break;
5535         case VA_FOURCC_P010:
5536             status = i965_image_p010_processing(ctx,
5537                                                src_surface,
5538                                                src_rect,
5539                                                dst_surface,
5540                                                dst_rect);
5541             break;
5542         default:
5543             status = VA_STATUS_ERROR_UNIMPLEMENTED;
5544             break;
5545         }
5546         
5547         _i965UnlockMutex(&i965->pp_mutex);
5548     }
5549
5550     return status;
5551 }       
5552
5553 static void
5554 i965_post_processing_context_finalize(VADriverContextP ctx,
5555     struct i965_post_processing_context *pp_context)
5556 {
5557     int i;
5558
5559     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
5560     pp_context->surface_state_binding_table.bo = NULL;
5561
5562     dri_bo_unreference(pp_context->curbe.bo);
5563     pp_context->curbe.bo = NULL;
5564
5565     dri_bo_unreference(pp_context->sampler_state_table.bo);
5566     pp_context->sampler_state_table.bo = NULL;
5567
5568     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
5569     pp_context->sampler_state_table.bo_8x8 = NULL;
5570
5571     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
5572     pp_context->sampler_state_table.bo_8x8_uv = NULL;
5573
5574     dri_bo_unreference(pp_context->idrt.bo);
5575     pp_context->idrt.bo = NULL;
5576     pp_context->idrt.num_interface_descriptors = 0;
5577
5578     dri_bo_unreference(pp_context->vfe_state.bo);
5579     pp_context->vfe_state.bo = NULL;
5580
5581     for (i = 0; i < ARRAY_ELEMS(pp_context->pp_dndi_context.frame_store); i++)
5582         pp_dndi_frame_store_clear(&pp_context->pp_dndi_context.frame_store[i],
5583             ctx);
5584
5585     dri_bo_unreference(pp_context->pp_dn_context.stmm_bo);
5586     pp_context->pp_dn_context.stmm_bo = NULL;
5587
5588     for (i = 0; i < NUM_PP_MODULES; i++) {
5589         struct pp_module *pp_module = &pp_context->pp_modules[i];
5590
5591         dri_bo_unreference(pp_module->kernel.bo);
5592         pp_module->kernel.bo = NULL;
5593     }
5594
5595     free(pp_context->pp_static_parameter);
5596     free(pp_context->pp_inline_parameter);
5597     pp_context->pp_static_parameter = NULL;
5598     pp_context->pp_inline_parameter = NULL;
5599 }
5600
5601 void
5602 i965_post_processing_terminate(VADriverContextP ctx)
5603 {
5604     struct i965_driver_data *i965 = i965_driver_data(ctx);
5605     struct i965_post_processing_context *pp_context = i965->pp_context;
5606
5607     if (pp_context) {
5608         pp_context->finalize(ctx, pp_context);
5609         free(pp_context);
5610     }
5611
5612     i965->pp_context = NULL;
5613 }
5614
5615 #define VPP_CURBE_ALLOCATION_SIZE       32
5616
5617 void
5618 i965_post_processing_context_init(VADriverContextP ctx,
5619                                   void *data,
5620                                   struct intel_batchbuffer *batch)
5621 {
5622     struct i965_driver_data *i965 = i965_driver_data(ctx);
5623     int i;
5624     struct i965_post_processing_context *pp_context = data;
5625     const AVSConfig *avs_config;
5626
5627     if (IS_IRONLAKE(i965->intel.device_info)) {
5628         pp_context->urb.size = i965->intel.device_info->urb_size;
5629         pp_context->urb.num_vfe_entries = 32;
5630         pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
5631         pp_context->urb.num_cs_entries = 1;
5632         pp_context->urb.size_cs_entry = 2;
5633         pp_context->urb.vfe_start = 0;
5634         pp_context->urb.cs_start = pp_context->urb.vfe_start + 
5635             pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
5636         assert(pp_context->urb.cs_start +
5637            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);
5638         pp_context->intel_post_processing = ironlake_post_processing;
5639     } else {
5640         pp_context->vfe_gpu_state.max_num_threads = 60;
5641         pp_context->vfe_gpu_state.num_urb_entries = 59;
5642         pp_context->vfe_gpu_state.gpgpu_mode = 0;
5643         pp_context->vfe_gpu_state.urb_entry_size = 16 - 1;
5644         pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE;
5645         pp_context->intel_post_processing = gen6_post_processing;
5646     }
5647
5648     pp_context->finalize = i965_post_processing_context_finalize;
5649
5650     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
5651     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
5652     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
5653     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
5654
5655     if (IS_HASWELL(i965->intel.device_info))
5656         memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
5657     else if (IS_GEN7(i965->intel.device_info))
5658         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
5659     else if (IS_GEN6(i965->intel.device_info))
5660         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
5661     else if (IS_IRONLAKE(i965->intel.device_info))
5662         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
5663
5664     for (i = 0; i < NUM_PP_MODULES; i++) {
5665         struct pp_module *pp_module = &pp_context->pp_modules[i];
5666         dri_bo_unreference(pp_module->kernel.bo);
5667         if (pp_module->kernel.bin && pp_module->kernel.size) {
5668             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
5669                                                 pp_module->kernel.name,
5670                                                 pp_module->kernel.size,
5671                                                 4096);
5672             assert(pp_module->kernel.bo);
5673             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
5674         } else {
5675             pp_module->kernel.bo = NULL;
5676         }
5677     }
5678
5679     /* static & inline parameters */
5680     if (IS_GEN7(i965->intel.device_info)) {
5681         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
5682         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
5683     } else {
5684         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
5685         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
5686     }
5687
5688     pp_context->batch = batch;
5689     pp_dndi_context_init(&pp_context->pp_dndi_context);
5690
5691     avs_config = IS_IRONLAKE(i965->intel.device_info) ? &gen5_avs_config :
5692         &gen6_avs_config;
5693     avs_init_state(&pp_context->pp_avs_context.state, avs_config);
5694 }
5695
5696 bool
5697 i965_post_processing_init(VADriverContextP ctx)
5698 {
5699     struct i965_driver_data *i965 = i965_driver_data(ctx);
5700     struct i965_post_processing_context *pp_context = i965->pp_context;
5701
5702     if (HAS_VPP(i965)) {
5703         if (pp_context == NULL) {
5704             pp_context = calloc(1, sizeof(*pp_context));
5705             assert(pp_context);
5706             i965->codec_info->post_processing_context_init(ctx, pp_context, i965->pp_batch);
5707             i965->pp_context = pp_context;
5708         }
5709     }
5710
5711     return true;
5712 }
5713
5714 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
5715     PP_NULL,    /* VAProcFilterNone */
5716     PP_NV12_DN, /* VAProcFilterNoiseReduction */
5717     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
5718     PP_NULL,    /* VAProcFilterSharpening */
5719     PP_NULL,    /* VAProcFilterColorBalance */
5720 };
5721
5722 static const int proc_frame_to_pp_frame[3] = {
5723     I965_SURFACE_FLAG_FRAME,
5724     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
5725     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
5726 };
5727
/*
 * Bit flags collected by i965_proc_picture_fast() to describe what a
 * processing request needs.
 */
enum {
    PP_OP_CHANGE_FORMAT = 0x01, /* target fourcc differs from the source */
    PP_OP_CHANGE_SIZE   = 0x02, /* output size differs from input size */
    PP_OP_DEINTERLACE   = 0x04, /* deinterlacing / field selection */
    PP_OP_COMPLEX       = 0x08, /* anything the fast path cannot do */
};
5734
5735 static int
5736 pp_get_kernel_index(uint32_t src_fourcc, uint32_t dst_fourcc, uint32_t pp_ops,
5737     uint32_t filter_flags)
5738 {
5739     int pp_index = -1;
5740
5741     if (!dst_fourcc)
5742         dst_fourcc = src_fourcc;
5743
5744     switch (src_fourcc) {
5745     case VA_FOURCC_RGBX:
5746     case VA_FOURCC_RGBA:
5747     case VA_FOURCC_BGRX:
5748     case VA_FOURCC_BGRA:
5749         switch (dst_fourcc) {
5750         case VA_FOURCC_NV12:
5751             pp_index = PP_RGBX_LOAD_SAVE_NV12;
5752             break;
5753         }
5754         break;
5755     case VA_FOURCC_YUY2:
5756     case VA_FOURCC_UYVY:
5757         switch (dst_fourcc) {
5758         case VA_FOURCC_NV12:
5759             pp_index = PP_PA_LOAD_SAVE_NV12;
5760             break;
5761         case VA_FOURCC_I420:
5762         case VA_FOURCC_YV12:
5763             pp_index = PP_PA_LOAD_SAVE_PL3;
5764             break;
5765         case VA_FOURCC_YUY2:
5766         case VA_FOURCC_UYVY:
5767             pp_index = PP_PA_LOAD_SAVE_PA;
5768             break;
5769         }
5770         break;
5771     case VA_FOURCC_NV12:
5772         switch (dst_fourcc) {
5773         case VA_FOURCC_NV12:
5774             if (pp_ops & PP_OP_CHANGE_SIZE)
5775                 pp_index = avs_is_needed(filter_flags) ?
5776                     PP_NV12_AVS : PP_NV12_SCALING;
5777             else
5778                 pp_index = PP_NV12_LOAD_SAVE_N12;
5779             break;
5780         case VA_FOURCC_I420:
5781         case VA_FOURCC_YV12:
5782         case VA_FOURCC_IMC1:
5783         case VA_FOURCC_IMC3:
5784             pp_index = PP_NV12_LOAD_SAVE_PL3;
5785             break;
5786         case VA_FOURCC_YUY2:
5787         case VA_FOURCC_UYVY:
5788             pp_index = PP_NV12_LOAD_SAVE_PA;
5789             break;
5790         case VA_FOURCC_RGBX:
5791         case VA_FOURCC_RGBA:
5792         case VA_FOURCC_BGRX:
5793         case VA_FOURCC_BGRA:
5794             pp_index = PP_NV12_LOAD_SAVE_RGBX;
5795             break;
5796         }
5797         break;
5798     case VA_FOURCC_I420:
5799     case VA_FOURCC_YV12:
5800     case VA_FOURCC_IMC1:
5801     case VA_FOURCC_IMC3:
5802     case VA_FOURCC_YV16:
5803     case VA_FOURCC_411P:
5804     case VA_FOURCC_422H:
5805     case VA_FOURCC_422V:
5806     case VA_FOURCC_444P:
5807         switch (dst_fourcc) {
5808         case VA_FOURCC_NV12:
5809             pp_index = PP_PL3_LOAD_SAVE_N12;
5810             break;
5811         case VA_FOURCC_I420:
5812         case VA_FOURCC_YV12:
5813         case VA_FOURCC_IMC1:
5814         case VA_FOURCC_IMC3:
5815             pp_index = PP_PL3_LOAD_SAVE_PL3;
5816             break;
5817         case VA_FOURCC_YUY2:
5818         case VA_FOURCC_UYVY:
5819             pp_index = PP_PL3_LOAD_SAVE_PA;
5820             break;
5821         }
5822         break;
5823     }
5824     return pp_index;
5825 }
5826
5827 static VAStatus
5828 i965_proc_picture_fast(VADriverContextP ctx,
5829     struct i965_proc_context *proc_context, struct proc_state *proc_state)
5830 {
5831     struct i965_driver_data * const i965 = i965_driver_data(ctx);
5832     const VAProcPipelineParameterBuffer * const pipeline_param =
5833         (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
5834     struct object_surface *src_obj_surface, *dst_obj_surface;
5835     struct i965_surface src_surface, dst_surface;
5836     const VAProcFilterParameterBufferDeinterlacing *deint_params = NULL;
5837     VARectangle src_rect, dst_rect;
5838     VAStatus status;
5839     uint32_t i, filter_flags = 0, pp_ops = 0;
5840     int pp_index;
5841
5842     /* Validate pipeline parameters */
5843     if (pipeline_param->num_filters > 0 && !pipeline_param->filters)
5844         return VA_STATUS_ERROR_INVALID_PARAMETER;
5845
5846     for (i = 0; i < pipeline_param->num_filters; i++) {
5847         const VAProcFilterParameterBuffer *filter;
5848         struct object_buffer * const obj_buffer =
5849             BUFFER(pipeline_param->filters[i]);
5850
5851         assert(obj_buffer && obj_buffer->buffer_store);
5852         if (!obj_buffer || !obj_buffer->buffer_store)
5853             return VA_STATUS_ERROR_INVALID_PARAMETER;
5854
5855         filter = (VAProcFilterParameterBuffer *)
5856             obj_buffer->buffer_store->buffer;
5857         switch (filter->type) {
5858         case VAProcFilterDeinterlacing:
5859             pp_ops |= PP_OP_DEINTERLACE;
5860             deint_params = (VAProcFilterParameterBufferDeinterlacing *)filter;
5861             break;
5862         default:
5863             pp_ops |= PP_OP_COMPLEX;
5864             break;
5865         }
5866     }
5867     filter_flags |= pipeline_param->filter_flags & VA_FILTER_SCALING_MASK;
5868
5869     /* Validate source surface */
5870     src_obj_surface = SURFACE(pipeline_param->surface);
5871     if (!src_obj_surface)
5872         return VA_STATUS_ERROR_INVALID_SURFACE;
5873
5874     if (!src_obj_surface->fourcc)
5875         return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
5876
5877     if (pipeline_param->surface_region) {
5878         src_rect.x = pipeline_param->surface_region->x;
5879         src_rect.y = pipeline_param->surface_region->y;
5880         src_rect.width = pipeline_param->surface_region->width;
5881         src_rect.height = pipeline_param->surface_region->height;
5882     } else {
5883         src_rect.x = 0;
5884         src_rect.y = 0;
5885         src_rect.width = src_obj_surface->orig_width;
5886         src_rect.height = src_obj_surface->orig_height;
5887     }
5888
5889     src_surface.base  = &src_obj_surface->base;
5890     src_surface.type  = I965_SURFACE_TYPE_SURFACE;
5891     src_surface.flags = I965_SURFACE_FLAG_FRAME;
5892
5893     if (pp_ops & PP_OP_DEINTERLACE) {
5894         filter_flags |= !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD) ?
5895             VA_TOP_FIELD : VA_BOTTOM_FIELD;
5896         if (deint_params->algorithm != VAProcDeinterlacingBob)
5897             pp_ops |= PP_OP_COMPLEX;
5898     }
5899     else if (pipeline_param->filter_flags & (VA_TOP_FIELD | VA_BOTTOM_FIELD)) {
5900         filter_flags |= (pipeline_param->filter_flags & VA_TOP_FIELD) ?
5901             VA_TOP_FIELD : VA_BOTTOM_FIELD;
5902         pp_ops |= PP_OP_DEINTERLACE;
5903     }
5904     if (pp_ops & PP_OP_DEINTERLACE) // XXX: no bob-deinterlacing optimization yet
5905         pp_ops |= PP_OP_COMPLEX;
5906
5907     /* Validate target surface */
5908     dst_obj_surface = SURFACE(proc_state->current_render_target);
5909     if (!dst_obj_surface)
5910         return VA_STATUS_ERROR_INVALID_SURFACE;
5911
5912     if (!dst_obj_surface->bo)
5913         return VA_STATUS_ERROR_INVALID_SURFACE;
5914
5915     if (dst_obj_surface->fourcc &&
5916         dst_obj_surface->fourcc != src_obj_surface->fourcc)
5917         pp_ops |= PP_OP_CHANGE_FORMAT;
5918
5919     if (pipeline_param->output_region) {
5920         dst_rect.x = pipeline_param->output_region->x;
5921         dst_rect.y = pipeline_param->output_region->y;
5922         dst_rect.width = pipeline_param->output_region->width;
5923         dst_rect.height = pipeline_param->output_region->height;
5924     } else {
5925         dst_rect.x = 0;
5926         dst_rect.y = 0;
5927         dst_rect.width = dst_obj_surface->orig_width;
5928         dst_rect.height = dst_obj_surface->orig_height;
5929     }
5930
5931     if (dst_rect.width != src_rect.width || dst_rect.height != src_rect.height)
5932         pp_ops |= PP_OP_CHANGE_SIZE;
5933
5934     dst_surface.base  = &dst_obj_surface->base;
5935     dst_surface.type  = I965_SURFACE_TYPE_SURFACE;
5936     dst_surface.flags = I965_SURFACE_FLAG_FRAME;
5937
5938     /* Validate "fast-path" processing capabilities */
5939     if (!IS_GEN7(i965->intel.device_info)) {
5940         if ((pp_ops & PP_OP_CHANGE_FORMAT) && (pp_ops & PP_OP_CHANGE_SIZE))
5941             return VA_STATUS_ERROR_UNIMPLEMENTED; // temporary surface is needed
5942     }
5943     if (pipeline_param->pipeline_flags & VA_PROC_PIPELINE_FAST) {
5944         filter_flags &= ~VA_FILTER_SCALING_MASK;
5945         filter_flags |= VA_FILTER_SCALING_FAST;
5946     }
5947     else {
5948         if (pp_ops & PP_OP_COMPLEX)
5949             return VA_STATUS_ERROR_UNIMPLEMENTED; // full pipeline is needed
5950         if ((filter_flags & VA_FILTER_SCALING_MASK) > VA_FILTER_SCALING_HQ)
5951             return VA_STATUS_ERROR_UNIMPLEMENTED;
5952     }
5953
5954     pp_index = pp_get_kernel_index(src_obj_surface->fourcc,
5955         dst_obj_surface->fourcc, pp_ops, filter_flags);
5956     if (pp_index < 0)
5957         return VA_STATUS_ERROR_UNIMPLEMENTED;
5958
5959     proc_context->pp_context.filter_flags = filter_flags;
5960     status = i965_post_processing_internal(ctx, &proc_context->pp_context,
5961         &src_surface, &src_rect, &dst_surface, &dst_rect, pp_index, NULL);
5962     intel_batchbuffer_flush(proc_context->pp_context.batch);
5963     return status;
5964 }
5965
5966 VAStatus 
5967 i965_proc_picture(VADriverContextP ctx, 
5968                   VAProfile profile, 
5969                   union codec_state *codec_state,
5970                   struct hw_context *hw_context)
5971 {
5972     struct i965_driver_data *i965 = i965_driver_data(ctx);
5973     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
5974     struct proc_state *proc_state = &codec_state->proc;
5975     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
5976     struct object_surface *obj_surface;
5977     struct i965_surface src_surface, dst_surface;
5978     VARectangle src_rect, dst_rect;
5979     VAStatus status;
5980     int i;
5981     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
5982     int num_tmp_surfaces = 0;
5983     unsigned int tiling = 0, swizzle = 0;
5984     int in_width, in_height;
5985
5986     status = i965_proc_picture_fast(ctx, proc_context, proc_state);
5987     if (status != VA_STATUS_ERROR_UNIMPLEMENTED)
5988         return status;
5989
5990     if (pipeline_param->surface == VA_INVALID_ID ||
5991         proc_state->current_render_target == VA_INVALID_ID) {
5992         status = VA_STATUS_ERROR_INVALID_SURFACE;
5993         goto error;
5994     }
5995
5996     obj_surface = SURFACE(pipeline_param->surface);
5997
5998     if (!obj_surface) {
5999         status = VA_STATUS_ERROR_INVALID_SURFACE;
6000         goto error;
6001     }
6002
6003     if (!obj_surface->bo) {
6004         status = VA_STATUS_ERROR_INVALID_VALUE; /* The input surface is created without valid content */
6005         goto error;
6006     }
6007
6008     if (pipeline_param->num_filters && !pipeline_param->filters) {
6009         status = VA_STATUS_ERROR_INVALID_PARAMETER;
6010         goto error;
6011     }
6012
6013     in_width = obj_surface->orig_width;
6014     in_height = obj_surface->orig_height;
6015     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
6016
6017     src_surface.base = (struct object_base *)obj_surface;
6018     src_surface.type = I965_SURFACE_TYPE_SURFACE;
6019     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
6020
6021     VASurfaceID out_surface_id = VA_INVALID_ID;
6022     if (obj_surface->fourcc != VA_FOURCC_NV12) {
6023         src_surface.base = (struct object_base *)obj_surface;
6024         src_surface.type = I965_SURFACE_TYPE_SURFACE;
6025         src_surface.flags = I965_SURFACE_FLAG_FRAME;
6026         src_rect.x = 0;
6027         src_rect.y = 0;
6028         src_rect.width = in_width;
6029         src_rect.height = in_height;
6030
6031         status = i965_CreateSurfaces(ctx,
6032                                      in_width,
6033                                      in_height,
6034                                      VA_RT_FORMAT_YUV420,
6035                                      1,
6036                                      &out_surface_id);
6037         if (status != VA_STATUS_SUCCESS)
6038             goto error;
6039         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6040         obj_surface = SURFACE(out_surface_id);
6041         assert(obj_surface);
6042         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6043
6044         dst_surface.base = (struct object_base *)obj_surface;
6045         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6046         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
6047         dst_rect.x = 0;
6048         dst_rect.y = 0;
6049         dst_rect.width = in_width;
6050         dst_rect.height = in_height;
6051
6052         status = i965_image_processing(ctx,
6053                                        &src_surface,
6054                                        &src_rect,
6055                                        &dst_surface,
6056                                        &dst_rect);
6057         if (status != VA_STATUS_SUCCESS)
6058             goto error;
6059
6060         src_surface.base = (struct object_base *)obj_surface;
6061         src_surface.type = I965_SURFACE_TYPE_SURFACE;
6062         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
6063     }
6064
6065     if (pipeline_param->surface_region) {
6066         src_rect.x = pipeline_param->surface_region->x;
6067         src_rect.y = pipeline_param->surface_region->y;
6068         src_rect.width = pipeline_param->surface_region->width;
6069         src_rect.height = pipeline_param->surface_region->height;
6070     } else {
6071         src_rect.x = 0;
6072         src_rect.y = 0;
6073         src_rect.width = in_width;
6074         src_rect.height = in_height;
6075     }
6076
6077     proc_context->pp_context.pipeline_param = pipeline_param;
6078
6079     for (i = 0; i < pipeline_param->num_filters; i++) {
6080         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
6081         VAProcFilterParameterBufferBase *filter_param = NULL;
6082         VAProcFilterType filter_type;
6083         int kernel_index;
6084
6085         if (!obj_buffer ||
6086             !obj_buffer->buffer_store ||
6087             !obj_buffer->buffer_store->buffer) {
6088             status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN;
6089             goto error;
6090         }
6091
6092         out_surface_id = VA_INVALID_ID;
6093         filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
6094         filter_type = filter_param->type;
6095         kernel_index = procfilter_to_pp_flag[filter_type];
6096
6097         if (kernel_index != PP_NULL &&
6098             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
6099             status = i965_CreateSurfaces(ctx,
6100                                          in_width,
6101                                          in_height,
6102                                          VA_RT_FORMAT_YUV420,
6103                                          1,
6104                                          &out_surface_id);
6105             assert(status == VA_STATUS_SUCCESS);
6106             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6107             obj_surface = SURFACE(out_surface_id);
6108             assert(obj_surface);
6109             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6110             dst_surface.base = (struct object_base *)obj_surface;
6111             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6112             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
6113                                                    &src_surface,
6114                                                    &src_rect,
6115                                                    &dst_surface,
6116                                                    &src_rect,
6117                                                    kernel_index,
6118                                                    filter_param);
6119
6120             if (status == VA_STATUS_SUCCESS) {
6121                 src_surface.base = dst_surface.base;
6122                 src_surface.type = dst_surface.type;
6123                 src_surface.flags = dst_surface.flags;
6124             }
6125         }
6126     }
6127
6128     proc_context->pp_context.pipeline_param = NULL;
6129     obj_surface = SURFACE(proc_state->current_render_target);
6130     
6131     if (!obj_surface) {
6132         status = VA_STATUS_ERROR_INVALID_SURFACE;
6133         goto error;
6134     }
6135
6136     if (pipeline_param->output_region) {
6137         dst_rect.x = pipeline_param->output_region->x;
6138         dst_rect.y = pipeline_param->output_region->y;
6139         dst_rect.width = pipeline_param->output_region->width;
6140         dst_rect.height = pipeline_param->output_region->height;
6141     } else {
6142         dst_rect.x = 0;
6143         dst_rect.y = 0;
6144         dst_rect.width = obj_surface->orig_width;
6145         dst_rect.height = obj_surface->orig_height;
6146     }
6147
6148     if (IS_GEN7(i965->intel.device_info) ||
6149         IS_GEN8(i965->intel.device_info) ||
6150         IS_GEN9(i965->intel.device_info)) {
6151         unsigned int saved_filter_flag;
6152         struct i965_post_processing_context *i965pp_context = i965->pp_context;
6153
6154         if (obj_surface->fourcc == 0) {
6155             i965_check_alloc_surface_bo(ctx, obj_surface, 1,
6156                                         VA_FOURCC_NV12,
6157                                         SUBSAMPLE_YUV420);
6158         }
6159
6160         i965_vpp_clear_surface(ctx, &proc_context->pp_context,
6161                                obj_surface,
6162                                pipeline_param->output_background_color);
6163
6164         intel_batchbuffer_flush(hw_context->batch);
6165
6166         saved_filter_flag = i965pp_context->filter_flags;
6167         i965pp_context->filter_flags = (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK);
6168
6169         dst_surface.base = (struct object_base *)obj_surface;
6170         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6171         i965_image_processing(ctx, &src_surface, &src_rect, &dst_surface, &dst_rect);
6172
6173         i965pp_context->filter_flags = saved_filter_flag;
6174
6175         if (num_tmp_surfaces)
6176             i965_DestroySurfaces(ctx,
6177                              tmp_surfaces,
6178                              num_tmp_surfaces);
6179
6180         return VA_STATUS_SUCCESS;
6181     }
6182
6183     int csc_needed = 0;
6184     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC_NV12){
6185         csc_needed = 1;
6186         out_surface_id = VA_INVALID_ID;
6187         status = i965_CreateSurfaces(ctx,
6188                                      obj_surface->orig_width,
6189                                      obj_surface->orig_height,
6190                                      VA_RT_FORMAT_YUV420, 
6191                                      1,
6192                                      &out_surface_id);
6193         assert(status == VA_STATUS_SUCCESS);
6194         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6195         struct object_surface *csc_surface = SURFACE(out_surface_id);
6196         assert(csc_surface);
6197         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6198         dst_surface.base = (struct object_base *)csc_surface;
6199     } else {
6200         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6201         dst_surface.base = (struct object_base *)obj_surface;
6202     }
6203
6204     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6205     i965_vpp_clear_surface(ctx, &proc_context->pp_context, obj_surface, pipeline_param->output_background_color); 
6206
6207     // load/save doesn't support different origin offset for src and dst surface
6208     if (src_rect.width == dst_rect.width &&
6209         src_rect.height == dst_rect.height &&
6210         src_rect.x == dst_rect.x &&
6211         src_rect.y == dst_rect.y) {
6212         i965_post_processing_internal(ctx, &proc_context->pp_context,
6213                                       &src_surface,
6214                                       &src_rect,
6215                                       &dst_surface,
6216                                       &dst_rect,
6217                                       PP_NV12_LOAD_SAVE_N12,
6218                                       NULL);
6219     } else {
6220
6221         proc_context->pp_context.filter_flags = pipeline_param->filter_flags;
6222         i965_post_processing_internal(ctx, &proc_context->pp_context,
6223                                       &src_surface,
6224                                       &src_rect,
6225                                       &dst_surface,
6226                                       &dst_rect,
6227                                       avs_is_needed(pipeline_param->filter_flags) ? PP_NV12_AVS : PP_NV12_SCALING,
6228                                       NULL);
6229     }
6230
6231     if (csc_needed) {
6232         src_surface.base = dst_surface.base;
6233         src_surface.type = dst_surface.type;
6234         src_surface.flags = dst_surface.flags;
6235         dst_surface.base = (struct object_base *)obj_surface;
6236         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6237         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
6238     }
6239     
6240     if (num_tmp_surfaces)
6241         i965_DestroySurfaces(ctx,
6242                              tmp_surfaces,
6243                              num_tmp_surfaces);
6244
6245     intel_batchbuffer_flush(hw_context->batch);
6246
6247     return VA_STATUS_SUCCESS;
6248
6249 error:
6250     if (num_tmp_surfaces)
6251         i965_DestroySurfaces(ctx,
6252                              tmp_surfaces,
6253                              num_tmp_surfaces);
6254
6255     return status;
6256 }
6257
6258 static void
6259 i965_proc_context_destroy(void *hw_context)
6260 {
6261     struct i965_proc_context * const proc_context = hw_context;
6262     VADriverContextP const ctx = proc_context->driver_context;
6263
6264     proc_context->pp_context.finalize(ctx, &proc_context->pp_context);
6265     intel_batchbuffer_free(proc_context->base.batch);
6266     free(proc_context);
6267 }
6268
6269 struct hw_context *
6270 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
6271 {
6272     struct i965_driver_data *i965 = i965_driver_data(ctx);
6273     struct intel_driver_data *intel = intel_driver_data(ctx);
6274     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
6275
6276     if (!proc_context)
6277         return NULL;
6278
6279     proc_context->base.destroy = i965_proc_context_destroy;
6280     proc_context->base.run = i965_proc_picture;
6281     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
6282     proc_context->driver_context = ctx;
6283     i965->codec_info->post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
6284
6285     return (struct hw_context *)proc_context;
6286 }
6287
6288