OSDN Git Service

VPP: Initialize the uninitialized surface to avoid NULL GPU buffer
[android-x86/hardware-intel-common-vaapi.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41 #include "intel_media.h"
42
/*
 * Declared extern; no definition is visible in this part of the file.
 * Takes the driver context plus a source and a destination surface object
 * and reports a VAStatus.
 * NOTE(review): the name suggests a surface-to-surface conversion -- confirm
 * against the definition.
 */
43 extern VAStatus
44 vpp_surface_convert(VADriverContextP ctx,
45                     struct object_surface *src_obj_surf,
46                     struct object_surface *dst_obj_surf);
47
/* Yields the has_vpp member reached through (ctx)->codec_info. */
48 #define HAS_VPP(ctx) ((ctx)->codec_info->has_vpp)
49
/*
 * Size of one surface-state slot: the largest of the Gen6, Gen7 and Gen8
 * padded sizes, so a single layout can be used regardless of which of those
 * values applies.
 */
#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN8,\
                        MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))

/*
 * Byte offset of slot `index`.  The argument is parenthesized so that an
 * expression such as SURFACE_STATE_OFFSET(base + i) multiplies the whole
 * expression rather than only its first operand.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))

/* Offset of the slot immediately after the MAX_PP_SURFACES surface-state slots. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
55
/*
 * Numeric constants used by the post-processing code.
 * NOTE(review): the GPU_ASM_* names suggest the block dimensions and x-offset
 * alignment assumed by the shader kernels -- their users are not visible
 * here, confirm units and meaning there.
 */
56 #define GPU_ASM_BLOCK_WIDTH         16
57 #define GPU_ASM_BLOCK_HEIGHT        8
58 #define GPU_ASM_X_OFFSET_ALIGNMENT  4
59
/*
 * Extra status code with the value 0xFFFFFFFE.
 * NOTE(review): presumably an internal "success" variant distinct from the
 * regular success status -- confirm against the code that returns it.
 */
60 #define VA_STATUS_SUCCESS_1                     0xFFFFFFFE
61
/*
 * Gen5 post-processing kernel data.  Each array is populated by #including a
 * shader listing (*.g4b.gen5) from shaders/post_processing/gen5_6/ and is
 * laid out as rows of four 32-bit words.  The arrays are referenced, together
 * with their sizeof, by the pp_modules_gen5 table.
 */
62 static const uint32_t pp_null_gen5[][4] = {
63 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
64 };
65
66 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
67 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
68 };
69
70 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
71 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
72 };
73
74 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
75 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
76 };
77
78 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
79 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
80 };
81
82 static const uint32_t pp_nv12_scaling_gen5[][4] = {
83 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
84 };
85
86 static const uint32_t pp_nv12_avs_gen5[][4] = {
87 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
88 };
89
90 static const uint32_t pp_nv12_dndi_gen5[][4] = {
91 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
92 };
93
94 static const uint32_t pp_nv12_dn_gen5[][4] = {
95 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
96 };
97
98 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
99 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
100 };
101
102 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
103 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
104 };
105
106 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
107 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
108 };
109
110 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
111 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
112 };
113
114 static const uint32_t pp_pa_load_save_pa_gen5[][4] = {
115 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5"
116 };
117
118 static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
119 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
120 };
121
122 static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
123 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
124 };
125
/*
 * Forward declarations of the file-local "initialize" callbacks that the
 * module tables store alongside each kernel.  All of them share one
 * parameter list: the driver context, the post-processing context, a
 * read-only source surface with its rectangle, a destination surface with
 * its rectangle, and an untyped filter_param pointer.  Each returns a
 * VAStatus.  Their definitions are not part of this section.
 */
126 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
127                                    const struct i965_surface *src_surface,
128                                    const VARectangle *src_rect,
129                                    struct i965_surface *dst_surface,
130                                    const VARectangle *dst_rect,
131                                    void *filter_param);
132 static VAStatus
133 pp_nv12_avs_initialize(VADriverContextP ctx,
134     struct i965_post_processing_context *pp_context,
135     const struct i965_surface *src_surface, const VARectangle *src_rect,
136     struct i965_surface *dst_surface, const VARectangle *dst_rect,
137     void *filter_param);
138 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
139                                            const struct i965_surface *src_surface,
140                                            const VARectangle *src_rect,
141                                            struct i965_surface *dst_surface,
142                                            const VARectangle *dst_rect,
143                                            void *filter_param);
144 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
145                                              const struct i965_surface *src_surface,
146                                              const VARectangle *src_rect,
147                                              struct i965_surface *dst_surface,
148                                              const VARectangle *dst_rect,
149                                              void *filter_param);
150 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
151                                                 const struct i965_surface *src_surface,
152                                                 const VARectangle *src_rect,
153                                                 struct i965_surface *dst_surface,
154                                                 const VARectangle *dst_rect,
155                                                 void *filter_param);
156 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
157                                         const struct i965_surface *src_surface,
158                                         const VARectangle *src_rect,
159                                         struct i965_surface *dst_surface,
160                                         const VARectangle *dst_rect,
161                                         void *filter_param);
162 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
163                                       const struct i965_surface *src_surface,
164                                       const VARectangle *src_rect,
165                                       struct i965_surface *dst_surface,
166                                       const VARectangle *dst_rect,
167                                       void *filter_param);
168
/*
 * Gen5 module table.  Every entry is initialized positionally as
 *   { { name string, PP_* identifier, kernel array, sizeof(kernel array), NULL },
 *     initialize callback }.
 * The field names of struct pp_module are not visible here; see its
 * declaration for the exact meaning of each slot (in particular the trailing
 * NULL).
 *
 * Callback assignment in this table:
 *   - PP_NULL                      -> pp_null_initialize
 *   - all *_LOAD_SAVE_* entries    -> pp_plx_load_save_plx_initialize
 *   - PP_NV12_SCALING              -> pp_nv12_scaling_initialize
 *   - PP_NV12_AVS                  -> pp_nv12_avs_initialize
 *   - PP_NV12_DNDI                 -> pp_nv12_dndi_initialize
 *   - PP_NV12_DN                   -> pp_nv12_dn_initialize
 *
 * NOTE(review): the entry order presumably has to match the numeric order of
 * the PP_* identifiers if the table is indexed by them -- confirm at the
 * lookup site before reordering.
 */
169 static struct pp_module pp_modules_gen5[] = {
170     {
171         {
172             "NULL module (for testing)",
173             PP_NULL,
174             pp_null_gen5,
175             sizeof(pp_null_gen5),
176             NULL,
177         },
178
179         pp_null_initialize,
180     },
181
182     {
183         {
184             "NV12_NV12",
185             PP_NV12_LOAD_SAVE_N12,
186             pp_nv12_load_save_nv12_gen5,
187             sizeof(pp_nv12_load_save_nv12_gen5),
188             NULL,
189         },
190
191         pp_plx_load_save_plx_initialize,
192     },
193
194     {
195         {
196             "NV12_PL3",
197             PP_NV12_LOAD_SAVE_PL3,
198             pp_nv12_load_save_pl3_gen5,
199             sizeof(pp_nv12_load_save_pl3_gen5),
200             NULL,
201         },
202
203         pp_plx_load_save_plx_initialize,
204     },
205
206     {
207         {
208             "PL3_NV12",
209             PP_PL3_LOAD_SAVE_N12,
210             pp_pl3_load_save_nv12_gen5,
211             sizeof(pp_pl3_load_save_nv12_gen5),
212             NULL,
213         },
214
215         pp_plx_load_save_plx_initialize,
216     },
217
218     {
219         {
220             "PL3_PL3",
221             PP_PL3_LOAD_SAVE_PL3,
222             pp_pl3_load_save_pl3_gen5,
223             sizeof(pp_pl3_load_save_pl3_gen5),
224             NULL,
225         },
226
227         pp_plx_load_save_plx_initialize
228     },
229
230     {
231         {
232             "NV12 Scaling module",
233             PP_NV12_SCALING,
234             pp_nv12_scaling_gen5,
235             sizeof(pp_nv12_scaling_gen5),
236             NULL,
237         },
238
239         pp_nv12_scaling_initialize,
240     },
241
242     {
243         {
244             "NV12 AVS module",
245             PP_NV12_AVS,
246             pp_nv12_avs_gen5,
247             sizeof(pp_nv12_avs_gen5),
248             NULL,
249         },
250
251         pp_nv12_avs_initialize,
252     },
253
254     {
255         {
256             "NV12 DNDI module",
257             PP_NV12_DNDI,
258             pp_nv12_dndi_gen5,
259             sizeof(pp_nv12_dndi_gen5),
260             NULL,
261         },
262
263         pp_nv12_dndi_initialize,
264     },
265
266     {
267         {
268             "NV12 DN module",
269             PP_NV12_DN,
270             pp_nv12_dn_gen5,
271             sizeof(pp_nv12_dn_gen5),
272             NULL,
273         },
274
275         pp_nv12_dn_initialize,
276     },
277
278     {
279         {
280             "NV12_PA module",
281             PP_NV12_LOAD_SAVE_PA,
282             pp_nv12_load_save_pa_gen5,
283             sizeof(pp_nv12_load_save_pa_gen5),
284             NULL,
285         },
286     
287         pp_plx_load_save_plx_initialize,
288     },
289
290     {
291         {
292             "PL3_PA module",
293             PP_PL3_LOAD_SAVE_PA,
294             pp_pl3_load_save_pa_gen5,
295             sizeof(pp_pl3_load_save_pa_gen5),
296             NULL,
297         },
298     
299         pp_plx_load_save_plx_initialize,
300     },
301
302     {
303         {
304             "PA_NV12 module",
305             PP_PA_LOAD_SAVE_NV12,
306             pp_pa_load_save_nv12_gen5,
307             sizeof(pp_pa_load_save_nv12_gen5),
308             NULL,
309         },
310     
311         pp_plx_load_save_plx_initialize,
312     },
313
314     {
315         {
316             "PA_PL3 module",
317             PP_PA_LOAD_SAVE_PL3,
318             pp_pa_load_save_pl3_gen5,
319             sizeof(pp_pa_load_save_pl3_gen5),
320             NULL,
321         },
322     
323         pp_plx_load_save_plx_initialize,
324     },
325
326     {
327         {
328             "PA_PA module",
329             PP_PA_LOAD_SAVE_PA,
330             pp_pa_load_save_pa_gen5,
331             sizeof(pp_pa_load_save_pa_gen5),
332             NULL,
333         },
334
335         pp_plx_load_save_plx_initialize,
336     },
337
338     {
339         {
340             "RGBX_NV12 module",
341             PP_RGBX_LOAD_SAVE_NV12,
342             pp_rgbx_load_save_nv12_gen5,
343             sizeof(pp_rgbx_load_save_nv12_gen5),
344             NULL,
345         },
346     
347         pp_plx_load_save_plx_initialize,
348     },
349             
350     {
351         {
352             "NV12_RGBX module",
353             PP_NV12_LOAD_SAVE_RGBX,
354             pp_nv12_load_save_rgbx_gen5,
355             sizeof(pp_nv12_load_save_rgbx_gen5),
356             NULL,
357         },
358     
359         pp_plx_load_save_plx_initialize,
360     },
361 };
362
/*
 * Gen6 post-processing kernel data.  Each array is populated by #including a
 * shader listing (*.g6b) from shaders/post_processing/gen5_6/ and is laid out
 * as rows of four 32-bit words.  The arrays are referenced, together with
 * their sizeof, by the pp_modules_gen6 table.
 *
 * Note that pp_nv12_scaling_gen6 and pp_nv12_avs_gen6 include the same file,
 * nv12_avs_nv12.g6b.
 */
363 static const uint32_t pp_null_gen6[][4] = {
364 #include "shaders/post_processing/gen5_6/null.g6b"
365 };
366
367 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
368 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
369 };
370
371 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
372 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
373 };
374
375 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
376 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
377 };
378
379 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
380 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
381 };
382
/* Same shader file as pp_nv12_avs_gen6 below. */
383 static const uint32_t pp_nv12_scaling_gen6[][4] = {
384 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
385 };
386
387 static const uint32_t pp_nv12_avs_gen6[][4] = {
388 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
389 };
390
391 static const uint32_t pp_nv12_dndi_gen6[][4] = {
392 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
393 };
394
395 static const uint32_t pp_nv12_dn_gen6[][4] = {
396 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
397 };
398
399 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
400 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
401 };
402
403 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
404 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
405 };
406
407 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
408 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
409 };
410
411 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
412 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
413 };
414
415 static const uint32_t pp_pa_load_save_pa_gen6[][4] = {
416 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g6b"
417 };
418
419 static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
420 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
421 };
422
423 static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
424 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
425 };
426
/*
 * Gen6 module table.  Every entry is initialized positionally as
 *   { { name string, PP_* identifier, kernel array, sizeof(kernel array), NULL },
 *     initialize callback }.
 * The field names of struct pp_module are not visible here; see its
 * declaration for the exact meaning of each slot.
 *
 * Callback assignment in this table:
 *   - PP_NULL                      -> pp_null_initialize
 *   - all *_LOAD_SAVE_* entries    -> pp_plx_load_save_plx_initialize
 *   - PP_NV12_SCALING              -> gen6_nv12_scaling_initialize
 *   - PP_NV12_AVS                  -> pp_nv12_avs_initialize
 *   - PP_NV12_DNDI                 -> pp_nv12_dndi_initialize
 *   - PP_NV12_DN                   -> pp_nv12_dn_initialize
 *
 * NOTE(review): the entry order presumably has to match the numeric order of
 * the PP_* identifiers if the table is indexed by them -- confirm at the
 * lookup site before reordering.
 */
427 static struct pp_module pp_modules_gen6[] = {
428     {
429         {
430             "NULL module (for testing)",
431             PP_NULL,
432             pp_null_gen6,
433             sizeof(pp_null_gen6),
434             NULL,
435         },
436
437         pp_null_initialize,
438     },
439
440     {
441         {
442             "NV12_NV12",
443             PP_NV12_LOAD_SAVE_N12,
444             pp_nv12_load_save_nv12_gen6,
445             sizeof(pp_nv12_load_save_nv12_gen6),
446             NULL,
447         },
448
449         pp_plx_load_save_plx_initialize,
450     },
451
452     {
453         {
454             "NV12_PL3",
455             PP_NV12_LOAD_SAVE_PL3,
456             pp_nv12_load_save_pl3_gen6,
457             sizeof(pp_nv12_load_save_pl3_gen6),
458             NULL,
459         },
460         
461         pp_plx_load_save_plx_initialize,
462     },
463
464     {
465         {
466             "PL3_NV12",
467             PP_PL3_LOAD_SAVE_N12,
468             pp_pl3_load_save_nv12_gen6,
469             sizeof(pp_pl3_load_save_nv12_gen6),
470             NULL,
471         },
472
473         pp_plx_load_save_plx_initialize,
474     },
475
476     {
477         {
478             "PL3_PL3",
479             PP_PL3_LOAD_SAVE_PL3,
480             pp_pl3_load_save_pl3_gen6,
481             sizeof(pp_pl3_load_save_pl3_gen6),
482             NULL,
483         },
484
485         pp_plx_load_save_plx_initialize,
486     },
487
488     {
489         {
490             "NV12 Scaling module",
491             PP_NV12_SCALING,
492             pp_nv12_scaling_gen6,
493             sizeof(pp_nv12_scaling_gen6),
494             NULL,
495         },
496
497         gen6_nv12_scaling_initialize,
498     },
499
500     {
501         {
502             "NV12 AVS module",
503             PP_NV12_AVS,
504             pp_nv12_avs_gen6,
505             sizeof(pp_nv12_avs_gen6),
506             NULL,
507         },
508
509         pp_nv12_avs_initialize,
510     },
511
512     {
513         {
514             "NV12 DNDI module",
515             PP_NV12_DNDI,
516             pp_nv12_dndi_gen6,
517             sizeof(pp_nv12_dndi_gen6),
518             NULL,
519         },
520
521         pp_nv12_dndi_initialize,
522     },
523
524     {
525         {
526             "NV12 DN module",
527             PP_NV12_DN,
528             pp_nv12_dn_gen6,
529             sizeof(pp_nv12_dn_gen6),
530             NULL,
531         },
532
533         pp_nv12_dn_initialize,
534     },
535     {
536         {
537             "NV12_PA module",
538             PP_NV12_LOAD_SAVE_PA,
539             pp_nv12_load_save_pa_gen6,
540             sizeof(pp_nv12_load_save_pa_gen6),
541             NULL,
542         },
543     
544         pp_plx_load_save_plx_initialize,
545     },
546
547     {
548         {
549             "PL3_PA module",
550             PP_PL3_LOAD_SAVE_PA,
551             pp_pl3_load_save_pa_gen6,
552             sizeof(pp_pl3_load_save_pa_gen6),
553             NULL,
554         },
555     
556         pp_plx_load_save_plx_initialize,
557     },
558
559     {
560         {
561             "PA_NV12 module",
562             PP_PA_LOAD_SAVE_NV12,
563             pp_pa_load_save_nv12_gen6,
564             sizeof(pp_pa_load_save_nv12_gen6),
565             NULL,
566         },
567     
568         pp_plx_load_save_plx_initialize,
569     },
570
571     {
572         {
573             "PA_PL3 module",
574             PP_PA_LOAD_SAVE_PL3,
575             pp_pa_load_save_pl3_gen6,
576             sizeof(pp_pa_load_save_pl3_gen6),
577             NULL,
578         },
579     
580         pp_plx_load_save_plx_initialize,
581     },
582
583     {
584         {
585             "PA_PA module",
586             PP_PA_LOAD_SAVE_PA,
587             pp_pa_load_save_pa_gen6,
588             sizeof(pp_pa_load_save_pa_gen6),
589             NULL,
590         },
591
592         pp_plx_load_save_plx_initialize,
593     },
594
595     {
596         {
597             "RGBX_NV12 module",
598             PP_RGBX_LOAD_SAVE_NV12,
599             pp_rgbx_load_save_nv12_gen6,
600             sizeof(pp_rgbx_load_save_nv12_gen6),
601             NULL,
602         },
603     
604         pp_plx_load_save_plx_initialize,
605     },
606
607     {
608         {
609             "NV12_RGBX module",
610             PP_NV12_LOAD_SAVE_RGBX,
611             pp_nv12_load_save_rgbx_gen6,
612             sizeof(pp_nv12_load_save_rgbx_gen6),
613             NULL,
614         },
615     
616         pp_plx_load_save_plx_initialize,
617     },
618 };
619
/*
 * Gen7 post-processing kernel data.  Except for pp_null_gen7, each array is
 * populated by #including a shader listing (*.g7b) from
 * shaders/post_processing/gen7/ and is laid out as rows of four 32-bit words.
 * The arrays are referenced, together with their sizeof, by the
 * pp_modules_gen7 table.
 *
 * pp_null_gen7 has an empty initializer list, so it supplies no kernel words.
 * NOTE(review): an empty brace initializer is a compiler extension in C --
 * confirm the supported toolchains accept it.
 *
 * pp_nv12_scaling_gen7 and pp_nv12_avs_gen7 include the same file, avs.g7b.
 */
620 static const uint32_t pp_null_gen7[][4] = {
621 };
622
623 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
624 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
625 };
626
627 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
628 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
629 };
630
631 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
632 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
633 };
634
635 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
636 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
637 };
638
/* Same shader file as pp_nv12_avs_gen7 below. */
639 static const uint32_t pp_nv12_scaling_gen7[][4] = {
640 #include "shaders/post_processing/gen7/avs.g7b"
641 };
642
643 static const uint32_t pp_nv12_avs_gen7[][4] = {
644 #include "shaders/post_processing/gen7/avs.g7b"
645 };
646
647 static const uint32_t pp_nv12_dndi_gen7[][4] = {
648 #include "shaders/post_processing/gen7/dndi.g7b"
649 };
650
651 static const uint32_t pp_nv12_dn_gen7[][4] = {
652 #include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
653 };
654 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
655 #include "shaders/post_processing/gen7/pl2_to_pa.g7b"
656 };
657 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
658 #include "shaders/post_processing/gen7/pl3_to_pa.g7b"
659 };
660 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
661 #include "shaders/post_processing/gen7/pa_to_pl2.g7b"
662 };
663 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
664 #include "shaders/post_processing/gen7/pa_to_pl3.g7b"
665 };
666 static const uint32_t pp_pa_load_save_pa_gen7[][4] = {
667 #include "shaders/post_processing/gen7/pa_to_pa.g7b"
668 };
669 static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
670 #include "shaders/post_processing/gen7/rgbx_to_nv12.g7b"
671 };
672 static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
673 #include "shaders/post_processing/gen7/pl2_to_rgbx.g7b"
674 };
675
/*
 * Forward declarations of the Gen7 "initialize" callbacks stored in the
 * module tables.  They take the same parameter list as the pp_*_initialize
 * callbacks: driver context, post-processing context, read-only source
 * surface and rectangle, destination surface and rectangle, and an untyped
 * filter_param pointer.  Each returns a VAStatus.  Their definitions are not
 * part of this section.
 */
676 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
677                                            const struct i965_surface *src_surface,
678                                            const VARectangle *src_rect,
679                                            struct i965_surface *dst_surface,
680                                            const VARectangle *dst_rect,
681                                            void *filter_param);
682 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
683                                              const struct i965_surface *src_surface,
684                                              const VARectangle *src_rect,
685                                              struct i965_surface *dst_surface,
686                                              const VARectangle *dst_rect,
687                                              void *filter_param);
688 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
689                                            const struct i965_surface *src_surface,
690                                            const VARectangle *src_rect,
691                                            struct i965_surface *dst_surface,
692                                            const VARectangle *dst_rect,
693                                            void *filter_param);
694
/*
 * Gen7 module table.  Every entry is initialized positionally as
 *   { { name string, PP_* identifier, kernel array, sizeof(kernel array), NULL },
 *     initialize callback }.
 * The field names of struct pp_module are not visible here; see its
 * declaration for the exact meaning of each slot.
 *
 * Callback assignment in this table:
 *   - PP_NULL        -> pp_null_initialize (paired with the empty pp_null_gen7)
 *   - PP_NV12_DNDI   -> gen7_pp_nv12_dndi_initialize
 *   - PP_NV12_DN     -> gen7_pp_nv12_dn_initialize
 *   - everything else, including the scaling and AVS entries
 *                    -> gen7_pp_plx_avs_initialize
 *
 * NOTE(review): the entry order presumably has to match the numeric order of
 * the PP_* identifiers if the table is indexed by them -- confirm at the
 * lookup site before reordering.
 */
695 static struct pp_module pp_modules_gen7[] = {
696     {
697         {
698             "NULL module (for testing)",
699             PP_NULL,
700             pp_null_gen7,
701             sizeof(pp_null_gen7),
702             NULL,
703         },
704
705         pp_null_initialize,
706     },
707
708     {
709         {
710             "NV12_NV12",
711             PP_NV12_LOAD_SAVE_N12,
712             pp_nv12_load_save_nv12_gen7,
713             sizeof(pp_nv12_load_save_nv12_gen7),
714             NULL,
715         },
716
717         gen7_pp_plx_avs_initialize,
718     },
719
720     {
721         {
722             "NV12_PL3",
723             PP_NV12_LOAD_SAVE_PL3,
724             pp_nv12_load_save_pl3_gen7,
725             sizeof(pp_nv12_load_save_pl3_gen7),
726             NULL,
727         },
728         
729         gen7_pp_plx_avs_initialize,
730     },
731
732     {
733         {
734             "PL3_NV12",
735             PP_PL3_LOAD_SAVE_N12,
736             pp_pl3_load_save_nv12_gen7,
737             sizeof(pp_pl3_load_save_nv12_gen7),
738             NULL,
739         },
740
741         gen7_pp_plx_avs_initialize,
742     },
743
744     {
745         {
746             "PL3_PL3",
747             PP_PL3_LOAD_SAVE_PL3,
748             pp_pl3_load_save_pl3_gen7,
749             sizeof(pp_pl3_load_save_pl3_gen7),
750             NULL,
751         },
752
753         gen7_pp_plx_avs_initialize,
754     },
755
756     {
757         {
758             "NV12 Scaling module",
759             PP_NV12_SCALING,
760             pp_nv12_scaling_gen7,
761             sizeof(pp_nv12_scaling_gen7),
762             NULL,
763         },
764
765         gen7_pp_plx_avs_initialize,
766     },
767
768     {
769         {
770             "NV12 AVS module",
771             PP_NV12_AVS,
772             pp_nv12_avs_gen7,
773             sizeof(pp_nv12_avs_gen7),
774             NULL,
775         },
776
777         gen7_pp_plx_avs_initialize,
778     },
779
780     {
781         {
782             "NV12 DNDI module",
783             PP_NV12_DNDI,
784             pp_nv12_dndi_gen7,
785             sizeof(pp_nv12_dndi_gen7),
786             NULL,
787         },
788
789         gen7_pp_nv12_dndi_initialize,
790     },
791
792     {
793         {
794             "NV12 DN module",
795             PP_NV12_DN,
796             pp_nv12_dn_gen7,
797             sizeof(pp_nv12_dn_gen7),
798             NULL,
799         },
800
801         gen7_pp_nv12_dn_initialize,
802     },
803     {
804         {
805             "NV12_PA module",
806             PP_NV12_LOAD_SAVE_PA,
807             pp_nv12_load_save_pa_gen7,
808             sizeof(pp_nv12_load_save_pa_gen7),
809             NULL,
810         },
811     
812         gen7_pp_plx_avs_initialize,
813     },
814
815     {
816         {
817             "PL3_PA module",
818             PP_PL3_LOAD_SAVE_PA,
819             pp_pl3_load_save_pa_gen7,
820             sizeof(pp_pl3_load_save_pa_gen7),
821             NULL,
822         },
823     
824         gen7_pp_plx_avs_initialize,
825     },
826
827     {
828         {
829             "PA_NV12 module",
830             PP_PA_LOAD_SAVE_NV12,
831             pp_pa_load_save_nv12_gen7,
832             sizeof(pp_pa_load_save_nv12_gen7),
833             NULL,
834         },
835     
836         gen7_pp_plx_avs_initialize,
837     },
838
839     {
840         {
841             "PA_PL3 module",
842             PP_PA_LOAD_SAVE_PL3,
843             pp_pa_load_save_pl3_gen7,
844             sizeof(pp_pa_load_save_pl3_gen7),
845             NULL,
846         },
847     
848         gen7_pp_plx_avs_initialize,
849     },
850
851     {
852         {
853             "PA_PA module",
854             PP_PA_LOAD_SAVE_PA,
855             pp_pa_load_save_pa_gen7,
856             sizeof(pp_pa_load_save_pa_gen7),
857             NULL,
858         },
859
860         gen7_pp_plx_avs_initialize,
861     },
862
863     {
864         {
865             "RGBX_NV12 module",
866             PP_RGBX_LOAD_SAVE_NV12,
867             pp_rgbx_load_save_nv12_gen7,
868             sizeof(pp_rgbx_load_save_nv12_gen7),
869             NULL,
870         },
871     
872         gen7_pp_plx_avs_initialize,
873     },
874
875     {
876         {
877             "NV12_RGBX module",
878             PP_NV12_LOAD_SAVE_RGBX,
879             pp_nv12_load_save_rgbx_gen7,
880             sizeof(pp_nv12_load_save_rgbx_gen7),
881             NULL,
882         },
883     
884         gen7_pp_plx_avs_initialize,
885     },
886             
887 };
888
/*
 * Gen7.5 post-processing kernel data.  Populated arrays #include a shader
 * listing (*.g75b) from shaders/post_processing/gen7/ and are laid out as
 * rows of four 32-bit words.  The arrays are referenced, together with their
 * sizeof, by the pp_modules_gen75 table.
 *
 * Three arrays carry no data: pp_null_gen75 has an empty initializer, and
 * pp_nv12_dndi_gen75 / pp_nv12_dn_gen75 have their #include lines commented
 * out.
 * NOTE(review): an empty brace initializer is a compiler extension in C --
 * confirm the supported toolchains accept it.
 *
 * pp_nv12_scaling_gen75 and pp_nv12_avs_gen75 include the same file,
 * avs.g75b.
 */
889 static const uint32_t pp_null_gen75[][4] = {
890 };
891
892 static const uint32_t pp_nv12_load_save_nv12_gen75[][4] = {
893 #include "shaders/post_processing/gen7/pl2_to_pl2.g75b"
894 };
895
896 static const uint32_t pp_nv12_load_save_pl3_gen75[][4] = {
897 #include "shaders/post_processing/gen7/pl2_to_pl3.g75b"
898 };
899
900 static const uint32_t pp_pl3_load_save_nv12_gen75[][4] = {
901 #include "shaders/post_processing/gen7/pl3_to_pl2.g75b"
902 };
903
904 static const uint32_t pp_pl3_load_save_pl3_gen75[][4] = {
905 #include "shaders/post_processing/gen7/pl3_to_pl3.g75b"
906 };
907
/* Same shader file as pp_nv12_avs_gen75 below. */
908 static const uint32_t pp_nv12_scaling_gen75[][4] = {
909 #include "shaders/post_processing/gen7/avs.g75b"
910 };
911
912 static const uint32_t pp_nv12_avs_gen75[][4] = {
913 #include "shaders/post_processing/gen7/avs.g75b"
914 };
915
/* Shader include is disabled, so this array is left empty. */
916 static const uint32_t pp_nv12_dndi_gen75[][4] = {
917 // #include "shaders/post_processing/gen7/dndi.g75b"
918 };
919
/* Shader include is disabled, so this array is left empty. */
920 static const uint32_t pp_nv12_dn_gen75[][4] = {
921 // #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
922 };
923 static const uint32_t pp_nv12_load_save_pa_gen75[][4] = {
924 #include "shaders/post_processing/gen7/pl2_to_pa.g75b"
925 };
926 static const uint32_t pp_pl3_load_save_pa_gen75[][4] = {
927 #include "shaders/post_processing/gen7/pl3_to_pa.g75b"
928 };
929 static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
930 #include "shaders/post_processing/gen7/pa_to_pl2.g75b"
931 };
932 static const uint32_t pp_pa_load_save_pl3_gen75[][4] = {
933 #include "shaders/post_processing/gen7/pa_to_pl3.g75b"
934 };
935 static const uint32_t pp_pa_load_save_pa_gen75[][4] = {
936 #include "shaders/post_processing/gen7/pa_to_pa.g75b"
937 };
938 static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = {
939 #include "shaders/post_processing/gen7/rgbx_to_nv12.g75b"
940 };
941 static const uint32_t pp_nv12_load_save_rgbx_gen75[][4] = {
942 #include "shaders/post_processing/gen7/pl2_to_rgbx.g75b"
943 };
944
945 static struct pp_module pp_modules_gen75[] = {
946     {
947         {
948             "NULL module (for testing)",
949             PP_NULL,
950             pp_null_gen75,
951             sizeof(pp_null_gen75),
952             NULL,
953         },
954
955         pp_null_initialize,
956     },
957
958     {
959         {
960             "NV12_NV12",
961             PP_NV12_LOAD_SAVE_N12,
962             pp_nv12_load_save_nv12_gen75,
963             sizeof(pp_nv12_load_save_nv12_gen75),
964             NULL,
965         },
966
967         gen7_pp_plx_avs_initialize,
968     },
969
970     {
971         {
972             "NV12_PL3",
973             PP_NV12_LOAD_SAVE_PL3,
974             pp_nv12_load_save_pl3_gen75,
975             sizeof(pp_nv12_load_save_pl3_gen75),
976             NULL,
977         },
978         
979         gen7_pp_plx_avs_initialize,
980     },
981
982     {
983         {
984             "PL3_NV12",
985             PP_PL3_LOAD_SAVE_N12,
986             pp_pl3_load_save_nv12_gen75,
987             sizeof(pp_pl3_load_save_nv12_gen75),
988             NULL,
989         },
990
991         gen7_pp_plx_avs_initialize,
992     },
993
994     {
995         {
996             "PL3_PL3",
997             PP_PL3_LOAD_SAVE_PL3,
998             pp_pl3_load_save_pl3_gen75,
999             sizeof(pp_pl3_load_save_pl3_gen75),
1000             NULL,
1001         },
1002
1003         gen7_pp_plx_avs_initialize,
1004     },
1005
1006     {
1007         {
1008             "NV12 Scaling module",
1009             PP_NV12_SCALING,
1010             pp_nv12_scaling_gen75,
1011             sizeof(pp_nv12_scaling_gen75),
1012             NULL,
1013         },
1014
1015         gen7_pp_plx_avs_initialize,
1016     },
1017
1018     {
1019         {
1020             "NV12 AVS module",
1021             PP_NV12_AVS,
1022             pp_nv12_avs_gen75,
1023             sizeof(pp_nv12_avs_gen75),
1024             NULL,
1025         },
1026
1027         gen7_pp_plx_avs_initialize,
1028     },
1029
1030     {
1031         {
1032             "NV12 DNDI module",
1033             PP_NV12_DNDI,
1034             pp_nv12_dndi_gen75,
1035             sizeof(pp_nv12_dndi_gen75),
1036             NULL,
1037         },
1038
1039         gen7_pp_nv12_dn_initialize,
1040     },
1041
1042     {
1043         {
1044             "NV12 DN module",
1045             PP_NV12_DN,
1046             pp_nv12_dn_gen75,
1047             sizeof(pp_nv12_dn_gen75),
1048             NULL,
1049         },
1050
1051         gen7_pp_nv12_dn_initialize,
1052     },
1053
1054     {
1055         {
1056             "NV12_PA module",
1057             PP_NV12_LOAD_SAVE_PA,
1058             pp_nv12_load_save_pa_gen75,
1059             sizeof(pp_nv12_load_save_pa_gen75),
1060             NULL,
1061         },
1062     
1063         gen7_pp_plx_avs_initialize,
1064     },
1065
1066     {
1067         {
1068             "PL3_PA module",
1069             PP_PL3_LOAD_SAVE_PA,
1070             pp_pl3_load_save_pa_gen75,
1071             sizeof(pp_pl3_load_save_pa_gen75),
1072             NULL,
1073         },
1074     
1075         gen7_pp_plx_avs_initialize,
1076     },
1077
1078     {
1079         {
1080             "PA_NV12 module",
1081             PP_PA_LOAD_SAVE_NV12,
1082             pp_pa_load_save_nv12_gen75,
1083             sizeof(pp_pa_load_save_nv12_gen75),
1084             NULL,
1085         },
1086     
1087         gen7_pp_plx_avs_initialize,
1088     },
1089
1090     {
1091         {
1092             "PA_PL3 module",
1093             PP_PA_LOAD_SAVE_PL3,
1094             pp_pa_load_save_pl3_gen75,
1095             sizeof(pp_pa_load_save_pl3_gen75),
1096             NULL,
1097         },
1098     
1099         gen7_pp_plx_avs_initialize,
1100     },
1101
1102     {
1103         {
1104             "PA_PA module",
1105             PP_PA_LOAD_SAVE_PA,
1106             pp_pa_load_save_pa_gen75,
1107             sizeof(pp_pa_load_save_pa_gen75),
1108             NULL,
1109         },
1110
1111         gen7_pp_plx_avs_initialize,
1112     },
1113
1114     {
1115         {
1116             "RGBX_NV12 module",
1117             PP_RGBX_LOAD_SAVE_NV12,
1118             pp_rgbx_load_save_nv12_gen75,
1119             sizeof(pp_rgbx_load_save_nv12_gen75),
1120             NULL,
1121         },
1122     
1123         gen7_pp_plx_avs_initialize,
1124     },
1125
1126     {
1127         {
1128             "NV12_RGBX module",
1129             PP_NV12_LOAD_SAVE_RGBX,
1130             pp_nv12_load_save_rgbx_gen75,
1131             sizeof(pp_nv12_load_save_rgbx_gen75),
1132             NULL,
1133         },
1134     
1135         gen7_pp_plx_avs_initialize,
1136     },
1137             
1138 };
1139
1140 static void
1141 pp_dndi_frame_store_reset(DNDIFrameStore *fs)
1142 {
1143     fs->obj_surface = NULL;
1144     fs->surface_id = VA_INVALID_ID;
1145     fs->is_scratch_surface = 0;
1146 }
1147
1148 static inline void
1149 pp_dndi_frame_store_swap(DNDIFrameStore *fs1, DNDIFrameStore *fs2)
1150 {
1151     const DNDIFrameStore tmpfs = *fs1;
1152     *fs1 = *fs2;
1153     *fs2 = tmpfs;
1154 }
1155
1156 static inline void
1157 pp_dndi_frame_store_clear(DNDIFrameStore *fs, VADriverContextP ctx)
1158 {
1159     if (fs->obj_surface && fs->is_scratch_surface) {
1160         VASurfaceID va_surface = fs->obj_surface->base.id;
1161         i965_DestroySurfaces(ctx, &va_surface, 1);
1162     }
1163     pp_dndi_frame_store_reset(fs);
1164 }
1165
1166 static void
1167 pp_dndi_context_init(struct pp_dndi_context *dndi_ctx)
1168 {
1169     int i;
1170
1171     memset(dndi_ctx, 0, sizeof(*dndi_ctx));
1172     for (i = 0; i < ARRAY_ELEMS(dndi_ctx->frame_store); i++)
1173         pp_dndi_frame_store_reset(&dndi_ctx->frame_store[i]);
1174 }
1175
1176 static VAStatus
1177 pp_dndi_context_init_surface_params(struct pp_dndi_context *dndi_ctx,
1178     struct object_surface *obj_surface,
1179     const VAProcPipelineParameterBuffer *pipe_params,
1180     const VAProcFilterParameterBufferDeinterlacing *deint_params)
1181 {
1182     DNDIFrameStore *fs;
1183
1184     dndi_ctx->is_di_enabled = 1;
1185     dndi_ctx->is_di_adv_enabled = 0;
1186     dndi_ctx->is_first_frame = 0;
1187     dndi_ctx->is_second_field = 0;
1188
1189     /* Check whether we are deinterlacing the second field */
1190     if (dndi_ctx->is_di_enabled) {
1191         const unsigned int tff =
1192             !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD_FIRST);
1193         const unsigned int is_top_field =
1194             !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
1195
1196         if ((tff ^ is_top_field) != 0) {
1197             fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1198             if (fs->surface_id != obj_surface->base.id) {
1199                 WARN_ONCE("invalid surface provided for second field\n");
1200                 return VA_STATUS_ERROR_INVALID_PARAMETER;
1201             }
1202             dndi_ctx->is_second_field = 1;
1203         }
1204     }
1205
1206     /* Check whether we are deinterlacing the first frame */
1207     if (dndi_ctx->is_di_enabled) {
1208         switch (deint_params->algorithm) {
1209         case VAProcDeinterlacingBob:
1210             dndi_ctx->is_first_frame = 1;
1211             break;
1212         case VAProcDeinterlacingMotionAdaptive:
1213         case VAProcDeinterlacingMotionCompensated:
1214             fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1215             if (fs->surface_id == VA_INVALID_ID)
1216                 dndi_ctx->is_first_frame = 1;
1217             else if (dndi_ctx->is_second_field) {
1218                 /* At this stage, we have already deinterlaced the
1219                    first field successfully. So, the first frame flag
1220                    is trigerred if the previous field was deinterlaced
1221                    without reference frame */
1222                 fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS];
1223                 if (fs->surface_id == VA_INVALID_ID)
1224                     dndi_ctx->is_first_frame = 1;
1225             }
1226             else {
1227                 if (pipe_params->num_forward_references < 1 ||
1228                     pipe_params->forward_references[0] == VA_INVALID_ID) {
1229                     WARN_ONCE("A forward temporal reference is needed for Motion adaptive/compensated deinterlacing !!!\n");
1230                     return VA_STATUS_ERROR_INVALID_PARAMETER;
1231                 }
1232             }
1233             dndi_ctx->is_di_adv_enabled = 1;
1234             break;
1235         default:
1236             WARN_ONCE("unsupported deinterlacing algorithm (%d)\n",
1237                       deint_params->algorithm);
1238             return VA_STATUS_ERROR_UNSUPPORTED_FILTER;
1239         }
1240     }
1241     return VA_STATUS_SUCCESS;
1242 }
1243
1244 static VAStatus
1245 pp_dndi_context_ensure_surfaces_storage(VADriverContextP ctx,
1246     struct i965_post_processing_context *pp_context,
1247     struct object_surface *src_surface, struct object_surface *dst_surface)
1248 {
1249     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1250     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
1251     unsigned int src_fourcc, dst_fourcc;
1252     unsigned int src_sampling, dst_sampling;
1253     unsigned int src_tiling, dst_tiling;
1254     unsigned int i, swizzle;
1255     VAStatus status;
1256
1257     /* Determine input surface info. Always use NV12 Y-tiled */
1258     if (src_surface->bo) {
1259         src_fourcc = src_surface->fourcc;
1260         src_sampling = src_surface->subsampling;
1261         dri_bo_get_tiling(src_surface->bo, &src_tiling, &swizzle);
1262         src_tiling = !!src_tiling;
1263     }
1264     else {
1265         src_fourcc = VA_FOURCC_NV12;
1266         src_sampling = SUBSAMPLE_YUV420;
1267         src_tiling = 1;
1268         status = i965_check_alloc_surface_bo(ctx, src_surface,
1269             src_tiling, src_fourcc, src_sampling);
1270         if (status != VA_STATUS_SUCCESS)
1271             return status;
1272     }
1273
1274     /* Determine output surface info. Always use NV12 Y-tiled */
1275     if (dst_surface->bo) {
1276         dst_fourcc   = dst_surface->fourcc;
1277         dst_sampling = dst_surface->subsampling;
1278         dri_bo_get_tiling(dst_surface->bo, &dst_tiling, &swizzle);
1279         dst_tiling = !!dst_tiling;
1280     }
1281     else {
1282         dst_fourcc = VA_FOURCC_NV12;
1283         dst_sampling = SUBSAMPLE_YUV420;
1284         dst_tiling = 1;
1285         status = i965_check_alloc_surface_bo(ctx, dst_surface,
1286             dst_tiling, dst_fourcc, dst_sampling);
1287         if (status != VA_STATUS_SUCCESS)
1288             return status;
1289     }
1290
1291     /* Create pipeline surfaces */
1292     for (i = 0; i < ARRAY_ELEMS(dndi_ctx->frame_store); i ++) {
1293         struct object_surface *obj_surface;
1294         VASurfaceID new_surface;
1295         unsigned int width, height;
1296
1297         if (dndi_ctx->frame_store[i].obj_surface &&
1298             dndi_ctx->frame_store[i].obj_surface->bo)
1299             continue; // user allocated surface, not VPP internal
1300
1301         if (dndi_ctx->frame_store[i].obj_surface) {
1302             obj_surface = dndi_ctx->frame_store[i].obj_surface;
1303             dndi_ctx->frame_store[i].is_scratch_surface = 0;
1304         } else {
1305             if (i <= DNDI_FRAME_IN_STMM) {
1306                 width = src_surface->orig_width;
1307                 height = src_surface->orig_height;
1308             }
1309             else {
1310                 width = dst_surface->orig_width;
1311                 height = dst_surface->orig_height;
1312             }
1313
1314             status = i965_CreateSurfaces(ctx, width, height, VA_RT_FORMAT_YUV420,
1315                                          1, &new_surface);
1316             if (status != VA_STATUS_SUCCESS)
1317                 return status;
1318
1319             obj_surface = SURFACE(new_surface);
1320             assert(obj_surface != NULL);
1321             dndi_ctx->frame_store[i].is_scratch_surface = 1;
1322         }
1323
1324         if (i <= DNDI_FRAME_IN_PREVIOUS) {
1325             status = i965_check_alloc_surface_bo(ctx, obj_surface,
1326                 src_tiling, src_fourcc, src_sampling);
1327         }
1328         else if (i == DNDI_FRAME_IN_STMM || i == DNDI_FRAME_OUT_STMM) {
1329             status = i965_check_alloc_surface_bo(ctx, obj_surface,
1330                 1, VA_FOURCC_Y800, SUBSAMPLE_YUV400);
1331         }
1332         else if (i >= DNDI_FRAME_OUT_CURRENT) {
1333             status = i965_check_alloc_surface_bo(ctx, obj_surface,
1334                 dst_tiling, dst_fourcc, dst_sampling);
1335         }
1336         if (status != VA_STATUS_SUCCESS)
1337             return status;
1338
1339         dndi_ctx->frame_store[i].obj_surface = obj_surface;
1340     }
1341     return VA_STATUS_SUCCESS;
1342 }
1343
1344 static VAStatus
1345 pp_dndi_context_ensure_surfaces(VADriverContextP ctx,
1346     struct i965_post_processing_context *pp_context,
1347     struct object_surface *src_surface, struct object_surface *dst_surface)
1348 {
1349     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1350     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
1351     DNDIFrameStore *ifs, *ofs;
1352     bool is_new_frame = false;
1353
1354     /* Update the previous input surface */
1355     is_new_frame = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].surface_id !=
1356         src_surface->base.id;
1357     if (is_new_frame) {
1358         ifs = &dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS];
1359         ofs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1360         do {
1361             const VAProcPipelineParameterBuffer * const pipe_params =
1362                 pp_context->pipeline_param;
1363             struct object_surface *obj_surface;
1364
1365             if (pipe_params->num_forward_references < 1)
1366                 break;
1367             if (pipe_params->forward_references[0] == VA_INVALID_ID)
1368                 break;
1369
1370             obj_surface = SURFACE(pipe_params->forward_references[0]);
1371             if (!obj_surface || obj_surface->base.id == ifs->surface_id)
1372                 break;
1373
1374             pp_dndi_frame_store_clear(ifs, ctx);
1375             if (obj_surface->base.id == ofs->surface_id) {
1376                 *ifs = *ofs;
1377                 pp_dndi_frame_store_reset(ofs);
1378             }
1379             else {
1380                 ifs->obj_surface = obj_surface;
1381                 ifs->surface_id = obj_surface->base.id;
1382             }
1383         } while (0);
1384     }
1385
1386     /* Update the input surface */
1387     ifs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1388     pp_dndi_frame_store_clear(ifs, ctx);
1389     ifs->obj_surface = src_surface;
1390     ifs->surface_id = src_surface->base.id;
1391
1392     /* Update the Spatial Temporal Motion Measure (STMM) surfaces */
1393     if (is_new_frame)
1394         pp_dndi_frame_store_swap(&dndi_ctx->frame_store[DNDI_FRAME_IN_STMM],
1395             &dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM]);
1396
1397     /* Update the output surfaces */
1398     ofs = &dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT];
1399     if (dndi_ctx->is_di_adv_enabled && !dndi_ctx->is_first_frame) {
1400         pp_dndi_frame_store_swap(ofs,
1401             &dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS]);
1402         if (!dndi_ctx->is_second_field)
1403             ofs = &dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS];
1404     }
1405     pp_dndi_frame_store_clear(ofs, ctx);
1406     ofs->obj_surface = dst_surface;
1407     ofs->surface_id = dst_surface->base.id;
1408
1409     return VA_STATUS_SUCCESS;
1410 }
1411
1412 static int
1413 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
1414 {
1415     int fourcc;
1416
1417     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1418         struct object_image *obj_image = (struct object_image *)surface->base;
1419         fourcc = obj_image->image.format.fourcc;
1420     } else {
1421         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1422         fourcc = obj_surface->fourcc;
1423     }
1424
1425     return fourcc;
1426 }
1427
1428 static void
1429 pp_get_surface_size(VADriverContextP ctx, const struct i965_surface *surface, int *width, int *height)
1430 {
1431     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1432         struct object_image *obj_image = (struct object_image *)surface->base;
1433
1434         *width = obj_image->image.width;
1435         *height = obj_image->image.height;
1436     } else {
1437         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1438
1439         *width = obj_surface->orig_width;
1440         *height = obj_surface->orig_height;
1441     }
1442 }
1443
1444 static void
1445 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
1446 {
1447     switch (tiling) {
1448     case I915_TILING_NONE:
1449         ss->ss3.tiled_surface = 0;
1450         ss->ss3.tile_walk = 0;
1451         break;
1452     case I915_TILING_X:
1453         ss->ss3.tiled_surface = 1;
1454         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
1455         break;
1456     case I915_TILING_Y:
1457         ss->ss3.tiled_surface = 1;
1458         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
1459         break;
1460     }
1461 }
1462
1463 static void
1464 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
1465 {
1466     switch (tiling) {
1467     case I915_TILING_NONE:
1468         ss->ss2.tiled_surface = 0;
1469         ss->ss2.tile_walk = 0;
1470         break;
1471     case I915_TILING_X:
1472         ss->ss2.tiled_surface = 1;
1473         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1474         break;
1475     case I915_TILING_Y:
1476         ss->ss2.tiled_surface = 1;
1477         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1478         break;
1479     }
1480 }
1481
1482 static void
1483 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
1484 {
1485     switch (tiling) {
1486     case I915_TILING_NONE:
1487         ss->ss0.tiled_surface = 0;
1488         ss->ss0.tile_walk = 0;
1489         break;
1490     case I915_TILING_X:
1491         ss->ss0.tiled_surface = 1;
1492         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1493         break;
1494     case I915_TILING_Y:
1495         ss->ss0.tiled_surface = 1;
1496         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1497         break;
1498     }
1499 }
1500
1501 static void
1502 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
1503 {
1504     switch (tiling) {
1505     case I915_TILING_NONE:
1506         ss->ss2.tiled_surface = 0;
1507         ss->ss2.tile_walk = 0;
1508         break;
1509     case I915_TILING_X:
1510         ss->ss2.tiled_surface = 1;
1511         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1512         break;
1513     case I915_TILING_Y:
1514         ss->ss2.tiled_surface = 1;
1515         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1516         break;
1517     }
1518 }
1519
1520 static void
1521 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1522 {
1523     struct i965_interface_descriptor *desc;
1524     dri_bo *bo;
1525     int pp_index = pp_context->current_pp;
1526
1527     bo = pp_context->idrt.bo;
1528     dri_bo_map(bo, 1);
1529     assert(bo->virtual);
1530     desc = bo->virtual;
1531     memset(desc, 0, sizeof(*desc));
1532     desc->desc0.grf_reg_blocks = 10;
1533     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1534     desc->desc1.const_urb_entry_read_offset = 0;
1535     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
1536     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
1537     desc->desc2.sampler_count = 0;
1538     desc->desc3.binding_table_entry_count = 0;
1539     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
1540
1541     dri_bo_emit_reloc(bo,
1542                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1543                       desc->desc0.grf_reg_blocks,
1544                       offsetof(struct i965_interface_descriptor, desc0),
1545                       pp_context->pp_modules[pp_index].kernel.bo);
1546
1547     dri_bo_emit_reloc(bo,
1548                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1549                       desc->desc2.sampler_count << 2,
1550                       offsetof(struct i965_interface_descriptor, desc2),
1551                       pp_context->sampler_state_table.bo);
1552
1553     dri_bo_unmap(bo);
1554     pp_context->idrt.num_interface_descriptors++;
1555 }
1556
1557 static void
1558 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
1559 {
1560     struct i965_vfe_state *vfe_state;
1561     dri_bo *bo;
1562
1563     bo = pp_context->vfe_state.bo;
1564     dri_bo_map(bo, 1);
1565     assert(bo->virtual);
1566     vfe_state = bo->virtual;
1567     memset(vfe_state, 0, sizeof(*vfe_state));
1568     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
1569     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
1570     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
1571     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
1572     vfe_state->vfe1.children_present = 0;
1573     vfe_state->vfe2.interface_descriptor_base = 
1574         pp_context->idrt.bo->offset >> 4; /* reloc */
1575     dri_bo_emit_reloc(bo,
1576                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1577                       0,
1578                       offsetof(struct i965_vfe_state, vfe2),
1579                       pp_context->idrt.bo);
1580     dri_bo_unmap(bo);
1581 }
1582
1583 static void
1584 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
1585 {
1586     unsigned char *constant_buffer;
1587     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1588
1589     assert(sizeof(*pp_static_parameter) == 128);
1590     dri_bo_map(pp_context->curbe.bo, 1);
1591     assert(pp_context->curbe.bo->virtual);
1592     constant_buffer = pp_context->curbe.bo->virtual;
1593     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1594     dri_bo_unmap(pp_context->curbe.bo);
1595 }
1596
1597 static void
1598 ironlake_pp_states_setup(VADriverContextP ctx,
1599                          struct i965_post_processing_context *pp_context)
1600 {
1601     ironlake_pp_interface_descriptor_table(pp_context);
1602     ironlake_pp_vfe_state(pp_context);
1603     ironlake_pp_upload_constants(pp_context);
1604 }
1605
1606 static void
1607 ironlake_pp_pipeline_select(VADriverContextP ctx,
1608                             struct i965_post_processing_context *pp_context)
1609 {
1610     struct intel_batchbuffer *batch = pp_context->batch;
1611
1612     BEGIN_BATCH(batch, 1);
1613     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1614     ADVANCE_BATCH(batch);
1615 }
1616
1617 static void
1618 ironlake_pp_urb_layout(VADriverContextP ctx,
1619                        struct i965_post_processing_context *pp_context)
1620 {
1621     struct intel_batchbuffer *batch = pp_context->batch;
1622     unsigned int vfe_fence, cs_fence;
1623
1624     vfe_fence = pp_context->urb.cs_start;
1625     cs_fence = pp_context->urb.size;
1626
1627     BEGIN_BATCH(batch, 3);
1628     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1629     OUT_BATCH(batch, 0);
1630     OUT_BATCH(batch, 
1631               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
1632               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
1633     ADVANCE_BATCH(batch);
1634 }
1635
1636 static void
1637 ironlake_pp_state_base_address(VADriverContextP ctx,
1638                                struct i965_post_processing_context *pp_context)
1639 {
1640     struct intel_batchbuffer *batch = pp_context->batch;
1641
1642     BEGIN_BATCH(batch, 8);
1643     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1644     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1645     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1646     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1647     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1648     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1649     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1650     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1651     ADVANCE_BATCH(batch);
1652 }
1653
1654 static void
1655 ironlake_pp_state_pointers(VADriverContextP ctx,
1656                            struct i965_post_processing_context *pp_context)
1657 {
1658     struct intel_batchbuffer *batch = pp_context->batch;
1659
1660     BEGIN_BATCH(batch, 3);
1661     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1662     OUT_BATCH(batch, 0);
1663     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1664     ADVANCE_BATCH(batch);
1665 }
1666
1667 static void 
1668 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1669                           struct i965_post_processing_context *pp_context)
1670 {
1671     struct intel_batchbuffer *batch = pp_context->batch;
1672
1673     BEGIN_BATCH(batch, 2);
1674     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1675     OUT_BATCH(batch,
1676               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1677               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1678     ADVANCE_BATCH(batch);
1679 }
1680
1681 static void
1682 ironlake_pp_constant_buffer(VADriverContextP ctx,
1683                             struct i965_post_processing_context *pp_context)
1684 {
1685     struct intel_batchbuffer *batch = pp_context->batch;
1686
1687     BEGIN_BATCH(batch, 2);
1688     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1689     OUT_RELOC(batch, pp_context->curbe.bo,
1690               I915_GEM_DOMAIN_INSTRUCTION, 0,
1691               pp_context->urb.size_cs_entry - 1);
1692     ADVANCE_BATCH(batch);    
1693 }
1694
1695 static void
1696 ironlake_pp_object_walker(VADriverContextP ctx,
1697                           struct i965_post_processing_context *pp_context)
1698 {
1699     struct intel_batchbuffer *batch = pp_context->batch;
1700     int x, x_steps, y, y_steps;
1701     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1702
1703     x_steps = pp_context->pp_x_steps(pp_context->private_context);
1704     y_steps = pp_context->pp_y_steps(pp_context->private_context);
1705
1706     for (y = 0; y < y_steps; y++) {
1707         for (x = 0; x < x_steps; x++) {
1708             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1709                 BEGIN_BATCH(batch, 20);
1710                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1711                 OUT_BATCH(batch, 0);
1712                 OUT_BATCH(batch, 0); /* no indirect data */
1713                 OUT_BATCH(batch, 0);
1714
1715                 /* inline data grf 5-6 */
1716                 assert(sizeof(*pp_inline_parameter) == 64);
1717                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1718
1719                 ADVANCE_BATCH(batch);
1720             }
1721         }
1722     }
1723 }
1724
1725 static void
1726 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1727                            struct i965_post_processing_context *pp_context)
1728 {
1729     struct intel_batchbuffer *batch = pp_context->batch;
1730
1731     intel_batchbuffer_start_atomic(batch, 0x1000);
1732     intel_batchbuffer_emit_mi_flush(batch);
1733     ironlake_pp_pipeline_select(ctx, pp_context);
1734     ironlake_pp_state_base_address(ctx, pp_context);
1735     ironlake_pp_state_pointers(ctx, pp_context);
1736     ironlake_pp_urb_layout(ctx, pp_context);
1737     ironlake_pp_cs_urb_layout(ctx, pp_context);
1738     ironlake_pp_constant_buffer(ctx, pp_context);
1739     ironlake_pp_object_walker(ctx, pp_context);
1740     intel_batchbuffer_end_atomic(batch);
1741 }
1742
1743 // update u/v offset when the surface format are packed yuv
1744 static void i965_update_src_surface_static_parameter(
1745     VADriverContextP    ctx, 
1746     struct i965_post_processing_context *pp_context,
1747     const struct i965_surface *surface)
1748 {
1749     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1750     int fourcc = pp_get_surface_fourcc(ctx, surface);
1751
1752     switch (fourcc) {
1753     case VA_FOURCC_YUY2:
1754         pp_static_parameter->grf1.source_packed_u_offset = 1;
1755         pp_static_parameter->grf1.source_packed_v_offset = 3;
1756         break;
1757     case VA_FOURCC_UYVY:
1758         pp_static_parameter->grf1.source_packed_y_offset = 1;
1759         pp_static_parameter->grf1.source_packed_v_offset = 2;
1760         break;
1761     case VA_FOURCC_BGRX:
1762     case VA_FOURCC_BGRA:
1763         pp_static_parameter->grf1.source_rgb_layout = 0;
1764         break;
1765     case VA_FOURCC_RGBX:
1766     case VA_FOURCC_RGBA:
1767         pp_static_parameter->grf1.source_rgb_layout = 1;
1768         break;
1769     default:
1770         break;
1771     }
1772     
1773 }
1774
1775 static void i965_update_dst_surface_static_parameter(
1776     VADriverContextP    ctx, 
1777     struct i965_post_processing_context *pp_context,
1778     const struct i965_surface *surface)
1779 {
1780     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1781     int fourcc = pp_get_surface_fourcc(ctx, surface);
1782
1783     switch (fourcc) {
1784     case VA_FOURCC_YUY2:
1785         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1786         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1787         break;
1788     case VA_FOURCC_UYVY:
1789         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1790         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1791         break;
1792     case VA_FOURCC_BGRX:
1793     case VA_FOURCC_BGRA:
1794         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1795         break;
1796     case VA_FOURCC_RGBX:
1797     case VA_FOURCC_RGBA:
1798         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
1799         break;
1800     default:
1801         break;
1802     }
1803     
1804 }
1805
1806 static void
1807 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1808                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1809                           int width, int height, int pitch, int format, 
1810                           int index, int is_target)
1811 {
1812     struct i965_surface_state *ss;
1813     dri_bo *ss_bo;
1814     unsigned int tiling;
1815     unsigned int swizzle;
1816
1817     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1818     ss_bo = pp_context->surface_state_binding_table.bo;
1819     assert(ss_bo);
1820
1821     dri_bo_map(ss_bo, True);
1822     assert(ss_bo->virtual);
1823     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1824     memset(ss, 0, sizeof(*ss));
1825     ss->ss0.surface_type = I965_SURFACE_2D;
1826     ss->ss0.surface_format = format;
1827     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1828     ss->ss2.width = width - 1;
1829     ss->ss2.height = height - 1;
1830     ss->ss3.pitch = pitch - 1;
1831     pp_set_surface_tiling(ss, tiling);
1832     dri_bo_emit_reloc(ss_bo,
1833                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1834                       surf_bo_offset,
1835                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1836                       surf_bo);
1837     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1838     dri_bo_unmap(ss_bo);
1839 }
1840
1841 static void
1842 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1843                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1844                            int width, int height, int wpitch,
1845                            int xoffset, int yoffset,
1846                            int format, int interleave_chroma,
1847                            int index)
1848 {
1849     struct i965_surface_state2 *ss2;
1850     dri_bo *ss2_bo;
1851     unsigned int tiling;
1852     unsigned int swizzle;
1853
1854     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1855     ss2_bo = pp_context->surface_state_binding_table.bo;
1856     assert(ss2_bo);
1857
1858     dri_bo_map(ss2_bo, True);
1859     assert(ss2_bo->virtual);
1860     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1861     memset(ss2, 0, sizeof(*ss2));
1862     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1863     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1864     ss2->ss1.width = width - 1;
1865     ss2->ss1.height = height - 1;
1866     ss2->ss2.pitch = wpitch - 1;
1867     ss2->ss2.interleave_chroma = interleave_chroma;
1868     ss2->ss2.surface_format = format;
1869     ss2->ss3.x_offset_for_cb = xoffset;
1870     ss2->ss3.y_offset_for_cb = yoffset;
1871     pp_set_surface2_tiling(ss2, tiling);
1872     dri_bo_emit_reloc(ss2_bo,
1873                       I915_GEM_DOMAIN_RENDER, 0,
1874                       surf_bo_offset,
1875                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1876                       surf_bo);
1877     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1878     dri_bo_unmap(ss2_bo);
1879 }
1880
1881 static void
1882 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1883                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1884                           int width, int height, int pitch, int format, 
1885                           int index, int is_target)
1886 {
1887     struct i965_driver_data * const i965 = i965_driver_data(ctx);  
1888     struct gen7_surface_state *ss;
1889     dri_bo *ss_bo;
1890     unsigned int tiling;
1891     unsigned int swizzle;
1892
1893     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1894     ss_bo = pp_context->surface_state_binding_table.bo;
1895     assert(ss_bo);
1896
1897     dri_bo_map(ss_bo, True);
1898     assert(ss_bo->virtual);
1899     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1900     memset(ss, 0, sizeof(*ss));
1901     ss->ss0.surface_type = I965_SURFACE_2D;
1902     ss->ss0.surface_format = format;
1903     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1904     ss->ss2.width = width - 1;
1905     ss->ss2.height = height - 1;
1906     ss->ss3.pitch = pitch - 1;
1907     gen7_pp_set_surface_tiling(ss, tiling);
1908     if (IS_HASWELL(i965->intel.device_info))
1909         gen7_render_set_surface_scs(ss);
1910     dri_bo_emit_reloc(ss_bo,
1911                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1912                       surf_bo_offset,
1913                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1914                       surf_bo);
1915     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1916     dri_bo_unmap(ss_bo);
1917 }
1918
1919 static void
1920 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1921                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1922                            int width, int height, int wpitch,
1923                            int xoffset, int yoffset,
1924                            int format, int interleave_chroma,
1925                            int index)
1926 {
1927     struct gen7_surface_state2 *ss2;
1928     dri_bo *ss2_bo;
1929     unsigned int tiling;
1930     unsigned int swizzle;
1931
1932     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1933     ss2_bo = pp_context->surface_state_binding_table.bo;
1934     assert(ss2_bo);
1935
1936     dri_bo_map(ss2_bo, True);
1937     assert(ss2_bo->virtual);
1938     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1939     memset(ss2, 0, sizeof(*ss2));
1940     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1941     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1942     ss2->ss1.width = width - 1;
1943     ss2->ss1.height = height - 1;
1944     ss2->ss2.pitch = wpitch - 1;
1945     ss2->ss2.interleave_chroma = interleave_chroma;
1946     ss2->ss2.surface_format = format;
1947     ss2->ss3.x_offset_for_cb = xoffset;
1948     ss2->ss3.y_offset_for_cb = yoffset;
1949     gen7_pp_set_surface2_tiling(ss2, tiling);
1950     dri_bo_emit_reloc(ss2_bo,
1951                       I915_GEM_DOMAIN_RENDER, 0,
1952                       surf_bo_offset,
1953                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1954                       surf_bo);
1955     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1956     dri_bo_unmap(ss2_bo);
1957 }
1958
1959 static void 
1960 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1961                                 const struct i965_surface *surface, 
1962                                 int base_index, int is_target,
1963                                 int *width, int *height, int *pitch, int *offset)
1964 {
1965     struct object_surface *obj_surface;
1966     struct object_image *obj_image;
1967     dri_bo *bo;
1968     int fourcc = pp_get_surface_fourcc(ctx, surface);
1969     const int Y = 0;
1970     const int U = ((fourcc == VA_FOURCC_YV12) ||
1971                    (fourcc == VA_FOURCC_YV16))
1972                    ? 2 : 1;
1973     const int V = ((fourcc == VA_FOURCC_YV12) ||
1974                    (fourcc == VA_FOURCC_YV16))
1975                    ? 1 : 2;
1976     const int UV = 1;
1977     int interleaved_uv = fourcc == VA_FOURCC_NV12;
1978     int packed_yuv = (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY);
1979     int full_packed_format = (fourcc == VA_FOURCC_RGBA ||
1980                               fourcc == VA_FOURCC_RGBX ||
1981                               fourcc == VA_FOURCC_BGRA ||
1982                               fourcc == VA_FOURCC_BGRX);
1983     int scale_factor_of_1st_plane_width_in_byte = 1;
1984                               
1985     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1986         obj_surface = (struct object_surface *)surface->base;
1987         bo = obj_surface->bo;
1988         width[0] = obj_surface->orig_width;
1989         height[0] = obj_surface->orig_height;
1990         pitch[0] = obj_surface->width;
1991         offset[0] = 0;
1992
1993         if (full_packed_format) {
1994             scale_factor_of_1st_plane_width_in_byte = 4; 
1995         }
1996         else if (packed_yuv ) {
1997             scale_factor_of_1st_plane_width_in_byte =  2; 
1998         }
1999         else if (interleaved_uv) {
2000             width[1] = obj_surface->orig_width;
2001             height[1] = obj_surface->orig_height / 2;
2002             pitch[1] = obj_surface->width;
2003             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
2004         } else {
2005             width[1] = obj_surface->orig_width / 2;
2006             height[1] = obj_surface->orig_height / 2;
2007             pitch[1] = obj_surface->width / 2;
2008             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
2009             width[2] = obj_surface->orig_width / 2;
2010             height[2] = obj_surface->orig_height / 2;
2011             pitch[2] = obj_surface->width / 2;
2012             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
2013         }
2014     } else {
2015         obj_image = (struct object_image *)surface->base;
2016         bo = obj_image->bo;
2017         width[0] = obj_image->image.width;
2018         height[0] = obj_image->image.height;
2019         pitch[0] = obj_image->image.pitches[0];
2020         offset[0] = obj_image->image.offsets[0];
2021
2022         if (full_packed_format) {
2023             scale_factor_of_1st_plane_width_in_byte = 4;
2024         }
2025         else if (packed_yuv ) {
2026             scale_factor_of_1st_plane_width_in_byte = 2;
2027         }
2028         else if (interleaved_uv) {
2029             width[1] = obj_image->image.width;
2030             height[1] = obj_image->image.height / 2;
2031             pitch[1] = obj_image->image.pitches[1];
2032             offset[1] = obj_image->image.offsets[1];
2033         } else {
2034             width[1] = obj_image->image.width / 2;
2035             height[1] = obj_image->image.height / 2;
2036             pitch[1] = obj_image->image.pitches[1];
2037             offset[1] = obj_image->image.offsets[1];
2038             width[2] = obj_image->image.width / 2;
2039             height[2] = obj_image->image.height / 2;
2040             pitch[2] = obj_image->image.pitches[2];
2041             offset[2] = obj_image->image.offsets[2];
2042             if (fourcc == VA_FOURCC_YV16) {
2043                 width[1] = obj_image->image.width / 2;
2044                 height[1] = obj_image->image.height;
2045                 width[2] = obj_image->image.width / 2;
2046                 height[2] = obj_image->image.height;
2047             }
2048         }
2049     }
2050
2051     /* Y surface */
2052     i965_pp_set_surface_state(ctx, pp_context,
2053                               bo, offset[Y],
2054                               ALIGN(width[Y] *scale_factor_of_1st_plane_width_in_byte, 4) / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
2055                               base_index, is_target);
2056
2057     if (!packed_yuv && !full_packed_format) {
2058         if (interleaved_uv) {
2059             i965_pp_set_surface_state(ctx, pp_context,
2060                                       bo, offset[UV],
2061                                       ALIGN(width[UV], 4) / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
2062                                       base_index + 1, is_target);
2063         } else {
2064             /* U surface */
2065             i965_pp_set_surface_state(ctx, pp_context,
2066                                       bo, offset[U],
2067                                       ALIGN(width[U], 4) / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
2068                                       base_index + 1, is_target);
2069
2070             /* V surface */
2071             i965_pp_set_surface_state(ctx, pp_context,
2072                                       bo, offset[V],
2073                                       ALIGN(width[V], 4) / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
2074                                       base_index + 2, is_target);
2075         }
2076     }
2077
2078 }
2079
2080 static void 
2081 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2082                                      const struct i965_surface *surface, 
2083                                      int base_index, int is_target,
2084                                      const VARectangle *rect,
2085                                      int *width, int *height, int *pitch, int *offset)
2086 {
2087     struct object_surface *obj_surface;
2088     struct object_image *obj_image;
2089     dri_bo *bo;
2090     int fourcc = pp_get_surface_fourcc(ctx, surface);
2091     const i965_fourcc_info *fourcc_info = get_fourcc_info(fourcc);
2092
2093     if (fourcc_info == NULL)
2094         return;
2095
2096     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
2097         obj_surface = (struct object_surface *)surface->base;
2098         bo = obj_surface->bo;
2099         width[0] = MIN(rect->x + rect->width, obj_surface->orig_width);
2100         height[0] = MIN(rect->y + rect->height, obj_surface->orig_height);
2101         pitch[0] = obj_surface->width;
2102         offset[0] = 0;
2103
2104         if (fourcc_info->num_planes == 1 && is_target)
2105             width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
2106
2107         width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width);
2108         height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height);
2109         pitch[1] = obj_surface->cb_cr_pitch;
2110         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
2111
2112         width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width);
2113         height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height);
2114         pitch[2] = obj_surface->cb_cr_pitch;
2115         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
2116     } else {
2117         int U = 0, V = 0;
2118
2119         /* FIXME: add support for ARGB/ABGR image */
2120         obj_image = (struct object_image *)surface->base;
2121         bo = obj_image->bo;
2122         width[0] = MIN(rect->x + rect->width, obj_image->image.width);
2123         height[0] = MIN(rect->y + rect->height, obj_image->image.height);
2124         pitch[0] = obj_image->image.pitches[0];
2125         offset[0] = obj_image->image.offsets[0];
2126
2127         if (fourcc_info->num_planes == 1) {
2128             if (is_target)
2129                 width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
2130         } else if (fourcc_info->num_planes == 2) {
2131             U = 1, V = 1;
2132         } else {
2133             assert(fourcc_info->num_components == 3);
2134
2135             U = fourcc_info->components[1].plane;
2136             V = fourcc_info->components[2].plane;
2137             assert((U == 1 && V == 2) ||
2138                    (U == 2 && V == 1));
2139         }
2140
2141         /* Always set width/height although they aren't used for fourcc_info->num_planes == 1 */
2142         width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor);
2143         height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor);
2144         pitch[1] = obj_image->image.pitches[U];
2145         offset[1] = obj_image->image.offsets[U];
2146
2147         width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor);
2148         height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor);
2149         pitch[2] = obj_image->image.pitches[V];
2150         offset[2] = obj_image->image.offsets[V];
2151     }
2152
2153     if (is_target) {
2154         gen7_pp_set_surface_state(ctx, pp_context,
2155                                   bo, 0,
2156                                   ALIGN(width[0], 4) / 4, height[0], pitch[0],
2157                                   I965_SURFACEFORMAT_R8_UINT,
2158                                   base_index, 1);
2159
2160         if (fourcc_info->num_planes == 2) {
2161             gen7_pp_set_surface_state(ctx, pp_context,
2162                                       bo, offset[1],
2163                                       ALIGN(width[1], 2) / 2, height[1], pitch[1],
2164                                       I965_SURFACEFORMAT_R8G8_SINT,
2165                                       base_index + 1, 1);
2166         } else if (fourcc_info->num_planes == 3) {
2167             gen7_pp_set_surface_state(ctx, pp_context,
2168                                       bo, offset[1],
2169                                       ALIGN(width[1], 4) / 4, height[1], pitch[1],
2170                                       I965_SURFACEFORMAT_R8_SINT,
2171                                       base_index + 1, 1);
2172             gen7_pp_set_surface_state(ctx, pp_context,
2173                                       bo, offset[2],
2174                                       ALIGN(width[2], 4) / 4, height[2], pitch[2],
2175                                       I965_SURFACEFORMAT_R8_SINT,
2176                                       base_index + 2, 1);
2177         }
2178
2179         if (fourcc_info->format == I965_COLOR_RGB) {
2180             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2181             /* the format is MSB: X-B-G-R */
2182             pp_static_parameter->grf2.save_avs_rgb_swap = 0;
2183             if ((fourcc == VA_FOURCC_BGRA) ||
2184                 (fourcc == VA_FOURCC_BGRX)) {
2185                 /* It is stored as MSB: X-R-G-B */
2186                 pp_static_parameter->grf2.save_avs_rgb_swap = 1;
2187             }
2188         }
2189     } else {
2190         int format0 = SURFACE_FORMAT_Y8_UNORM;
2191
2192         switch (fourcc) {
2193         case VA_FOURCC_YUY2:
2194             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
2195             break;
2196
2197         case VA_FOURCC_UYVY:
2198             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
2199             break;
2200
2201         default:
2202             break;
2203         }
2204
2205         if (fourcc_info->format == I965_COLOR_RGB) {
2206             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2207             /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
2208             format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
2209             pp_static_parameter->grf2.src_avs_rgb_swap = 0;
2210             if ((fourcc == VA_FOURCC_BGRA) ||
2211                 (fourcc == VA_FOURCC_BGRX)) {
2212                 pp_static_parameter->grf2.src_avs_rgb_swap = 1;
2213             }
2214         }
2215
2216         gen7_pp_set_surface2_state(ctx, pp_context,
2217                                    bo, offset[0],
2218                                    width[0], height[0], pitch[0],
2219                                    0, 0,
2220                                    format0, 0,
2221                                    base_index);
2222
2223         if (fourcc_info->num_planes == 2) {
2224             gen7_pp_set_surface2_state(ctx, pp_context,
2225                                        bo, offset[1],
2226                                        width[1], height[1], pitch[1],
2227                                        0, 0,
2228                                        SURFACE_FORMAT_R8B8_UNORM, 0,
2229                                        base_index + 1);
2230         } else if (fourcc_info->num_planes == 3) {
2231             gen7_pp_set_surface2_state(ctx, pp_context,
2232                                        bo, offset[1],
2233                                        width[1], height[1], pitch[1],
2234                                        0, 0,
2235                                        SURFACE_FORMAT_R8_UNORM, 0,
2236                                        base_index + 1);
2237             gen7_pp_set_surface2_state(ctx, pp_context,
2238                                        bo, offset[2],
2239                                        width[2], height[2], pitch[2],
2240                                        0, 0,
2241                                        SURFACE_FORMAT_R8_UNORM, 0,
2242                                        base_index + 2);
2243         }
2244     }
2245 }
2246
/* Horizontal step count for the "null" module: always 1. The context is ignored. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2252
/* Vertical step count for the "null" module: always 1. The context is ignored. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2258
/* Per-block hook for the "null" module: touches nothing and returns 0. */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
2264
2265 static VAStatus
2266 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2267                    const struct i965_surface *src_surface,
2268                    const VARectangle *src_rect,
2269                    struct i965_surface *dst_surface,
2270                    const VARectangle *dst_rect,
2271                    void *filter_param)
2272 {
2273     /* private function & data */
2274     pp_context->pp_x_steps = pp_null_x_steps;
2275     pp_context->pp_y_steps = pp_null_y_steps;
2276     pp_context->private_context = NULL;
2277     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
2278
2279     dst_surface->flags = src_surface->flags;
2280
2281     return VA_STATUS_SUCCESS;
2282 }
2283
/* Horizontal step count for the load/save module: always 1. The context is ignored. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2289
2290 static int
2291 pp_load_save_y_steps(void *private_context)
2292 {
2293     struct pp_load_save_context *pp_load_save_context = private_context;
2294
2295     return pp_load_save_context->dest_h / 8;
2296 }
2297
2298 static int
2299 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2300 {
2301     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2302     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)pp_context->private_context;
2303
2304     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
2305     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
2306
2307     return 0;
2308 }
2309
2310 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
2311 {
2312     int i;
2313     /* x offset of dest surface must be dword aligned.
2314      * so we have to extend dst surface on left edge, and mask out pixels not interested
2315      */
2316     if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) {
2317         pp_context->block_horizontal_mask_left = 0;
2318         for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++)
2319         {
2320             pp_context->block_horizontal_mask_left |= 1<<i;
2321         }
2322     }
2323     else {
2324         pp_context->block_horizontal_mask_left = 0xffff;
2325     }
2326     
2327     int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; 
2328     if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){
2329         pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1;
2330     }
2331     else {
2332         pp_context->block_horizontal_mask_right = 0xffff;
2333     }
2334     
2335     if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){
2336         pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1;
2337     }
2338     else {
2339         pp_context->block_vertical_mask_bottom = 0xff;
2340     }
2341
2342 }
2343 static VAStatus
2344 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2345                                 const struct i965_surface *src_surface,
2346                                 const VARectangle *src_rect,
2347                                 struct i965_surface *dst_surface,
2348                                 const VARectangle *dst_rect,
2349                                 void *filter_param)
2350 {
2351     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->pp_load_save_context;
2352     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2353     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2354     int width[3], height[3], pitch[3], offset[3];
2355
2356     /* source surface */
2357     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
2358                                     width, height, pitch, offset);
2359
2360     /* destination surface */
2361     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
2362                                     width, height, pitch, offset);
2363
2364     /* private function & data */
2365     pp_context->pp_x_steps = pp_load_save_x_steps;
2366     pp_context->pp_y_steps = pp_load_save_y_steps;
2367     pp_context->private_context = &pp_context->pp_load_save_context;
2368     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
2369
2370     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;;
2371     pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
2372     pp_load_save_context->dest_y = dst_rect->y;
2373     pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
2374     pp_load_save_context->dest_w = ALIGN(dst_rect->width+dst_left_edge_extend, 16);
2375
2376     pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
2377     pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
2378
2379     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
2380     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
2381
2382     // update u/v offset for packed yuv
2383     i965_update_src_surface_static_parameter (ctx, pp_context, src_surface);
2384     i965_update_dst_surface_static_parameter (ctx, pp_context, dst_surface);
2385
2386     dst_surface->flags = src_surface->flags;
2387
2388     return VA_STATUS_SUCCESS;
2389 }
2390
/* Horizontal step count for the scaling module: always 1. The context is ignored. */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2396
2397 static int
2398 pp_scaling_y_steps(void *private_context)
2399 {
2400     struct pp_scaling_context *pp_scaling_context = private_context;
2401
2402     return pp_scaling_context->dest_h / 8;
2403 }
2404
2405 static int
2406 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2407 {
2408     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)pp_context->private_context;
2409     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2410     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2411     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2412     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2413
2414     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
2415     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
2416     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
2417     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
2418     
2419     return 0;
2420 }
2421
2422 static VAStatus
2423 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2424                            const struct i965_surface *src_surface,
2425                            const VARectangle *src_rect,
2426                            struct i965_surface *dst_surface,
2427                            const VARectangle *dst_rect,
2428                            void *filter_param)
2429 {
2430     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->pp_scaling_context;
2431     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2432     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2433     struct object_surface *obj_surface;
2434     struct i965_sampler_state *sampler_state;
2435     int in_w, in_h, in_wpitch, in_hpitch;
2436     int out_w, out_h, out_wpitch, out_hpitch;
2437
2438     /* source surface */
2439     obj_surface = (struct object_surface *)src_surface->base;
2440     in_w = obj_surface->orig_width;
2441     in_h = obj_surface->orig_height;
2442     in_wpitch = obj_surface->width;
2443     in_hpitch = obj_surface->height;
2444
2445     /* source Y surface index 1 */
2446     i965_pp_set_surface_state(ctx, pp_context,
2447                               obj_surface->bo, 0,
2448                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2449                               1, 0);
2450
2451     /* source UV surface index 2 */
2452     i965_pp_set_surface_state(ctx, pp_context,
2453                               obj_surface->bo, in_wpitch * in_hpitch,
2454                               ALIGN(in_w, 2) / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2455                               2, 0);
2456
2457     /* destination surface */
2458     obj_surface = (struct object_surface *)dst_surface->base;
2459     out_w = obj_surface->orig_width;
2460     out_h = obj_surface->orig_height;
2461     out_wpitch = obj_surface->width;
2462     out_hpitch = obj_surface->height;
2463
2464     /* destination Y surface index 7 */
2465     i965_pp_set_surface_state(ctx, pp_context,
2466                               obj_surface->bo, 0,
2467                               ALIGN(out_w, 4) / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2468                               7, 1);
2469
2470     /* destination UV surface index 8 */
2471     i965_pp_set_surface_state(ctx, pp_context,
2472                               obj_surface->bo, out_wpitch * out_hpitch,
2473                               ALIGN(out_w, 4) / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2474                               8, 1);
2475
2476     /* sampler state */
2477     dri_bo_map(pp_context->sampler_state_table.bo, True);
2478     assert(pp_context->sampler_state_table.bo->virtual);
2479     sampler_state = pp_context->sampler_state_table.bo->virtual;
2480
2481     /* SIMD16 Y index 1 */
2482     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
2483     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2484     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2485     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2486     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2487
2488     /* SIMD16 UV index 2 */
2489     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
2490     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2491     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2492     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2493     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2494
2495     dri_bo_unmap(pp_context->sampler_state_table.bo);
2496
2497     /* private function & data */
2498     pp_context->pp_x_steps = pp_scaling_x_steps;
2499     pp_context->pp_y_steps = pp_scaling_y_steps;
2500     pp_context->private_context = &pp_context->pp_scaling_context;
2501     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
2502
2503     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2504     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2505     pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
2506     pp_scaling_context->dest_y = dst_rect->y;
2507     pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2508     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
2509     pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2510     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
2511
2512     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2513
2514     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2515     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
2516     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
2517
2518     dst_surface->flags = src_surface->flags;
2519
2520     return VA_STATUS_SUCCESS;
2521 }
2522
2523 static int
2524 pp_avs_x_steps(void *private_context)
2525 {
2526     struct pp_avs_context *pp_avs_context = private_context;
2527
2528     return pp_avs_context->dest_w / 16;
2529 }
2530
/* Vertical step count for the AVS module: always 1. The context is ignored. */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2536
2537 static int
2538 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2539 {
2540     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
2541     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2542     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2543     float src_x_steping, src_y_steping, video_step_delta;
2544     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
2545
2546     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
2547         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2548         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
2549     } else if (tmp_w >= pp_avs_context->dest_w) {
2550         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2551         pp_inline_parameter->grf6.video_step_delta = 0;
2552         
2553         if (x == 0) {
2554             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
2555                 pp_avs_context->src_normalized_x;
2556         } else {
2557             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2558             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2559             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2560                 16 * 15 * video_step_delta / 2;
2561         }
2562     } else {
2563         int n0, n1, n2, nls_left, nls_right;
2564         int factor_a = 5, factor_b = 4;
2565         float f;
2566
2567         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
2568         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
2569         n2 = tmp_w / (16 * factor_a);
2570         nls_left = n0 + n2;
2571         nls_right = n1 + n2;
2572         f = (float) n2 * 16 / tmp_w;
2573         
2574         if (n0 < 5) {
2575             pp_inline_parameter->grf6.video_step_delta = 0.0;
2576
2577             if (x == 0) {
2578                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
2579                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2580             } else {
2581                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2582                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2583                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2584                     16 * 15 * video_step_delta / 2;
2585             }
2586         } else {
2587             if (x < nls_left) {
2588                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
2589                 float a = f / (nls_left * 16 * factor_b);
2590                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
2591                 
2592                 pp_inline_parameter->grf6.video_step_delta = b;
2593
2594                 if (x == 0) {
2595                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2596                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
2597                 } else {
2598                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2599                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2600                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2601                         16 * 15 * video_step_delta / 2;
2602                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
2603                 }
2604             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
2605                 /* scale the center linearly */
2606                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2607                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2608                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2609                     16 * 15 * video_step_delta / 2;
2610                 pp_inline_parameter->grf6.video_step_delta = 0.0;
2611                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2612             } else {
2613                 float a = f / (nls_right * 16 * factor_b);
2614                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
2615
2616                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2617                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2618                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2619                     16 * 15 * video_step_delta / 2;
2620                 pp_inline_parameter->grf6.video_step_delta = -b;
2621
2622                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
2623                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
2624                 else
2625                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
2626             }
2627         }
2628     }
2629
2630     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2631     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
2632     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2633     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
2634
2635     return 0;
2636 }
2637
2638 static const AVSConfig gen5_avs_config = {
2639     .coeff_frac_bits = 6,
2640     .coeff_epsilon = 1.0f / (1U << 6),
2641     .num_phases = 16,
2642     .num_luma_coeffs = 8,
2643     .num_chroma_coeffs = 4,
2644
2645     .coeff_range = {
2646         .lower_bound = {
2647             .y_k_h = { -0.25f, -0.5f, -1, 0, 0, -1, -0.5f, -0.25f },
2648             .y_k_v = { -0.25f, -0.5f, -1, 0, 0, -1, -0.5f, -0.25f },
2649             .uv_k_h = { -1, 0, 0, -1 },
2650             .uv_k_v = { -1, 0, 0, -1 },
2651         },
2652         .upper_bound = {
2653             .y_k_h = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2654             .y_k_v = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2655             .uv_k_h = { 1, 2, 2, 1 },
2656             .uv_k_v = { 1, 2, 2, 1 },
2657         },
2658     },
2659 };
2660
2661 static const AVSConfig gen6_avs_config = {
2662     .coeff_frac_bits = 6,
2663     .coeff_epsilon = 1.0f / (1U << 6),
2664     .num_phases = 16,
2665     .num_luma_coeffs = 8,
2666     .num_chroma_coeffs = 4,
2667
2668     .coeff_range = {
2669         .lower_bound = {
2670             .y_k_h = { -0.25f, -0.5f, -1, -2, -2, -1, -0.5f, -0.25f },
2671             .y_k_v = { -0.25f, -0.5f, -1, -2, -2, -1, -0.5f, -0.25f },
2672             .uv_k_h = { -1, 0, 0, -1 },
2673             .uv_k_v = { -1, 0, 0, -1 },
2674         },
2675         .upper_bound = {
2676             .y_k_h = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2677             .y_k_v = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2678             .uv_k_h = { 1, 2, 2, 1 },
2679             .uv_k_v = { 1, 2, 2, 1 },
2680         },
2681     },
2682 };
2683
2684 static VAStatus
2685 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2686                        const struct i965_surface *src_surface,
2687                        const VARectangle *src_rect,
2688                        struct i965_surface *dst_surface,
2689                        const VARectangle *dst_rect,
2690                        void *filter_param)
2691 {
2692     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
2693     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2694     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2695     struct object_surface *obj_surface;
2696     struct i965_sampler_8x8 *sampler_8x8;
2697     struct i965_sampler_8x8_state *sampler_8x8_state;
2698     int index;
2699     int in_w, in_h, in_wpitch, in_hpitch;
2700     int out_w, out_h, out_wpitch, out_hpitch;
2701     int i;
2702     AVSState * const avs = &pp_avs_context->state;
2703     float sx, sy;
2704
2705     const int nlas = (pp_context->filter_flags & VA_FILTER_SCALING_MASK) ==
2706         VA_FILTER_SCALING_NL_ANAMORPHIC;
2707
2708     /* surface */
2709     obj_surface = (struct object_surface *)src_surface->base;
2710     in_w = obj_surface->orig_width;
2711     in_h = obj_surface->orig_height;
2712     in_wpitch = obj_surface->width;
2713     in_hpitch = obj_surface->height;
2714
2715     /* source Y surface index 1 */
2716     i965_pp_set_surface2_state(ctx, pp_context,
2717                                obj_surface->bo, 0,
2718                                in_w, in_h, in_wpitch,
2719                                0, 0,
2720                                SURFACE_FORMAT_Y8_UNORM, 0,
2721                                1);
2722
2723     /* source UV surface index 2 */
2724     i965_pp_set_surface2_state(ctx, pp_context,
2725                                obj_surface->bo, in_wpitch * in_hpitch,
2726                                in_w / 2, in_h / 2, in_wpitch,
2727                                0, 0,
2728                                SURFACE_FORMAT_R8B8_UNORM, 0,
2729                                2);
2730
2731     /* destination surface */
2732     obj_surface = (struct object_surface *)dst_surface->base;
2733     out_w = obj_surface->orig_width;
2734     out_h = obj_surface->orig_height;
2735     out_wpitch = obj_surface->width;
2736     out_hpitch = obj_surface->height;
2737     assert(out_w <= out_wpitch && out_h <= out_hpitch);
2738
2739     /* destination Y surface index 7 */
2740     i965_pp_set_surface_state(ctx, pp_context,
2741                               obj_surface->bo, 0,
2742                               ALIGN(out_w, 4) / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2743                               7, 1);
2744
2745     /* destination UV surface index 8 */
2746     i965_pp_set_surface_state(ctx, pp_context,
2747                               obj_surface->bo, out_wpitch * out_hpitch,
2748                               ALIGN(out_w, 4) / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2749                               8, 1);
2750
2751     /* sampler 8x8 state */
2752     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2753     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2754     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2755     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2756     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2757
2758     sx = (float)dst_rect->width / src_rect->width;
2759     sy = (float)dst_rect->height / src_rect->height;
2760     avs_update_coefficients(avs, sx, sy, pp_context->filter_flags);
2761
2762     assert(avs->config->num_phases == 16);
2763     for (i = 0; i <= 16; i++) {
2764         const AVSCoeffs * const coeffs = &avs->coeffs[i];
2765
2766         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 =
2767             intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
2768         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 =
2769             intel_format_convert(coeffs->y_k_h[1], 1, 6, 1);
2770         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 =
2771             intel_format_convert(coeffs->y_k_h[2], 1, 6, 1);
2772         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 =
2773             intel_format_convert(coeffs->y_k_h[3], 1, 6, 1);
2774         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 =
2775             intel_format_convert(coeffs->y_k_h[4], 1, 6, 1);
2776         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 =
2777             intel_format_convert(coeffs->y_k_h[5], 1, 6, 1);
2778         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 =
2779             intel_format_convert(coeffs->y_k_h[6], 1, 6, 1);
2780         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 =
2781             intel_format_convert(coeffs->y_k_h[7], 1, 6, 1);
2782
2783         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 =
2784             intel_format_convert(coeffs->uv_k_h[0], 1, 6, 1);
2785         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 =
2786             intel_format_convert(coeffs->uv_k_h[1], 1, 6, 1);
2787         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 =
2788             intel_format_convert(coeffs->uv_k_h[2], 1, 6, 1);
2789         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 =
2790             intel_format_convert(coeffs->uv_k_h[3], 1, 6, 1);
2791
2792         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 =
2793             intel_format_convert(coeffs->y_k_v[0], 1, 6, 1);
2794         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 =
2795             intel_format_convert(coeffs->y_k_v[1], 1, 6, 1);
2796         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 =
2797             intel_format_convert(coeffs->y_k_v[2], 1, 6, 1);
2798         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 =
2799             intel_format_convert(coeffs->y_k_v[3], 1, 6, 1);
2800         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 =
2801             intel_format_convert(coeffs->y_k_v[4], 1, 6, 1);
2802         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 =
2803             intel_format_convert(coeffs->y_k_v[5], 1, 6, 1);
2804         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 =
2805             intel_format_convert(coeffs->y_k_v[6], 1, 6, 1);
2806         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 =
2807             intel_format_convert(coeffs->y_k_v[7], 1, 6, 1);
2808
2809         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 =
2810             intel_format_convert(coeffs->uv_k_v[0], 1, 6, 1);
2811         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 =
2812             intel_format_convert(coeffs->uv_k_v[1], 1, 6, 1);
2813         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 =
2814             intel_format_convert(coeffs->uv_k_v[2], 1, 6, 1);
2815         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 =
2816             intel_format_convert(coeffs->uv_k_v[3], 1, 6, 1);
2817     }
2818
2819     /* Adaptive filter for all channels (DW4.15) */
2820     sampler_8x8_state->coefficients[0].dw4.table_1x_filter_c1 = 1U << 7;
2821
2822     sampler_8x8_state->dw136.default_sharpness_level =
2823         -avs_is_needed(pp_context->filter_flags);
2824     sampler_8x8_state->dw137.ilk.bypass_y_adaptive_filtering = 1;
2825     sampler_8x8_state->dw137.ilk.bypass_x_adaptive_filtering = 1;
2826     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2827
2828     /* sampler 8x8 */
2829     dri_bo_map(pp_context->sampler_state_table.bo, True);
2830     assert(pp_context->sampler_state_table.bo->virtual);
2831     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
2832     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2833
2834     /* sample_8x8 Y index 1 */
2835     index = 1;
2836     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2837     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2838     sampler_8x8[index].dw0.ief_bypass = 1;
2839     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2840     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2841     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2842     sampler_8x8[index].dw2.global_noise_estimation = 22;
2843     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2844     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2845     sampler_8x8[index].dw3.strong_edge_weight = 7;
2846     sampler_8x8[index].dw3.regular_weight = 2;
2847     sampler_8x8[index].dw3.non_edge_weight = 0;
2848     sampler_8x8[index].dw3.gain_factor = 40;
2849     sampler_8x8[index].dw4.steepness_boost = 0;
2850     sampler_8x8[index].dw4.steepness_threshold = 0;
2851     sampler_8x8[index].dw4.mr_boost = 0;
2852     sampler_8x8[index].dw4.mr_threshold = 5;
2853     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2854     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2855     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2856     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2857     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2858     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2859     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2860     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2861     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2862     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2863     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2864     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2865     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2866     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2867     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2868     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2869     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2870     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2871     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2872     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2873     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2874     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2875     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2876     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2877     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2878     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2879     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2880     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2881     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2882     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2883     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2884     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2885     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2886     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2887     sampler_8x8[index].dw13.limiter_boost = 0;
2888     sampler_8x8[index].dw13.minimum_limiter = 10;
2889     sampler_8x8[index].dw13.maximum_limiter = 11;
2890     sampler_8x8[index].dw14.clip_limiter = 130;
2891     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2892                       I915_GEM_DOMAIN_RENDER, 
2893                       0,
2894                       0,
2895                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2896                       pp_context->sampler_state_table.bo_8x8);
2897
2898     /* sample_8x8 UV index 2 */
2899     index = 2;
2900     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2901     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2902     sampler_8x8[index].dw0.ief_bypass = 1;
2903     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2904     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2905     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2906     sampler_8x8[index].dw2.global_noise_estimation = 22;
2907     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2908     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2909     sampler_8x8[index].dw3.strong_edge_weight = 7;
2910     sampler_8x8[index].dw3.regular_weight = 2;
2911     sampler_8x8[index].dw3.non_edge_weight = 0;
2912     sampler_8x8[index].dw3.gain_factor = 40;
2913     sampler_8x8[index].dw4.steepness_boost = 0;
2914     sampler_8x8[index].dw4.steepness_threshold = 0;
2915     sampler_8x8[index].dw4.mr_boost = 0;
2916     sampler_8x8[index].dw4.mr_threshold = 5;
2917     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2918     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2919     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2920     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2921     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2922     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2923     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2924     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2925     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2926     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2927     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2928     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2929     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2930     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2931     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2932     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2933     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2934     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2935     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2936     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2937     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2938     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2939     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2940     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2941     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2942     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2943     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2944     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2945     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2946     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2947     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2948     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2949     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2950     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2951     sampler_8x8[index].dw13.limiter_boost = 0;
2952     sampler_8x8[index].dw13.minimum_limiter = 10;
2953     sampler_8x8[index].dw13.maximum_limiter = 11;
2954     sampler_8x8[index].dw14.clip_limiter = 130;
2955     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2956                       I915_GEM_DOMAIN_RENDER, 
2957                       0,
2958                       0,
2959                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2960                       pp_context->sampler_state_table.bo_8x8);
2961
2962     dri_bo_unmap(pp_context->sampler_state_table.bo);
2963
2964     /* private function & data */
2965     pp_context->pp_x_steps = pp_avs_x_steps;
2966     pp_context->pp_y_steps = pp_avs_y_steps;
2967     pp_context->private_context = &pp_context->pp_avs_context;
2968     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2969
2970     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2971     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2972     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
2973     pp_avs_context->dest_y = dst_rect->y;
2974     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2975     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
2976     pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2977     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2978     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
2979     pp_avs_context->src_h = src_rect->height;
2980
2981     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2982     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2983
2984     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2985     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2986     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2987     pp_inline_parameter->grf6.video_step_delta = 0.0;
2988
2989     dst_surface->flags = src_surface->flags;
2990
2991     return VA_STATUS_SUCCESS;
2992 }
2993
2994 static VAStatus
2995 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2996                              const struct i965_surface *src_surface,
2997                              const VARectangle *src_rect,
2998                              struct i965_surface *dst_surface,
2999                              const VARectangle *dst_rect,
3000                              void *filter_param)
3001 {
3002     return pp_nv12_avs_initialize(ctx, pp_context,
3003                                   src_surface,
3004                                   src_rect,
3005                                   dst_surface,
3006                                   dst_rect,
3007                                   filter_param);
3008 }
3009
3010 static int
3011 gen7_pp_avs_x_steps(void *private_context)
3012 {
3013     struct pp_avs_context *pp_avs_context = private_context;
3014
3015     return pp_avs_context->dest_w / 16;
3016 }
3017
3018 static int
3019 gen7_pp_avs_y_steps(void *private_context)
3020 {
3021     struct pp_avs_context *pp_avs_context = private_context;
3022
3023     return pp_avs_context->dest_h / 16;
3024 }
3025
3026 static int
3027 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3028 {
3029     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
3030     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3031
3032     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
3033     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
3034     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
3035     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = pp_avs_context->horiz_range / pp_avs_context->src_w;
3036
3037     return 0;
3038 }
3039
3040 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx, 
3041                                               struct i965_post_processing_context *pp_context,
3042                                               const struct i965_surface *surface)
3043 {
3044     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3045     int fourcc = pp_get_surface_fourcc(ctx, surface);
3046     
3047     if (fourcc == VA_FOURCC_YUY2) {
3048         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3049         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3050         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3051     } else if (fourcc == VA_FOURCC_UYVY) {
3052         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
3053         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
3054         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
3055     }
3056 }
3057
3058 static VAStatus
3059 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3060                            const struct i965_surface *src_surface,
3061                            const VARectangle *src_rect,
3062                            struct i965_surface *dst_surface,
3063                            const VARectangle *dst_rect,
3064                            void *filter_param)
3065 {
3066     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
3067     struct i965_driver_data *i965 = i965_driver_data(ctx);
3068     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3069     struct gen7_sampler_8x8 *sampler_8x8;
3070     struct i965_sampler_8x8_state *sampler_8x8_state;
3071     int index, i;
3072     int width[3], height[3], pitch[3], offset[3];
3073     int src_width, src_height;
3074     AVSState * const avs = &pp_avs_context->state;
3075     float sx, sy;
3076
3077     /* source surface */
3078     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
3079                                          src_rect,
3080                                          width, height, pitch, offset);
3081     src_width = width[0];
3082     src_height = height[0];
3083
3084     /* destination surface */
3085     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
3086                                          dst_rect,
3087                                          width, height, pitch, offset);
3088
3089     /* sampler 8x8 state */
3090     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
3091     assert(pp_context->sampler_state_table.bo_8x8->virtual);
3092     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
3093     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
3094     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
3095
3096     sx = (float)dst_rect->width / src_rect->width;
3097     sy = (float)dst_rect->height / src_rect->height;
3098     avs_update_coefficients(avs, sx, sy, pp_context->filter_flags);
3099
3100     assert(avs->config->num_phases == 16);
3101     for (i = 0; i <= 16; i++) {
3102         const AVSCoeffs * const coeffs = &avs->coeffs[i];
3103
3104         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 =
3105             intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
3106         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 =
3107             intel_format_convert(coeffs->y_k_h[1], 1, 6, 1);
3108         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 =
3109             intel_format_convert(coeffs->y_k_h[2], 1, 6, 1);
3110         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 =
3111             intel_format_convert(coeffs->y_k_h[3], 1, 6, 1);
3112         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 =
3113             intel_format_convert(coeffs->y_k_h[4], 1, 6, 1);
3114         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 =
3115             intel_format_convert(coeffs->y_k_h[5], 1, 6, 1);
3116         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 =
3117             intel_format_convert(coeffs->y_k_h[6], 1, 6, 1);
3118         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 =
3119             intel_format_convert(coeffs->y_k_h[7], 1, 6, 1);
3120
3121         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 =
3122             intel_format_convert(coeffs->uv_k_h[0], 1, 6, 1);
3123         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 =
3124             intel_format_convert(coeffs->uv_k_h[1], 1, 6, 1);
3125         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 =
3126             intel_format_convert(coeffs->uv_k_h[2], 1, 6, 1);
3127         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 =
3128             intel_format_convert(coeffs->uv_k_h[3], 1, 6, 1);
3129
3130         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 =
3131             intel_format_convert(coeffs->y_k_v[0], 1, 6, 1);
3132         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 =
3133             intel_format_convert(coeffs->y_k_v[1], 1, 6, 1);
3134         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 =
3135             intel_format_convert(coeffs->y_k_v[2], 1, 6, 1);
3136         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 =
3137             intel_format_convert(coeffs->y_k_v[3], 1, 6, 1);
3138         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 =
3139             intel_format_convert(coeffs->y_k_v[4], 1, 6, 1);
3140         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 =
3141             intel_format_convert(coeffs->y_k_v[5], 1, 6, 1);
3142         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 =
3143             intel_format_convert(coeffs->y_k_v[6], 1, 6, 1);
3144         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 =
3145             intel_format_convert(coeffs->y_k_v[7], 1, 6, 1);
3146
3147         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 =
3148             intel_format_convert(coeffs->uv_k_v[0], 1, 6, 1);
3149         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 =
3150             intel_format_convert(coeffs->uv_k_v[1], 1, 6, 1);
3151         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 =
3152             intel_format_convert(coeffs->uv_k_v[2], 1, 6, 1);
3153         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 =
3154             intel_format_convert(coeffs->uv_k_v[3], 1, 6, 1);
3155     }
3156
3157     sampler_8x8_state->dw136.default_sharpness_level =
3158         -avs_is_needed(pp_context->filter_flags);
3159     if (IS_HASWELL(i965->intel.device_info)) {
3160         sampler_8x8_state->dw137.hsw.adaptive_filter_for_all_channel = 1;
3161         sampler_8x8_state->dw137.hsw.bypass_y_adaptive_filtering = 1;
3162         sampler_8x8_state->dw137.hsw.bypass_x_adaptive_filtering = 1;
3163     }
3164     else {
3165         sampler_8x8_state->coefficients[0].dw4.table_1x_filter_c1 = 1U << 7;
3166         sampler_8x8_state->dw137.ilk.bypass_y_adaptive_filtering = 1;
3167         sampler_8x8_state->dw137.ilk.bypass_x_adaptive_filtering = 1;
3168     }
3169     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
3170
3171     /* sampler 8x8 */
3172     dri_bo_map(pp_context->sampler_state_table.bo, True);
3173     assert(pp_context->sampler_state_table.bo->virtual);
3174     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
3175     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
3176
3177     /* sample_8x8 Y index 4 */
3178     index = 4;
3179     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3180     sampler_8x8[index].dw0.global_noise_estimation = 255;
3181     sampler_8x8[index].dw0.ief_bypass = 1;
3182
3183     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3184
3185     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3186     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3187     sampler_8x8[index].dw2.r5x_coefficient = 9;
3188     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3189     sampler_8x8[index].dw2.r5c_coefficient = 3;
3190
3191     sampler_8x8[index].dw3.r3x_coefficient = 27;
3192     sampler_8x8[index].dw3.r3c_coefficient = 5;
3193     sampler_8x8[index].dw3.gain_factor = 40;
3194     sampler_8x8[index].dw3.non_edge_weight = 1;
3195     sampler_8x8[index].dw3.regular_weight = 2;
3196     sampler_8x8[index].dw3.strong_edge_weight = 7;
3197     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3198
3199     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3200                       I915_GEM_DOMAIN_RENDER, 
3201                       0,
3202                       0,
3203                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3204                       pp_context->sampler_state_table.bo_8x8);
3205
3206     /* sample_8x8 UV index 8 */
3207     index = 8;
3208     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3209     sampler_8x8[index].dw0.disable_8x8_filter = 0;
3210     sampler_8x8[index].dw0.global_noise_estimation = 255;
3211     sampler_8x8[index].dw0.ief_bypass = 1;
3212     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3213     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3214     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3215     sampler_8x8[index].dw2.r5x_coefficient = 9;
3216     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3217     sampler_8x8[index].dw2.r5c_coefficient = 3;
3218     sampler_8x8[index].dw3.r3x_coefficient = 27;
3219     sampler_8x8[index].dw3.r3c_coefficient = 5;
3220     sampler_8x8[index].dw3.gain_factor = 40;
3221     sampler_8x8[index].dw3.non_edge_weight = 1;
3222     sampler_8x8[index].dw3.regular_weight = 2;
3223     sampler_8x8[index].dw3.strong_edge_weight = 7;
3224     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3225
3226     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3227                       I915_GEM_DOMAIN_RENDER, 
3228                       0,
3229                       0,
3230                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3231                       pp_context->sampler_state_table.bo_8x8);
3232
3233     /* sampler_8x8 V, index 12 */
3234     index = 12;
3235     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3236     sampler_8x8[index].dw0.disable_8x8_filter = 0;
3237     sampler_8x8[index].dw0.global_noise_estimation = 255;
3238     sampler_8x8[index].dw0.ief_bypass = 1;
3239     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3240     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3241     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3242     sampler_8x8[index].dw2.r5x_coefficient = 9;
3243     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3244     sampler_8x8[index].dw2.r5c_coefficient = 3;
3245     sampler_8x8[index].dw3.r3x_coefficient = 27;
3246     sampler_8x8[index].dw3.r3c_coefficient = 5;
3247     sampler_8x8[index].dw3.gain_factor = 40;
3248     sampler_8x8[index].dw3.non_edge_weight = 1;
3249     sampler_8x8[index].dw3.regular_weight = 2;
3250     sampler_8x8[index].dw3.strong_edge_weight = 7;
3251     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3252
3253     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3254                       I915_GEM_DOMAIN_RENDER, 
3255                       0,
3256                       0,
3257                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3258                       pp_context->sampler_state_table.bo_8x8);
3259
3260     dri_bo_unmap(pp_context->sampler_state_table.bo);
3261
3262     /* private function & data */
3263     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
3264     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
3265     pp_context->private_context = &pp_context->pp_avs_context;
3266     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
3267
3268     pp_avs_context->dest_x = dst_rect->x;
3269     pp_avs_context->dest_y = dst_rect->y;
3270     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
3271     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
3272     pp_avs_context->src_w = src_rect->width;
3273     pp_avs_context->src_h = src_rect->height;
3274     pp_avs_context->horiz_range = (float)src_rect->width / src_width;
3275
3276     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
3277     dw = MAX(dw, dst_rect->width);
3278
3279     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3280     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
3281     if (IS_HASWELL(i965->intel.device_info))
3282         pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */
3283
3284     if (pp_static_parameter->grf2.avs_wa_enable) {
3285         int src_fourcc = pp_get_surface_fourcc(ctx, src_surface);
3286         if ((src_fourcc == VA_FOURCC_RGBA) ||
3287             (src_fourcc == VA_FOURCC_RGBX) ||
3288             (src_fourcc == VA_FOURCC_BGRA) ||
3289             (src_fourcc == VA_FOURCC_BGRX)) {
3290             pp_static_parameter->grf2.avs_wa_enable = 0;
3291         }
3292     }
3293         
3294     pp_static_parameter->grf2.avs_wa_width = src_width;
3295     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * src_width);
3296     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * src_width);
3297     pp_static_parameter->grf2.alpha = 255;
3298
3299     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
3300     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height;
3301     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
3302         (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
3303     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
3304         (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
3305
3306     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
3307
3308     dst_surface->flags = src_surface->flags;
3309
3310     return VA_STATUS_SUCCESS;
3311 }
3312
/*
 * Horizontal step count callback installed as pp_context->pp_x_steps by
 * pp_nv12_dndi_initialize().
 *
 * The private context is not consulted: the answer is always 1.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
3318
3319 static int
3320 pp_dndi_y_steps(void *private_context)
3321 {
3322     struct pp_dndi_context *pp_dndi_context = private_context;
3323
3324     return pp_dndi_context->dest_h / 4;
3325 }
3326
3327 static int
3328 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3329 {
3330     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3331
3332     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3333     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3334
3335     return 0;
3336 }
3337
3338 static VAStatus
3339 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3340                         const struct i965_surface *src_surface,
3341                         const VARectangle *src_rect,
3342                         struct i965_surface *dst_surface,
3343                         const VARectangle *dst_rect,
3344                         void *filter_param)
3345 {
3346     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
3347     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3348     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3349     const VAProcPipelineParameterBuffer * const pipe_params =
3350         pp_context->pipeline_param;
3351     const VAProcFilterParameterBufferDeinterlacing * const deint_params =
3352         filter_param;
3353     struct object_surface * const src_obj_surface = (struct object_surface *)
3354         src_surface->base;
3355     struct object_surface * const dst_obj_surface = (struct object_surface *)
3356         dst_surface->base;
3357     struct object_surface *obj_surface;
3358     struct i965_sampler_dndi *sampler_dndi;
3359     int index, dndi_top_first;
3360     int w, h, orig_w, orig_h;
3361     VAStatus status;
3362
3363     status = pp_dndi_context_init_surface_params(dndi_ctx, src_obj_surface,
3364         pipe_params, deint_params);
3365     if (status != VA_STATUS_SUCCESS)
3366         return status;
3367
3368     status = pp_dndi_context_ensure_surfaces(ctx, pp_context,
3369         src_obj_surface, dst_obj_surface);
3370     if (status != VA_STATUS_SUCCESS)
3371         return status;
3372
3373     status = pp_dndi_context_ensure_surfaces_storage(ctx, pp_context,
3374         src_obj_surface, dst_obj_surface);
3375     if (status != VA_STATUS_SUCCESS)
3376         return status;
3377
3378     /* Current input surface (index = 4) */
3379     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].obj_surface;
3380     i965_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3381         obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3382         0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 4);
3383
3384     /* Previous input surface (index = 5) */
3385     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS].obj_surface;
3386     i965_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3387         obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3388         0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 5);
3389
3390     /* STMM input surface (index = 6) */
3391     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_STMM].obj_surface;
3392     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3393         obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3394         I965_SURFACEFORMAT_R8_UNORM, 6, 1);
3395
3396     /* Previous output surfaces (index = { 7, 8 }) */
3397     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS].obj_surface;
3398     w = obj_surface->width;
3399     h = obj_surface->height;
3400     orig_w = obj_surface->orig_width;
3401     orig_h = obj_surface->orig_height;
3402
3403     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3404         ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 7, 1);
3405     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3406         ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 8, 1);
3407
3408     /* Current output surfaces (index = { 10, 11 }) */
3409     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT].obj_surface;
3410     w = obj_surface->width;
3411     h = obj_surface->height;
3412     orig_w = obj_surface->orig_width;
3413     orig_h = obj_surface->orig_height;
3414
3415     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3416         ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 10, 1);
3417     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3418         ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 11, 1);
3419
3420     /* STMM output surface (index = 20) */
3421     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM].obj_surface;
3422     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3423         obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3424         I965_SURFACEFORMAT_R8_UNORM, 20, 1);
3425
3426     dndi_top_first = !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
3427
3428     /* sampler dndi */
3429     dri_bo_map(pp_context->sampler_state_table.bo, True);
3430     assert(pp_context->sampler_state_table.bo->virtual);
3431     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3432     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3433
3434     /* sample dndi index 1 */
3435     index = 0;
3436     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3437     sampler_dndi[index].dw0.denoise_history_delta = 7;          // 0-15, default is 8
3438     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
3439     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3440
3441     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3442     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3443     sampler_dndi[index].dw1.stmm_c2 = 1;
3444     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3445     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3446
3447     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
3448     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 1;    // 0-15
3449     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3450     sampler_dndi[index].dw2.good_neighbor_threshold = 12;                // 0-63
3451
3452     sampler_dndi[index].dw3.maximum_stmm = 150;
3453     sampler_dndi[index].dw3.multipler_for_vecm = 30;
3454     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3455     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3456     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3457
3458     sampler_dndi[index].dw4.sdi_delta = 5;
3459     sampler_dndi[index].dw4.sdi_threshold = 100;
3460     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3461     sampler_dndi[index].dw4.stmm_shift_up = 1;
3462     sampler_dndi[index].dw4.stmm_shift_down = 3;
3463     sampler_dndi[index].dw4.minimum_stmm = 118;
3464
3465     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3466     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3467     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3468     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
3469
3470     sampler_dndi[index].dw6.dn_enable = 1;
3471     sampler_dndi[index].dw6.di_enable = 1;
3472     sampler_dndi[index].dw6.di_partial = 0;
3473     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3474     sampler_dndi[index].dw6.dndi_stream_id = 0;
3475     sampler_dndi[index].dw6.dndi_first_frame = dndi_ctx->is_first_frame;
3476     sampler_dndi[index].dw6.progressive_dn = 0;
3477     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3478     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3479     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3480
3481     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3482     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3483     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3484     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3485
3486     dri_bo_unmap(pp_context->sampler_state_table.bo);
3487
3488     /* private function & data */
3489     pp_context->pp_x_steps = pp_dndi_x_steps;
3490     pp_context->pp_y_steps = pp_dndi_y_steps;
3491     pp_context->private_context = dndi_ctx;
3492     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
3493
3494     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3495     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
3496     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
3497     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
3498
3499     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3500     pp_inline_parameter->grf5.number_blocks = w / 16;
3501     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3502     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3503
3504     dndi_ctx->dest_w = w;
3505     dndi_ctx->dest_h = h;
3506
3507     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3508     return VA_STATUS_SUCCESS;
3509 }
3510
/*
 * Horizontal step count callback installed as pp_context->pp_x_steps by
 * pp_nv12_dn_initialize().
 *
 * The private context is not consulted: the answer is always 1.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
3516
3517 static int
3518 pp_dn_y_steps(void *private_context)
3519 {
3520     struct pp_dn_context *pp_dn_context = private_context;
3521
3522     return pp_dn_context->dest_h / 8;
3523 }
3524
3525 static int
3526 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3527 {
3528     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3529
3530     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3531     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
3532
3533     return 0;
3534 }
3535
3536 static VAStatus
3537 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3538                       const struct i965_surface *src_surface,
3539                       const VARectangle *src_rect,
3540                       struct i965_surface *dst_surface,
3541                       const VARectangle *dst_rect,
3542                       void *filter_param)
3543 {
3544     struct i965_driver_data *i965 = i965_driver_data(ctx);
3545     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
3546     struct object_surface *obj_surface;
3547     struct i965_sampler_dndi *sampler_dndi;
3548     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3549     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3550     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3551     int index;
3552     int w, h;
3553     int orig_w, orig_h;
3554     int dn_strength = 15;
3555     int dndi_top_first = 1;
3556     int dn_progressive = 0;
3557
3558     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3559         dndi_top_first = 1;
3560         dn_progressive = 1;
3561     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3562         dndi_top_first = 1;
3563         dn_progressive = 0;
3564     } else {
3565         dndi_top_first = 0;
3566         dn_progressive = 0;
3567     }
3568
3569     if (dn_filter_param) {
3570         float value = dn_filter_param->value;
3571         
3572         if (value > 1.0)
3573             value = 1.0;
3574         
3575         if (value < 0.0)
3576             value = 0.0;
3577
3578         dn_strength = (int)(value * 31.0F);
3579     }
3580
3581     /* surface */
3582     obj_surface = (struct object_surface *)src_surface->base;
3583     orig_w = obj_surface->orig_width;
3584     orig_h = obj_surface->orig_height;
3585     w = obj_surface->width;
3586     h = obj_surface->height;
3587
3588     if (pp_dn_context->stmm_bo == NULL) {
3589         pp_dn_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
3590                                               "STMM surface",
3591                                               w * h,
3592                                               4096);
3593         assert(pp_dn_context->stmm_bo);
3594     }
3595
3596     /* source UV surface index 2 */
3597     i965_pp_set_surface_state(ctx, pp_context,
3598                               obj_surface->bo, w * h,
3599                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3600                               2, 0);
3601
3602     /* source YUV surface index 4 */
3603     i965_pp_set_surface2_state(ctx, pp_context,
3604                                obj_surface->bo, 0,
3605                                orig_w, orig_h, w,
3606                                0, h,
3607                                SURFACE_FORMAT_PLANAR_420_8, 1,
3608                                4);
3609
3610     /* source STMM surface index 20 */
3611     i965_pp_set_surface_state(ctx, pp_context,
3612                               pp_dn_context->stmm_bo, 0,
3613                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3614                               20, 1);
3615
3616     /* destination surface */
3617     obj_surface = (struct object_surface *)dst_surface->base;
3618     orig_w = obj_surface->orig_width;
3619     orig_h = obj_surface->orig_height;
3620     w = obj_surface->width;
3621     h = obj_surface->height;
3622
3623     /* destination Y surface index 7 */
3624     i965_pp_set_surface_state(ctx, pp_context,
3625                               obj_surface->bo, 0,
3626                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3627                               7, 1);
3628
3629     /* destination UV surface index 8 */
3630     i965_pp_set_surface_state(ctx, pp_context,
3631                               obj_surface->bo, w * h,
3632                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3633                               8, 1);
3634     /* sampler dn */
3635     dri_bo_map(pp_context->sampler_state_table.bo, True);
3636     assert(pp_context->sampler_state_table.bo->virtual);
3637     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3638     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3639
3640     /* sample dndi index 1 */
3641     index = 0;
3642     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3643     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
3644     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3645     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3646
3647     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3648     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3649     sampler_dndi[index].dw1.stmm_c2 = 0;
3650     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3651     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3652
3653     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3654     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
3655     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3656     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
3657
3658     sampler_dndi[index].dw3.maximum_stmm = 128;
3659     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3660     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3661     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3662     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3663
3664     sampler_dndi[index].dw4.sdi_delta = 8;
3665     sampler_dndi[index].dw4.sdi_threshold = 128;
3666     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3667     sampler_dndi[index].dw4.stmm_shift_up = 0;
3668     sampler_dndi[index].dw4.stmm_shift_down = 0;
3669     sampler_dndi[index].dw4.minimum_stmm = 0;
3670
3671     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3672     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3673     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3674     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3675
3676     sampler_dndi[index].dw6.dn_enable = 1;
3677     sampler_dndi[index].dw6.di_enable = 0;
3678     sampler_dndi[index].dw6.di_partial = 0;
3679     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3680     sampler_dndi[index].dw6.dndi_stream_id = 1;
3681     sampler_dndi[index].dw6.dndi_first_frame = 1;
3682     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
3683     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3684     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3685     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3686
3687     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3688     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3689     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3690     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3691
3692     dri_bo_unmap(pp_context->sampler_state_table.bo);
3693
3694     /* private function & data */
3695     pp_context->pp_x_steps = pp_dn_x_steps;
3696     pp_context->pp_y_steps = pp_dn_y_steps;
3697     pp_context->private_context = &pp_context->pp_dn_context;
3698     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
3699
3700     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3701     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
3702     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
3703     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
3704
3705     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3706     pp_inline_parameter->grf5.number_blocks = w / 16;
3707     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3708     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3709
3710     pp_dn_context->dest_w = w;
3711     pp_dn_context->dest_h = h;
3712
3713     dst_surface->flags = src_surface->flags;
3714     
3715     return VA_STATUS_SUCCESS;
3716 }
3717
3718 static int
3719 gen7_pp_dndi_x_steps(void *private_context)
3720 {
3721     struct pp_dndi_context *pp_dndi_context = private_context;
3722
3723     return pp_dndi_context->dest_w / 16;
3724 }
3725
3726 static int
3727 gen7_pp_dndi_y_steps(void *private_context)
3728 {
3729     struct pp_dndi_context *pp_dndi_context = private_context;
3730
3731     return pp_dndi_context->dest_h / 4;
3732 }
3733
3734 static int
3735 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3736 {
3737     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3738
3739     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
3740     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
3741
3742     return 0;
3743 }
3744
3745 static VAStatus
3746 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3747                              const struct i965_surface *src_surface,
3748                              const VARectangle *src_rect,
3749                              struct i965_surface *dst_surface,
3750                              const VARectangle *dst_rect,
3751                              void *filter_param)
3752 {
3753     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
3754     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3755     const VAProcPipelineParameterBuffer * const pipe_params =
3756         pp_context->pipeline_param;
3757     const VAProcFilterParameterBufferDeinterlacing * const deint_params =
3758         filter_param;
3759     struct object_surface * const src_obj_surface = (struct object_surface *)
3760         src_surface->base;
3761     struct object_surface * const dst_obj_surface = (struct object_surface *)
3762         dst_surface->base;
3763     struct object_surface *obj_surface;
3764     struct gen7_sampler_dndi *sampler_dndi;
3765     int index, dndi_top_first;
3766     int w, h, orig_w, orig_h;
3767     VAStatus status;
3768
3769     status = pp_dndi_context_init_surface_params(dndi_ctx, src_obj_surface,
3770         pipe_params, deint_params);
3771     if (status != VA_STATUS_SUCCESS)
3772         return status;
3773
3774     status = pp_dndi_context_ensure_surfaces(ctx, pp_context,
3775         src_obj_surface, dst_obj_surface);
3776     if (status != VA_STATUS_SUCCESS)
3777         return status;
3778
3779     status = pp_dndi_context_ensure_surfaces_storage(ctx, pp_context,
3780         src_obj_surface, dst_obj_surface);
3781     if (status != VA_STATUS_SUCCESS)
3782         return status;
3783
3784     /* Current input surface (index = 3) */
3785     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].obj_surface;
3786     gen7_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3787         obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3788         0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 3);
3789
3790     /* Previous input surface (index = 4) */
3791     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS].obj_surface;
3792     gen7_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3793         obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3794         0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 4);
3795
3796     /* STMM input surface (index = 5) */
3797     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_STMM].obj_surface;
3798     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3799         obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3800         I965_SURFACEFORMAT_R8_UNORM, 5, 1);
3801
3802     /* Previous output surfaces (index = { 27, 28 }) */
3803     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS].obj_surface;
3804     w = obj_surface->width;
3805     h = obj_surface->height;
3806     orig_w = obj_surface->orig_width;
3807     orig_h = obj_surface->orig_height;
3808
3809     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3810         ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 27, 1);
3811     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3812         ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 28, 1);
3813
3814     /* Current output surfaces (index = { 30, 31 }) */
3815     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT].obj_surface;
3816     w = obj_surface->width;
3817     h = obj_surface->height;
3818     orig_w = obj_surface->orig_width;
3819     orig_h = obj_surface->orig_height;
3820
3821     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3822         ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 30, 1);
3823     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3824         ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 31, 1);
3825
3826     /* STMM output surface (index = 33) */
3827     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM].obj_surface;
3828     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3829         obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3830         I965_SURFACEFORMAT_R8_UNORM, 33, 1);
3831
3832     dndi_top_first = !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
3833
3834     /* sampler dndi */
3835     dri_bo_map(pp_context->sampler_state_table.bo, True);
3836     assert(pp_context->sampler_state_table.bo->virtual);
3837     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3838     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3839
3840     /* sample dndi index 0 */
3841     index = 0;
3842     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3843     sampler_dndi[index].dw0.dnmh_delt = 7;
3844     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
3845     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
3846     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
3847     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3848
3849     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3850     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3851     sampler_dndi[index].dw1.stmm_c2 = 2;
3852     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3853     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3854
3855     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
3856     sampler_dndi[index].dw2.bne_edge_th = 1;
3857     sampler_dndi[index].dw2.smooth_mv_th = 0;
3858     sampler_dndi[index].dw2.sad_tight_th = 5;
3859     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
3860     sampler_dndi[index].dw2.good_neighbor_th = 12;
3861
3862     sampler_dndi[index].dw3.maximum_stmm = 150;
3863     sampler_dndi[index].dw3.multipler_for_vecm = 30;
3864     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3865     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3866     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3867
3868     sampler_dndi[index].dw4.sdi_delta = 5;
3869     sampler_dndi[index].dw4.sdi_threshold = 100;
3870     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3871     sampler_dndi[index].dw4.stmm_shift_up = 1;
3872     sampler_dndi[index].dw4.stmm_shift_down = 3;
3873     sampler_dndi[index].dw4.minimum_stmm = 118;
3874
3875     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3876     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3877     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3878     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
3879     sampler_dndi[index].dw6.dn_enable = 0;
3880     sampler_dndi[index].dw6.di_enable = 1;
3881     sampler_dndi[index].dw6.di_partial = 0;
3882     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3883     sampler_dndi[index].dw6.dndi_stream_id = 1;
3884     sampler_dndi[index].dw6.dndi_first_frame = dndi_ctx->is_first_frame;
3885     sampler_dndi[index].dw6.progressive_dn = 0;
3886     sampler_dndi[index].dw6.mcdi_enable =
3887         (deint_params->algorithm == VAProcDeinterlacingMotionCompensated);
3888     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3889     sampler_dndi[index].dw6.cat_th1 = 0;
3890     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3891     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3892
3893     sampler_dndi[index].dw7.sad_tha = 5;
3894     sampler_dndi[index].dw7.sad_thb = 10;
3895     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3896     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
3897     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3898     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3899     sampler_dndi[index].dw7.neighborpixel_th = 10;
3900     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3901
3902     dri_bo_unmap(pp_context->sampler_state_table.bo);
3903
3904     /* private function & data */
3905     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3906     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3907     pp_context->private_context = dndi_ctx;
3908     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3909
3910     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3911     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3912     pp_static_parameter->grf1.di_top_field_first = 0;
3913     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3914
3915     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3916     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3917     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3918
3919     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3920     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3921
3922     dndi_ctx->dest_w = w;
3923     dndi_ctx->dest_h = h;
3924
3925     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3926     return VA_STATUS_SUCCESS;
3927 }
3928
3929 static int
3930 gen7_pp_dn_x_steps(void *private_context)
3931 {
3932     struct pp_dn_context *pp_dn_context = private_context;
3933
3934     return pp_dn_context->dest_w / 16;
3935 }
3936
3937 static int
3938 gen7_pp_dn_y_steps(void *private_context)
3939 {
3940     struct pp_dn_context *pp_dn_context = private_context;
3941
3942     return pp_dn_context->dest_h / 4;
3943 }
3944
3945 static int
3946 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3947 {
3948     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3949
3950     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3951     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3952
3953     return 0;
3954 }
3955
3956 static VAStatus
3957 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3958                            const struct i965_surface *src_surface,
3959                            const VARectangle *src_rect,
3960                            struct i965_surface *dst_surface,
3961                            const VARectangle *dst_rect,
3962                            void *filter_param)
3963 {
3964     struct i965_driver_data *i965 = i965_driver_data(ctx);
3965     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
3966     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3967     struct object_surface *obj_surface;
3968     struct gen7_sampler_dndi *sampler_dn;
3969     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3970     int index;
3971     int w, h;
3972     int orig_w, orig_h;
3973     int dn_strength = 15;
3974     int dndi_top_first = 1;
3975     int dn_progressive = 0;
3976
3977     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3978         dndi_top_first = 1;
3979         dn_progressive = 1;
3980     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3981         dndi_top_first = 1;
3982         dn_progressive = 0;
3983     } else {
3984         dndi_top_first = 0;
3985         dn_progressive = 0;
3986     }
3987
3988     if (dn_filter_param) {
3989         float value = dn_filter_param->value;
3990         
3991         if (value > 1.0)
3992             value = 1.0;
3993         
3994         if (value < 0.0)
3995             value = 0.0;
3996
3997         dn_strength = (int)(value * 31.0F);
3998     }
3999
4000     /* surface */
4001     obj_surface = (struct object_surface *)src_surface->base;
4002     orig_w = obj_surface->orig_width;
4003     orig_h = obj_surface->orig_height;
4004     w = obj_surface->width;
4005     h = obj_surface->height;
4006
4007     if (pp_dn_context->stmm_bo == NULL) {
4008         pp_dn_context->stmm_bo= dri_bo_alloc(i965->intel.bufmgr,
4009                                              "STMM surface",
4010                                              w * h,
4011                                              4096);
4012         assert(pp_dn_context->stmm_bo);
4013     }
4014
4015     /* source UV surface index 1 */
4016     gen7_pp_set_surface_state(ctx, pp_context,
4017                               obj_surface->bo, w * h,
4018                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4019                               1, 0);
4020
4021     /* source YUV surface index 3 */
4022     gen7_pp_set_surface2_state(ctx, pp_context,
4023                                obj_surface->bo, 0,
4024                                orig_w, orig_h, w,
4025                                0, h,
4026                                SURFACE_FORMAT_PLANAR_420_8, 1,
4027                                3);
4028
4029     /* source (temporal reference) YUV surface index 4 */
4030     gen7_pp_set_surface2_state(ctx, pp_context,
4031                                obj_surface->bo, 0,
4032                                orig_w, orig_h, w,
4033                                0, h,
4034                                SURFACE_FORMAT_PLANAR_420_8, 1,
4035                                4);
4036
4037     /* STMM / History Statistics input surface, index 5 */
4038     gen7_pp_set_surface_state(ctx, pp_context,
4039                               pp_dn_context->stmm_bo, 0,
4040                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4041                               33, 1);
4042
4043     /* destination surface */
4044     obj_surface = (struct object_surface *)dst_surface->base;
4045     orig_w = obj_surface->orig_width;
4046     orig_h = obj_surface->orig_height;
4047     w = obj_surface->width;
4048     h = obj_surface->height;
4049
4050     /* destination Y surface index 24 */
4051     gen7_pp_set_surface_state(ctx, pp_context,
4052                               obj_surface->bo, 0,
4053                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4054                               24, 1);
4055
4056     /* destination UV surface index 25 */
4057     gen7_pp_set_surface_state(ctx, pp_context,
4058                               obj_surface->bo, w * h,
4059                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4060                               25, 1);
4061
4062     /* sampler dn */
4063     dri_bo_map(pp_context->sampler_state_table.bo, True);
4064     assert(pp_context->sampler_state_table.bo->virtual);
4065     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
4066     sampler_dn = pp_context->sampler_state_table.bo->virtual;
4067
4068     /* sample dn index 1 */
4069     index = 0;
4070     sampler_dn[index].dw0.denoise_asd_threshold = 0;
4071     sampler_dn[index].dw0.dnmh_delt = 8;
4072     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
4073     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
4074     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
4075     sampler_dn[index].dw0.denoise_stad_threshold = 0;
4076
4077     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
4078     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
4079     sampler_dn[index].dw1.stmm_c2 = 0;
4080     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
4081     sampler_dn[index].dw1.temporal_difference_threshold = 16;
4082
4083     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
4084     sampler_dn[index].dw2.bne_edge_th = 1;
4085     sampler_dn[index].dw2.smooth_mv_th = 0;
4086     sampler_dn[index].dw2.sad_tight_th = 5;
4087     sampler_dn[index].dw2.cat_slope_minus1 = 9;
4088     sampler_dn[index].dw2.good_neighbor_th = 4;
4089
4090     sampler_dn[index].dw3.maximum_stmm = 128;
4091     sampler_dn[index].dw3.multipler_for_vecm = 2;
4092     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
4093     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
4094     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
4095
4096     sampler_dn[index].dw4.sdi_delta = 8;
4097     sampler_dn[index].dw4.sdi_threshold = 128;
4098     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
4099     sampler_dn[index].dw4.stmm_shift_up = 0;
4100     sampler_dn[index].dw4.stmm_shift_down = 0;
4101     sampler_dn[index].dw4.minimum_stmm = 0;
4102
4103     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
4104     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
4105     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
4106     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
4107
4108     sampler_dn[index].dw6.dn_enable = 1;
4109     sampler_dn[index].dw6.di_enable = 0;
4110     sampler_dn[index].dw6.di_partial = 0;
4111     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
4112     sampler_dn[index].dw6.dndi_stream_id = 1;
4113     sampler_dn[index].dw6.dndi_first_frame = 1;
4114     sampler_dn[index].dw6.progressive_dn = dn_progressive;
4115     sampler_dn[index].dw6.mcdi_enable = 0;
4116     sampler_dn[index].dw6.fmd_tear_threshold = 32;
4117     sampler_dn[index].dw6.cat_th1 = 0;
4118     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
4119     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
4120
4121     sampler_dn[index].dw7.sad_tha = 5;
4122     sampler_dn[index].dw7.sad_thb = 10;
4123     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
4124     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
4125     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
4126     sampler_dn[index].dw7.vdi_walker_enable = 0;
4127     sampler_dn[index].dw7.neighborpixel_th = 10;
4128     sampler_dn[index].dw7.column_width_minus1 = w / 16;
4129
4130     dri_bo_unmap(pp_context->sampler_state_table.bo);
4131
4132     /* private function & data */
4133     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
4134     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
4135     pp_context->private_context = &pp_context->pp_dn_context;
4136     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
4137
4138     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
4139     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
4140     pp_static_parameter->grf1.di_top_field_first = 0;
4141     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
4142
4143     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
4144     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
4145     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
4146
4147     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
4148     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
4149
4150     pp_dn_context->dest_w = w;
4151     pp_dn_context->dest_h = h;
4152
4153     dst_surface->flags = src_surface->flags;
4154
4155     return VA_STATUS_SUCCESS;
4156 }
4157
4158 static VAStatus
4159 ironlake_pp_initialize(
4160     VADriverContextP ctx,
4161     struct i965_post_processing_context *pp_context,
4162     const struct i965_surface *src_surface,
4163     const VARectangle *src_rect,
4164     struct i965_surface *dst_surface,
4165     const VARectangle *dst_rect,
4166     int pp_index,
4167     void *filter_param
4168 )
4169 {
4170     VAStatus va_status;
4171     struct i965_driver_data *i965 = i965_driver_data(ctx);
4172     struct pp_module *pp_module;
4173     dri_bo *bo;
4174     int static_param_size, inline_param_size;
4175
4176     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4177     bo = dri_bo_alloc(i965->intel.bufmgr,
4178                       "surface state & binding table",
4179                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4180                       4096);
4181     assert(bo);
4182     pp_context->surface_state_binding_table.bo = bo;
4183
4184     dri_bo_unreference(pp_context->curbe.bo);
4185     bo = dri_bo_alloc(i965->intel.bufmgr,
4186                       "constant buffer",
4187                       4096, 
4188                       4096);
4189     assert(bo);
4190     pp_context->curbe.bo = bo;
4191
4192     dri_bo_unreference(pp_context->idrt.bo);
4193     bo = dri_bo_alloc(i965->intel.bufmgr, 
4194                       "interface discriptor", 
4195                       sizeof(struct i965_interface_descriptor), 
4196                       4096);
4197     assert(bo);
4198     pp_context->idrt.bo = bo;
4199     pp_context->idrt.num_interface_descriptors = 0;
4200
4201     dri_bo_unreference(pp_context->sampler_state_table.bo);
4202     bo = dri_bo_alloc(i965->intel.bufmgr, 
4203                       "sampler state table", 
4204                       4096,
4205                       4096);
4206     assert(bo);
4207     dri_bo_map(bo, True);
4208     memset(bo->virtual, 0, bo->size);
4209     dri_bo_unmap(bo);
4210     pp_context->sampler_state_table.bo = bo;
4211
4212     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4213     bo = dri_bo_alloc(i965->intel.bufmgr, 
4214                       "sampler 8x8 state ",
4215                       4096,
4216                       4096);
4217     assert(bo);
4218     pp_context->sampler_state_table.bo_8x8 = bo;
4219
4220     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4221     bo = dri_bo_alloc(i965->intel.bufmgr, 
4222                       "sampler 8x8 state ",
4223                       4096,
4224                       4096);
4225     assert(bo);
4226     pp_context->sampler_state_table.bo_8x8_uv = bo;
4227
4228     dri_bo_unreference(pp_context->vfe_state.bo);
4229     bo = dri_bo_alloc(i965->intel.bufmgr, 
4230                       "vfe state", 
4231                       sizeof(struct i965_vfe_state), 
4232                       4096);
4233     assert(bo);
4234     pp_context->vfe_state.bo = bo;
4235
4236     static_param_size = sizeof(struct pp_static_parameter);
4237     inline_param_size = sizeof(struct pp_inline_parameter);
4238
4239     memset(pp_context->pp_static_parameter, 0, static_param_size);
4240     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4241     
4242     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4243     pp_context->current_pp = pp_index;
4244     pp_module = &pp_context->pp_modules[pp_index];
4245     
4246     if (pp_module->initialize)
4247         va_status = pp_module->initialize(ctx, pp_context,
4248                                           src_surface,
4249                                           src_rect,
4250                                           dst_surface,
4251                                           dst_rect,
4252                                           filter_param);
4253     else
4254         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4255
4256     return va_status;
4257 }
4258
4259 static VAStatus
4260 ironlake_post_processing(
4261     VADriverContextP   ctx,
4262     struct i965_post_processing_context *pp_context,
4263     const struct i965_surface *src_surface,
4264     const VARectangle *src_rect,
4265     struct i965_surface *dst_surface,
4266     const VARectangle *dst_rect,
4267     int                pp_index,
4268     void *filter_param
4269 )
4270 {
4271     VAStatus va_status;
4272
4273     va_status = ironlake_pp_initialize(ctx, pp_context,
4274                                        src_surface,
4275                                        src_rect,
4276                                        dst_surface,
4277                                        dst_rect,
4278                                        pp_index,
4279                                        filter_param);
4280
4281     if (va_status == VA_STATUS_SUCCESS) {
4282         ironlake_pp_states_setup(ctx, pp_context);
4283         ironlake_pp_pipeline_setup(ctx, pp_context);
4284     }
4285
4286     return va_status;
4287 }
4288
4289 static VAStatus
4290 gen6_pp_initialize(
4291     VADriverContextP ctx,
4292     struct i965_post_processing_context *pp_context,
4293     const struct i965_surface *src_surface,
4294     const VARectangle *src_rect,
4295     struct i965_surface *dst_surface,
4296     const VARectangle *dst_rect,
4297     int pp_index,
4298     void *filter_param
4299 )
4300 {
4301     VAStatus va_status;
4302     struct i965_driver_data *i965 = i965_driver_data(ctx);
4303     struct pp_module *pp_module;
4304     dri_bo *bo;
4305     int static_param_size, inline_param_size;
4306
4307     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4308     bo = dri_bo_alloc(i965->intel.bufmgr,
4309                       "surface state & binding table",
4310                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4311                       4096);
4312     assert(bo);
4313     pp_context->surface_state_binding_table.bo = bo;
4314
4315     dri_bo_unreference(pp_context->curbe.bo);
4316     bo = dri_bo_alloc(i965->intel.bufmgr,
4317                       "constant buffer",
4318                       4096, 
4319                       4096);
4320     assert(bo);
4321     pp_context->curbe.bo = bo;
4322
4323     dri_bo_unreference(pp_context->idrt.bo);
4324     bo = dri_bo_alloc(i965->intel.bufmgr, 
4325                       "interface discriptor", 
4326                       sizeof(struct gen6_interface_descriptor_data), 
4327                       4096);
4328     assert(bo);
4329     pp_context->idrt.bo = bo;
4330     pp_context->idrt.num_interface_descriptors = 0;
4331
4332     dri_bo_unreference(pp_context->sampler_state_table.bo);
4333     bo = dri_bo_alloc(i965->intel.bufmgr, 
4334                       "sampler state table", 
4335                       4096,
4336                       4096);
4337     assert(bo);
4338     dri_bo_map(bo, True);
4339     memset(bo->virtual, 0, bo->size);
4340     dri_bo_unmap(bo);
4341     pp_context->sampler_state_table.bo = bo;
4342
4343     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4344     bo = dri_bo_alloc(i965->intel.bufmgr, 
4345                       "sampler 8x8 state ",
4346                       4096,
4347                       4096);
4348     assert(bo);
4349     pp_context->sampler_state_table.bo_8x8 = bo;
4350
4351     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4352     bo = dri_bo_alloc(i965->intel.bufmgr, 
4353                       "sampler 8x8 state ",
4354                       4096,
4355                       4096);
4356     assert(bo);
4357     pp_context->sampler_state_table.bo_8x8_uv = bo;
4358
4359     dri_bo_unreference(pp_context->vfe_state.bo);
4360     bo = dri_bo_alloc(i965->intel.bufmgr, 
4361                       "vfe state", 
4362                       sizeof(struct i965_vfe_state), 
4363                       4096);
4364     assert(bo);
4365     pp_context->vfe_state.bo = bo;
4366     
4367     if (IS_GEN7(i965->intel.device_info)) {
4368         static_param_size = sizeof(struct gen7_pp_static_parameter);
4369         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
4370     } else {
4371         static_param_size = sizeof(struct pp_static_parameter);
4372         inline_param_size = sizeof(struct pp_inline_parameter);
4373     }
4374
4375     memset(pp_context->pp_static_parameter, 0, static_param_size);
4376     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4377
4378     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4379     pp_context->current_pp = pp_index;
4380     pp_module = &pp_context->pp_modules[pp_index];
4381     
4382     if (pp_module->initialize)
4383         va_status = pp_module->initialize(ctx, pp_context,
4384                                           src_surface,
4385                                           src_rect,
4386                                           dst_surface,
4387                                           dst_rect,
4388                                           filter_param);
4389     else
4390         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4391
4392     calculate_boundary_block_mask(pp_context, dst_rect);
4393
4394     return va_status;
4395 }
4396
4397
4398 static void
4399 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
4400                                    struct i965_post_processing_context *pp_context)
4401 {
4402     struct i965_driver_data *i965 = i965_driver_data(ctx);
4403     struct gen6_interface_descriptor_data *desc;
4404     dri_bo *bo;
4405     int pp_index = pp_context->current_pp;
4406
4407     bo = pp_context->idrt.bo;
4408     dri_bo_map(bo, True);
4409     assert(bo->virtual);
4410     desc = bo->virtual;
4411     memset(desc, 0, sizeof(*desc));
4412     desc->desc0.kernel_start_pointer = 
4413         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
4414     desc->desc1.single_program_flow = 1;
4415     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
4416     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
4417     desc->desc2.sampler_state_pointer = 
4418         pp_context->sampler_state_table.bo->offset >> 5;
4419     desc->desc3.binding_table_entry_count = 0;
4420     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
4421     desc->desc4.constant_urb_entry_read_offset = 0;
4422
4423     if (IS_GEN7(i965->intel.device_info))
4424         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
4425     else
4426         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
4427
4428     dri_bo_emit_reloc(bo,
4429                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4430                       0,
4431                       offsetof(struct gen6_interface_descriptor_data, desc0),
4432                       pp_context->pp_modules[pp_index].kernel.bo);
4433
4434     dri_bo_emit_reloc(bo,
4435                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4436                       desc->desc2.sampler_count << 2,
4437                       offsetof(struct gen6_interface_descriptor_data, desc2),
4438                       pp_context->sampler_state_table.bo);
4439
4440     dri_bo_unmap(bo);
4441     pp_context->idrt.num_interface_descriptors++;
4442 }
4443
4444 static void
4445 gen6_pp_upload_constants(VADriverContextP ctx,
4446                          struct i965_post_processing_context *pp_context)
4447 {
4448     struct i965_driver_data *i965 = i965_driver_data(ctx);
4449     unsigned char *constant_buffer;
4450     int param_size;
4451
4452     assert(sizeof(struct pp_static_parameter) == 128);
4453     assert(sizeof(struct gen7_pp_static_parameter) == 192);
4454
4455     if (IS_GEN7(i965->intel.device_info))
4456         param_size = sizeof(struct gen7_pp_static_parameter);
4457     else
4458         param_size = sizeof(struct pp_static_parameter);
4459
4460     dri_bo_map(pp_context->curbe.bo, 1);
4461     assert(pp_context->curbe.bo->virtual);
4462     constant_buffer = pp_context->curbe.bo->virtual;
4463     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
4464     dri_bo_unmap(pp_context->curbe.bo);
4465 }
4466
4467 static void
4468 gen6_pp_states_setup(VADriverContextP ctx,
4469                      struct i965_post_processing_context *pp_context)
4470 {
4471     gen6_pp_interface_descriptor_table(ctx, pp_context);
4472     gen6_pp_upload_constants(ctx, pp_context);
4473 }
4474
4475 static void
4476 gen6_pp_pipeline_select(VADriverContextP ctx,
4477                         struct i965_post_processing_context *pp_context)
4478 {
4479     struct intel_batchbuffer *batch = pp_context->batch;
4480
4481     BEGIN_BATCH(batch, 1);
4482     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
4483     ADVANCE_BATCH(batch);
4484 }
4485
4486 static void
4487 gen6_pp_state_base_address(VADriverContextP ctx,
4488                            struct i965_post_processing_context *pp_context)
4489 {
4490     struct intel_batchbuffer *batch = pp_context->batch;
4491
4492     BEGIN_BATCH(batch, 10);
4493     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
4494     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4495     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
4496     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4497     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4498     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4499     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4500     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4501     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4502     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4503     ADVANCE_BATCH(batch);
4504 }
4505
4506 static void
4507 gen6_pp_vfe_state(VADriverContextP ctx,
4508                   struct i965_post_processing_context *pp_context)
4509 {
4510     struct intel_batchbuffer *batch = pp_context->batch;
4511
4512     BEGIN_BATCH(batch, 8);
4513     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
4514     OUT_BATCH(batch, 0);
4515     OUT_BATCH(batch,
4516               (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 |
4517               pp_context->vfe_gpu_state.num_urb_entries << 8);
4518     OUT_BATCH(batch, 0);
4519     OUT_BATCH(batch,
4520               (pp_context->vfe_gpu_state.urb_entry_size) << 16 |  
4521                 /* URB Entry Allocation Size, in 256 bits unit */
4522               (pp_context->vfe_gpu_state.curbe_allocation_size));
4523                 /* CURBE Allocation Size, in 256 bits unit */
4524     OUT_BATCH(batch, 0);
4525     OUT_BATCH(batch, 0);
4526     OUT_BATCH(batch, 0);
4527     ADVANCE_BATCH(batch);
4528 }
4529
4530 static void
4531 gen6_pp_curbe_load(VADriverContextP ctx,
4532                    struct i965_post_processing_context *pp_context)
4533 {
4534     struct intel_batchbuffer *batch = pp_context->batch;
4535     struct i965_driver_data *i965 = i965_driver_data(ctx);
4536     int param_size;
4537
4538     if (IS_GEN7(i965->intel.device_info))
4539         param_size = sizeof(struct gen7_pp_static_parameter);
4540     else
4541         param_size = sizeof(struct pp_static_parameter);
4542
4543     BEGIN_BATCH(batch, 4);
4544     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
4545     OUT_BATCH(batch, 0);
4546     OUT_BATCH(batch,
4547               param_size);
4548     OUT_RELOC(batch, 
4549               pp_context->curbe.bo,
4550               I915_GEM_DOMAIN_INSTRUCTION, 0,
4551               0);
4552     ADVANCE_BATCH(batch);
4553 }
4554
4555 static void
4556 gen6_interface_descriptor_load(VADriverContextP ctx,
4557                                struct i965_post_processing_context *pp_context)
4558 {
4559     struct intel_batchbuffer *batch = pp_context->batch;
4560
4561     BEGIN_BATCH(batch, 4);
4562     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
4563     OUT_BATCH(batch, 0);
4564     OUT_BATCH(batch,
4565               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
4566     OUT_RELOC(batch, 
4567               pp_context->idrt.bo,
4568               I915_GEM_DOMAIN_INSTRUCTION, 0,
4569               0);
4570     ADVANCE_BATCH(batch);
4571 }
4572
4573 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) 
4574 {
4575     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
4576
4577     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
4578     pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
4579     // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle
4580     pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
4581     pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4582     pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
4583
4584     /* 1 x N */
4585     if (x_steps == 1) {
4586         if (y == y_steps-1) {
4587             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
4588         }
4589         else {
4590             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
4591         }
4592     }
4593
4594     /* M x 1 */
4595     if (y_steps == 1) {
4596         if (x == 0) { // all blocks in this group are on the left edge
4597             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
4598             pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left; 
4599         }
4600         else if (x == x_steps-1) {
4601             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
4602             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
4603         }
4604         else {
4605             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
4606             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4607             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
4608         }
4609     }
4610
4611 }
4612
4613 static void
4614 gen6_pp_object_walker(VADriverContextP ctx,
4615                       struct i965_post_processing_context *pp_context)
4616 {
4617     struct i965_driver_data *i965 = i965_driver_data(ctx);
4618     struct intel_batchbuffer *batch = pp_context->batch;
4619     int x, x_steps, y, y_steps;
4620     int param_size, command_length_in_dws;
4621     dri_bo *command_buffer;
4622     unsigned int *command_ptr;
4623
4624     if (IS_GEN7(i965->intel.device_info))
4625         param_size = sizeof(struct gen7_pp_inline_parameter);
4626     else
4627         param_size = sizeof(struct pp_inline_parameter);
4628
4629     x_steps = pp_context->pp_x_steps(pp_context->private_context);
4630     y_steps = pp_context->pp_y_steps(pp_context->private_context);
4631     command_length_in_dws = 6 + (param_size >> 2);
4632     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
4633                                   "command objects buffer",
4634                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
4635                                   4096);
4636
4637     dri_bo_map(command_buffer, 1);
4638     command_ptr = command_buffer->virtual;
4639
4640     for (y = 0; y < y_steps; y++) {
4641         for (x = 0; x < x_steps; x++) {
4642             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
4643                 // some common block parameter update goes here, apply to all pp functions
4644                 if (IS_GEN6(i965->intel.device_info))
4645                     update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
4646                 
4647                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
4648                 *command_ptr++ = 0;
4649                 *command_ptr++ = 0;
4650                 *command_ptr++ = 0;
4651                 *command_ptr++ = 0;
4652                 *command_ptr++ = 0;
4653                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
4654                 command_ptr += (param_size >> 2);
4655             }
4656         }
4657     }
4658
4659     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
4660         *command_ptr++ = 0;
4661
4662     *command_ptr = MI_BATCH_BUFFER_END;
4663
4664     dri_bo_unmap(command_buffer);
4665
4666     BEGIN_BATCH(batch, 2);
4667     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
4668     OUT_RELOC(batch, command_buffer,
4669               I915_GEM_DOMAIN_COMMAND, 0,
4670               0);
4671     ADVANCE_BATCH(batch);
4672
4673     dri_bo_unreference(command_buffer);
4674
4675     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
4676      * will cause control to pass back to ring buffer 
4677      */
4678     intel_batchbuffer_end_atomic(batch);
4679     intel_batchbuffer_flush(batch);
4680     intel_batchbuffer_start_atomic(batch, 0x1000);
4681 }
4682
4683 static void
4684 gen6_pp_pipeline_setup(VADriverContextP ctx,
4685                        struct i965_post_processing_context *pp_context)
4686 {
4687     struct intel_batchbuffer *batch = pp_context->batch;
4688
4689     intel_batchbuffer_start_atomic(batch, 0x1000);
4690     intel_batchbuffer_emit_mi_flush(batch);
4691     gen6_pp_pipeline_select(ctx, pp_context);
4692     gen6_pp_state_base_address(ctx, pp_context);
4693     gen6_pp_vfe_state(ctx, pp_context);
4694     gen6_pp_curbe_load(ctx, pp_context);
4695     gen6_interface_descriptor_load(ctx, pp_context);
4696     gen6_pp_object_walker(ctx, pp_context);
4697     intel_batchbuffer_end_atomic(batch);
4698 }
4699
4700 static VAStatus
4701 gen6_post_processing(
4702     VADriverContextP ctx,
4703     struct i965_post_processing_context *pp_context,
4704     const struct i965_surface *src_surface,
4705     const VARectangle *src_rect,
4706     struct i965_surface *dst_surface,
4707     const VARectangle *dst_rect,
4708     int pp_index,
4709     void *filter_param
4710 )
4711 {
4712     VAStatus va_status;
4713     
4714     va_status = gen6_pp_initialize(ctx, pp_context,
4715                                    src_surface,
4716                                    src_rect,
4717                                    dst_surface,
4718                                    dst_rect,
4719                                    pp_index,
4720                                    filter_param);
4721
4722     if (va_status == VA_STATUS_SUCCESS) {
4723         gen6_pp_states_setup(ctx, pp_context);
4724         gen6_pp_pipeline_setup(ctx, pp_context);
4725     }
4726
4727     if (va_status == VA_STATUS_SUCCESS_1)
4728         va_status = VA_STATUS_SUCCESS;
4729
4730     return va_status;
4731 }
4732
4733 static VAStatus
4734 i965_post_processing_internal(
4735     VADriverContextP   ctx,
4736     struct i965_post_processing_context *pp_context,
4737     const struct i965_surface *src_surface,
4738     const VARectangle *src_rect,
4739     struct i965_surface *dst_surface,
4740     const VARectangle *dst_rect,
4741     int                pp_index,
4742     void *filter_param
4743 )
4744 {
4745     VAStatus va_status;
4746     struct i965_driver_data *i965 = i965_driver_data(ctx);
4747
4748     if (pp_context && pp_context->intel_post_processing) {
4749         va_status = (pp_context->intel_post_processing)(ctx, pp_context,
4750                           src_surface, src_rect,
4751                           dst_surface, dst_rect,
4752                           pp_index, filter_param);
4753     } else {
4754         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4755     }
4756
4757     return va_status;
4758 }
4759
/*
 * Convert a packed 0xAARRGGBB colour into 8-bit Y, U, V and alpha values
 * using fixed-point (per-mille) integer coefficients.
 *
 * The results are stored through y, v, u and a, in that order.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red   = (argb >> 16) & 0xff;
    const int green = (argb >> 8) & 0xff;
    const int blue  = argb & 0xff;

    const int luma     = (257 * red + 504 * green + 98 * blue) / 1000 + 16;
    const int chroma_v = (439 * red - 368 * green - 71 * blue) / 1000 + 128;
    const int chroma_u = (-148 * red - 291 * green + 439 * blue) / 1000 + 128;

    *y = luma;
    *v = chroma_v;
    *u = chroma_u;
    *a = (argb >> 24) & 0xff;
}
4776
4777 static void 
4778 i965_vpp_clear_surface(VADriverContextP ctx,
4779                        struct i965_post_processing_context *pp_context,
4780                        struct object_surface *obj_surface,
4781                        unsigned int color)
4782 {
4783     struct i965_driver_data *i965 = i965_driver_data(ctx);
4784     struct intel_batchbuffer *batch = pp_context->batch;
4785     unsigned int blt_cmd, br13;
4786     unsigned int tiling = 0, swizzle = 0;
4787     int pitch;
4788     unsigned char y, u, v, a = 0;
4789     int region_width, region_height;
4790
4791     /* Currently only support NV12 surface */
4792     if (!obj_surface || obj_surface->fourcc != VA_FOURCC_NV12)
4793         return;
4794
4795     rgb_to_yuv(color, &y, &u, &v, &a);
4796
4797     if (a == 0)
4798         return;
4799
4800     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4801     blt_cmd = XY_COLOR_BLT_CMD;
4802     pitch = obj_surface->width;
4803
4804     if (tiling != I915_TILING_NONE) {
4805         assert(tiling == I915_TILING_Y);
4806         // blt_cmd |= XY_COLOR_BLT_DST_TILED;
4807         // pitch >>= 2;
4808     }
4809
4810     br13 = 0xf0 << 16;
4811     br13 |= BR13_8;
4812     br13 |= pitch;
4813
4814     if (IS_IRONLAKE(i965->intel.device_info)) {
4815         intel_batchbuffer_start_atomic(batch, 48);
4816         BEGIN_BATCH(batch, 12);
4817     } else {
4818         /* Will double-check the command if the new chipset is added */
4819         intel_batchbuffer_start_atomic_blt(batch, 48);
4820         BEGIN_BLT_BATCH(batch, 12);
4821     }
4822
4823     region_width = obj_surface->width;
4824     region_height = obj_surface->height;
4825
4826     OUT_BATCH(batch, blt_cmd);
4827     OUT_BATCH(batch, br13);
4828     OUT_BATCH(batch,
4829               0 << 16 |
4830               0);
4831     OUT_BATCH(batch,
4832               region_height << 16 |
4833               region_width);
4834     OUT_RELOC(batch, obj_surface->bo, 
4835               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4836               0);
4837     OUT_BATCH(batch, y);
4838
4839     br13 = 0xf0 << 16;
4840     br13 |= BR13_565;
4841     br13 |= pitch;
4842
4843     region_width = obj_surface->width / 2;
4844     region_height = obj_surface->height / 2;
4845
4846     if (tiling == I915_TILING_Y) {
4847         region_height = ALIGN(obj_surface->height / 2, 32);
4848     }
4849
4850     OUT_BATCH(batch, blt_cmd);
4851     OUT_BATCH(batch, br13);
4852     OUT_BATCH(batch,
4853               0 << 16 |
4854               0);
4855     OUT_BATCH(batch,
4856               region_height << 16 |
4857               region_width);
4858     OUT_RELOC(batch, obj_surface->bo, 
4859               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4860               obj_surface->width * obj_surface->y_cb_offset);
4861     OUT_BATCH(batch, v << 8 | u);
4862
4863     ADVANCE_BATCH(batch);
4864     intel_batchbuffer_end_atomic(batch);
4865 }
4866
4867 VAStatus
4868 i965_scaling_processing(
4869     VADriverContextP   ctx,
4870     struct object_surface *src_surface_obj,
4871     const VARectangle *src_rect,
4872     struct object_surface *dst_surface_obj,
4873     const VARectangle *dst_rect,
4874     unsigned int       va_flags)
4875 {
4876     VAStatus va_status = VA_STATUS_SUCCESS;
4877     struct i965_driver_data *i965 = i965_driver_data(ctx);
4878  
4879     assert(src_surface_obj->fourcc == VA_FOURCC_NV12);
4880     assert(dst_surface_obj->fourcc == VA_FOURCC_NV12);
4881
4882     if (HAS_VPP(i965)) {
4883         struct i965_surface src_surface;
4884         struct i965_surface dst_surface;
4885         struct i965_post_processing_context *pp_context;
4886         unsigned int filter_flags;
4887
4888          _i965LockMutex(&i965->pp_mutex);
4889
4890          src_surface.base = (struct object_base *)src_surface_obj;
4891          src_surface.type = I965_SURFACE_TYPE_SURFACE;
4892          src_surface.flags = I965_SURFACE_FLAG_FRAME;
4893          dst_surface.base = (struct object_base *)dst_surface_obj;
4894          dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4895          dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4896
4897          pp_context = i965->pp_context;
4898          filter_flags = pp_context->filter_flags;
4899          pp_context->filter_flags = va_flags;
4900
4901          va_status = i965_post_processing_internal(ctx, pp_context,
4902              &src_surface, src_rect, &dst_surface, dst_rect,
4903              avs_is_needed(va_flags) ? PP_NV12_AVS : PP_NV12_SCALING, NULL);
4904
4905          pp_context->filter_flags = filter_flags;
4906
4907          _i965UnlockMutex(&i965->pp_mutex);
4908     }
4909
4910     return va_status;
4911 }
4912
4913 VASurfaceID
4914 i965_post_processing(
4915     VADriverContextP   ctx,
4916     struct object_surface *obj_surface,
4917     const VARectangle *src_rect,
4918     const VARectangle *dst_rect,
4919     unsigned int       va_flags,
4920     int               *has_done_scaling,
4921     VARectangle *calibrated_rect
4922 )
4923 {
4924     struct i965_driver_data *i965 = i965_driver_data(ctx);
4925     VASurfaceID out_surface_id = VA_INVALID_ID;
4926     VASurfaceID tmp_id = VA_INVALID_ID;
4927     
4928     *has_done_scaling = 0;
4929
4930     if (HAS_VPP(i965)) {
4931         VAStatus status;
4932         struct i965_surface src_surface;
4933         struct i965_surface dst_surface;
4934         struct i965_post_processing_context *pp_context;
4935
4936         /* Currently only support post processing for NV12 surface */
4937         if (obj_surface->fourcc != VA_FOURCC_NV12)
4938             return out_surface_id;
4939
4940         _i965LockMutex(&i965->pp_mutex);
4941
4942         pp_context = i965->pp_context;
4943         pp_context->filter_flags = va_flags;
4944         if (avs_is_needed(va_flags)) {
4945             VARectangle tmp_dst_rect;
4946             struct i965_render_state *render_state = &i965->render_state;
4947             struct intel_region *dest_region = render_state->draw_region;
4948
4949             if (out_surface_id != VA_INVALID_ID)
4950                 tmp_id = out_surface_id;
4951
4952             tmp_dst_rect.x = 0;
4953             tmp_dst_rect.y = 0;
4954             tmp_dst_rect.width = dst_rect->width;
4955             tmp_dst_rect.height = dst_rect->height;
4956             src_surface.base = (struct object_base *)obj_surface;
4957             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4958             src_surface.flags = I965_SURFACE_FLAG_FRAME;
4959
4960             status = i965_CreateSurfaces(ctx,
4961                                          dst_rect->width,
4962                                          dst_rect->height,
4963                                          VA_RT_FORMAT_YUV420,
4964                                          1,
4965                                          &out_surface_id);
4966             assert(status == VA_STATUS_SUCCESS);
4967             obj_surface = SURFACE(out_surface_id);
4968             assert(obj_surface);
4969             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4970             i965_vpp_clear_surface(ctx, pp_context, obj_surface, 0);
4971
4972             dst_surface.base = (struct object_base *)obj_surface;
4973             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4974             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4975
4976             i965_post_processing_internal(ctx, pp_context,
4977                                           &src_surface,
4978                                           src_rect,
4979                                           &dst_surface,
4980                                           &tmp_dst_rect,
4981                                           PP_NV12_AVS,
4982                                           NULL);
4983
4984             if (tmp_id != VA_INVALID_ID)
4985                 i965_DestroySurfaces(ctx, &tmp_id, 1);
4986                 
4987             *has_done_scaling = 1;
4988             calibrated_rect->x = 0;
4989             calibrated_rect->y = 0;
4990             calibrated_rect->width = dst_rect->width;
4991             calibrated_rect->height = dst_rect->height;
4992         }
4993
4994         _i965UnlockMutex(&i965->pp_mutex);
4995     }
4996
4997     return out_surface_id;
4998 }       
4999
5000 static VAStatus
5001 i965_image_pl2_processing(VADriverContextP ctx,
5002                           const struct i965_surface *src_surface,
5003                           const VARectangle *src_rect,
5004                           struct i965_surface *dst_surface,
5005                           const VARectangle *dst_rect);
5006
5007 static VAStatus
5008 i965_image_plx_nv12_plx_processing(VADriverContextP ctx,
5009                                    VAStatus (*i965_image_plx_nv12_processing)(
5010                                        VADriverContextP,
5011                                        const struct i965_surface *,
5012                                        const VARectangle *,
5013                                        struct i965_surface *,
5014                                        const VARectangle *),
5015                                    const struct i965_surface *src_surface,
5016                                    const VARectangle *src_rect,
5017                                    struct i965_surface *dst_surface,
5018                                    const VARectangle *dst_rect)
5019 {
5020     struct i965_driver_data *i965 = i965_driver_data(ctx);
5021     VAStatus status;
5022     VASurfaceID tmp_surface_id = VA_INVALID_SURFACE;
5023     struct object_surface *obj_surface = NULL;
5024     struct i965_surface tmp_surface;
5025     int width, height;
5026
5027     pp_get_surface_size(ctx, dst_surface, &width, &height);
5028     status = i965_CreateSurfaces(ctx,
5029                                  width,
5030                                  height,
5031                                  VA_RT_FORMAT_YUV420,
5032                                  1,
5033                                  &tmp_surface_id);
5034     assert(status == VA_STATUS_SUCCESS);
5035     obj_surface = SURFACE(tmp_surface_id);
5036     assert(obj_surface);
5037     i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5038
5039     tmp_surface.base = (struct object_base *)obj_surface;
5040     tmp_surface.type = I965_SURFACE_TYPE_SURFACE;
5041     tmp_surface.flags = I965_SURFACE_FLAG_FRAME;
5042
5043     status = i965_image_plx_nv12_processing(ctx,
5044                                             src_surface,
5045                                             src_rect,
5046                                             &tmp_surface,
5047                                             dst_rect);
5048
5049     if (status == VA_STATUS_SUCCESS)
5050         status = i965_image_pl2_processing(ctx,
5051                                            &tmp_surface,
5052                                            dst_rect,
5053                                            dst_surface,
5054                                            dst_rect);
5055
5056     i965_DestroySurfaces(ctx,
5057                          &tmp_surface_id,
5058                          1);
5059
5060     return status;
5061 }
5062
5063
5064 static VAStatus
5065 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
5066                                const struct i965_surface *src_surface,
5067                                const VARectangle *src_rect,
5068                                struct i965_surface *dst_surface,
5069                                const VARectangle *dst_rect)
5070 {
5071     struct i965_driver_data *i965 = i965_driver_data(ctx);
5072     struct i965_post_processing_context *pp_context = i965->pp_context;
5073     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5074     VAStatus vaStatus;
5075
5076     switch (fourcc) {
5077     case VA_FOURCC_NV12:
5078         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5079                                                  src_surface,
5080                                                  src_rect,
5081                                                  dst_surface,
5082                                                  dst_rect,
5083                                                  PP_RGBX_LOAD_SAVE_NV12,
5084                                                  NULL);
5085         intel_batchbuffer_flush(pp_context->batch);
5086         break;
5087
5088     default:
5089         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5090                                                       i965_image_pl1_rgbx_processing,
5091                                                       src_surface,
5092                                                       src_rect,
5093                                                       dst_surface,
5094                                                       dst_rect);
5095         break;
5096     }
5097
5098     return vaStatus;
5099 }
5100
5101 static VAStatus
5102 i965_image_pl3_processing(VADriverContextP ctx,
5103                           const struct i965_surface *src_surface,
5104                           const VARectangle *src_rect,
5105                           struct i965_surface *dst_surface,
5106                           const VARectangle *dst_rect)
5107 {
5108     struct i965_driver_data *i965 = i965_driver_data(ctx);
5109     struct i965_post_processing_context *pp_context = i965->pp_context;
5110     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5111     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5112
5113     switch (fourcc) {
5114     case VA_FOURCC_NV12:
5115         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5116                                                  src_surface,
5117                                                  src_rect,
5118                                                  dst_surface,
5119                                                  dst_rect,
5120                                                  PP_PL3_LOAD_SAVE_N12,
5121                                                  NULL);
5122         intel_batchbuffer_flush(pp_context->batch);
5123         break;
5124
5125     case VA_FOURCC_IMC1:
5126     case VA_FOURCC_IMC3:
5127     case VA_FOURCC_YV12:
5128     case VA_FOURCC_I420:
5129         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5130                                                  src_surface,
5131                                                  src_rect,
5132                                                  dst_surface,
5133                                                  dst_rect,
5134                                                  PP_PL3_LOAD_SAVE_PL3,
5135                                                  NULL);
5136         intel_batchbuffer_flush(pp_context->batch);
5137         break;
5138
5139     case VA_FOURCC_YUY2:
5140     case VA_FOURCC_UYVY:
5141         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5142                                                  src_surface,
5143                                                  src_rect,
5144                                                  dst_surface,
5145                                                  dst_rect,
5146                                                  PP_PL3_LOAD_SAVE_PA,
5147                                                  NULL);
5148         intel_batchbuffer_flush(pp_context->batch);
5149         break;
5150
5151     default:
5152         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5153                                                       i965_image_pl3_processing,
5154                                                       src_surface,
5155                                                       src_rect,
5156                                                       dst_surface,
5157                                                       dst_rect);
5158         break;
5159     }
5160
5161     return vaStatus;
5162 }
5163
5164 static VAStatus
5165 i965_image_pl2_processing(VADriverContextP ctx,
5166                           const struct i965_surface *src_surface,
5167                           const VARectangle *src_rect,
5168                           struct i965_surface *dst_surface,
5169                           const VARectangle *dst_rect)
5170 {
5171     struct i965_driver_data *i965 = i965_driver_data(ctx);
5172     struct i965_post_processing_context *pp_context = i965->pp_context;
5173     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5174     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5175
5176     switch (fourcc) {
5177     case VA_FOURCC_NV12:
5178         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5179                                                  src_surface,
5180                                                  src_rect,
5181                                                  dst_surface,
5182                                                  dst_rect,
5183                                                  PP_NV12_LOAD_SAVE_N12,
5184                                                  NULL);
5185         break;
5186
5187     case VA_FOURCC_IMC1:
5188     case VA_FOURCC_IMC3:
5189     case VA_FOURCC_YV12:
5190     case VA_FOURCC_I420:
5191         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5192                                                  src_surface,
5193                                                  src_rect,
5194                                                  dst_surface,
5195                                                  dst_rect,
5196                                                  PP_NV12_LOAD_SAVE_PL3,
5197                                                  NULL);
5198         break;
5199
5200     case VA_FOURCC_YUY2:
5201     case VA_FOURCC_UYVY:
5202         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5203                                                  src_surface,
5204                                                  src_rect,
5205                                                  dst_surface,
5206                                                  dst_rect,
5207                                                  PP_NV12_LOAD_SAVE_PA,
5208                                                  NULL);
5209         break;
5210
5211     case VA_FOURCC_BGRX:
5212     case VA_FOURCC_BGRA:
5213     case VA_FOURCC_RGBX:
5214     case VA_FOURCC_RGBA:
5215         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5216                                                  src_surface,
5217                                                  src_rect,
5218                                                  dst_surface,
5219                                                  dst_rect,
5220                                                  PP_NV12_LOAD_SAVE_RGBX,
5221                                                  NULL);
5222         break;
5223
5224     default:
5225         return VA_STATUS_ERROR_UNIMPLEMENTED;
5226     }
5227
5228     intel_batchbuffer_flush(pp_context->batch);
5229
5230     return vaStatus;
5231 }
5232
5233 static VAStatus
5234 i965_image_pl1_processing(VADriverContextP ctx,
5235                           const struct i965_surface *src_surface,
5236                           const VARectangle *src_rect,
5237                           struct i965_surface *dst_surface,
5238                           const VARectangle *dst_rect)
5239 {
5240     struct i965_driver_data *i965 = i965_driver_data(ctx);
5241     struct i965_post_processing_context *pp_context = i965->pp_context;
5242     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5243     VAStatus vaStatus;
5244
5245     switch (fourcc) {
5246     case VA_FOURCC_NV12:
5247         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5248                                                  src_surface,
5249                                                  src_rect,
5250                                                  dst_surface,
5251                                                  dst_rect,
5252                                                  PP_PA_LOAD_SAVE_NV12,
5253                                                  NULL);
5254         intel_batchbuffer_flush(pp_context->batch);
5255         break;
5256
5257     case VA_FOURCC_YV12:
5258         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5259                                                  src_surface,
5260                                                  src_rect,
5261                                                  dst_surface,
5262                                                  dst_rect,
5263                                                  PP_PA_LOAD_SAVE_PL3,
5264                                                  NULL);
5265         intel_batchbuffer_flush(pp_context->batch);
5266         break;
5267
5268     case VA_FOURCC_YUY2:
5269     case VA_FOURCC_UYVY:
5270         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5271                                                  src_surface,
5272                                                  src_rect,
5273                                                  dst_surface,
5274                                                  dst_rect,
5275                                                  PP_PA_LOAD_SAVE_PA,
5276                                                  NULL);
5277         intel_batchbuffer_flush(pp_context->batch);
5278         break;
5279
5280     default:
5281         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5282                                                       i965_image_pl1_processing,
5283                                                       src_surface,
5284                                                       src_rect,
5285                                                       dst_surface,
5286                                                       dst_rect);
5287         break;
5288     }
5289
5290     return vaStatus;
5291 }
5292
5293 VAStatus
5294 i965_image_processing(VADriverContextP ctx,
5295                       const struct i965_surface *src_surface,
5296                       const VARectangle *src_rect,
5297                       struct i965_surface *dst_surface,
5298                       const VARectangle *dst_rect)
5299 {
5300     struct i965_driver_data *i965 = i965_driver_data(ctx);
5301     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
5302
5303     if (HAS_VPP(i965)) {
5304         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
5305
5306         _i965LockMutex(&i965->pp_mutex);
5307
5308         switch (fourcc) {
5309         case VA_FOURCC_YV12:
5310         case VA_FOURCC_I420:
5311         case VA_FOURCC_IMC1:
5312         case VA_FOURCC_IMC3:
5313         case VA_FOURCC_422H:
5314         case VA_FOURCC_422V:
5315         case VA_FOURCC_411P:
5316         case VA_FOURCC_444P:
5317         case VA_FOURCC_YV16:
5318             status = i965_image_pl3_processing(ctx,
5319                                                src_surface,
5320                                                src_rect,
5321                                                dst_surface,
5322                                                dst_rect);
5323             break;
5324
5325         case  VA_FOURCC_NV12:
5326             status = i965_image_pl2_processing(ctx,
5327                                                src_surface,
5328                                                src_rect,
5329                                                dst_surface,
5330                                                dst_rect);
5331             break;
5332         case VA_FOURCC_YUY2:
5333         case VA_FOURCC_UYVY:
5334             status = i965_image_pl1_processing(ctx,
5335                                                src_surface,
5336                                                src_rect,
5337                                                dst_surface,
5338                                                dst_rect);
5339             break;
5340         case VA_FOURCC_BGRA:
5341         case VA_FOURCC_BGRX:
5342         case VA_FOURCC_RGBA:
5343         case VA_FOURCC_RGBX:
5344             status = i965_image_pl1_rgbx_processing(ctx,
5345                                                src_surface,
5346                                                src_rect,
5347                                                dst_surface,
5348                                                dst_rect);
5349             break;
5350         default:
5351             status = VA_STATUS_ERROR_UNIMPLEMENTED;
5352             break;
5353         }
5354         
5355         _i965UnlockMutex(&i965->pp_mutex);
5356     }
5357
5358     return status;
5359 }       
5360
5361 static void
5362 i965_post_processing_context_finalize(VADriverContextP ctx,
5363     struct i965_post_processing_context *pp_context)
5364 {
5365     int i;
5366
5367     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
5368     pp_context->surface_state_binding_table.bo = NULL;
5369
5370     dri_bo_unreference(pp_context->curbe.bo);
5371     pp_context->curbe.bo = NULL;
5372
5373     dri_bo_unreference(pp_context->sampler_state_table.bo);
5374     pp_context->sampler_state_table.bo = NULL;
5375
5376     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
5377     pp_context->sampler_state_table.bo_8x8 = NULL;
5378
5379     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
5380     pp_context->sampler_state_table.bo_8x8_uv = NULL;
5381
5382     dri_bo_unreference(pp_context->idrt.bo);
5383     pp_context->idrt.bo = NULL;
5384     pp_context->idrt.num_interface_descriptors = 0;
5385
5386     dri_bo_unreference(pp_context->vfe_state.bo);
5387     pp_context->vfe_state.bo = NULL;
5388
5389     for (i = 0; i < ARRAY_ELEMS(pp_context->pp_dndi_context.frame_store); i++)
5390         pp_dndi_frame_store_clear(&pp_context->pp_dndi_context.frame_store[i],
5391             ctx);
5392
5393     dri_bo_unreference(pp_context->pp_dn_context.stmm_bo);
5394     pp_context->pp_dn_context.stmm_bo = NULL;
5395
5396     for (i = 0; i < NUM_PP_MODULES; i++) {
5397         struct pp_module *pp_module = &pp_context->pp_modules[i];
5398
5399         dri_bo_unreference(pp_module->kernel.bo);
5400         pp_module->kernel.bo = NULL;
5401     }
5402
5403     free(pp_context->pp_static_parameter);
5404     free(pp_context->pp_inline_parameter);
5405     pp_context->pp_static_parameter = NULL;
5406     pp_context->pp_inline_parameter = NULL;
5407 }
5408
5409 void
5410 i965_post_processing_terminate(VADriverContextP ctx)
5411 {
5412     struct i965_driver_data *i965 = i965_driver_data(ctx);
5413     struct i965_post_processing_context *pp_context = i965->pp_context;
5414
5415     if (pp_context) {
5416         pp_context->finalize(ctx, pp_context);
5417         free(pp_context);
5418     }
5419
5420     i965->pp_context = NULL;
5421 }
5422
5423 #define VPP_CURBE_ALLOCATION_SIZE       32
5424
5425 void
5426 i965_post_processing_context_init(VADriverContextP ctx,
5427                                   void *data,
5428                                   struct intel_batchbuffer *batch)
5429 {
5430     struct i965_driver_data *i965 = i965_driver_data(ctx);
5431     int i;
5432     struct i965_post_processing_context *pp_context = data;
5433     const AVSConfig *avs_config;
5434
5435     if (IS_IRONLAKE(i965->intel.device_info)) {
5436         pp_context->urb.size = i965->intel.device_info->urb_size;
5437         pp_context->urb.num_vfe_entries = 32;
5438         pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
5439         pp_context->urb.num_cs_entries = 1;
5440         pp_context->urb.size_cs_entry = 2;
5441         pp_context->urb.vfe_start = 0;
5442         pp_context->urb.cs_start = pp_context->urb.vfe_start + 
5443             pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
5444         assert(pp_context->urb.cs_start +
5445            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);
5446         pp_context->intel_post_processing = ironlake_post_processing;
5447     } else {
5448         pp_context->vfe_gpu_state.max_num_threads = 60;
5449         pp_context->vfe_gpu_state.num_urb_entries = 59;
5450         pp_context->vfe_gpu_state.gpgpu_mode = 0;
5451         pp_context->vfe_gpu_state.urb_entry_size = 16 - 1;
5452         pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE;
5453         pp_context->intel_post_processing = gen6_post_processing;
5454     }
5455
5456     pp_context->finalize = i965_post_processing_context_finalize;
5457
5458     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
5459     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
5460     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
5461     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
5462
5463     if (IS_HASWELL(i965->intel.device_info))
5464         memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
5465     else if (IS_GEN7(i965->intel.device_info))
5466         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
5467     else if (IS_GEN6(i965->intel.device_info))
5468         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
5469     else if (IS_IRONLAKE(i965->intel.device_info))
5470         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
5471
5472     for (i = 0; i < NUM_PP_MODULES; i++) {
5473         struct pp_module *pp_module = &pp_context->pp_modules[i];
5474         dri_bo_unreference(pp_module->kernel.bo);
5475         if (pp_module->kernel.bin && pp_module->kernel.size) {
5476             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
5477                                                 pp_module->kernel.name,
5478                                                 pp_module->kernel.size,
5479                                                 4096);
5480             assert(pp_module->kernel.bo);
5481             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
5482         } else {
5483             pp_module->kernel.bo = NULL;
5484         }
5485     }
5486
5487     /* static & inline parameters */
5488     if (IS_GEN7(i965->intel.device_info)) {
5489         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
5490         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
5491     } else {
5492         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
5493         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
5494     }
5495
5496     pp_context->batch = batch;
5497     pp_dndi_context_init(&pp_context->pp_dndi_context);
5498
5499     avs_config = IS_IRONLAKE(i965->intel.device_info) ? &gen5_avs_config :
5500         &gen6_avs_config;
5501     avs_init_state(&pp_context->pp_avs_context.state, avs_config);
5502 }
5503
5504 bool
5505 i965_post_processing_init(VADriverContextP ctx)
5506 {
5507     struct i965_driver_data *i965 = i965_driver_data(ctx);
5508     struct i965_post_processing_context *pp_context = i965->pp_context;
5509
5510     if (HAS_VPP(i965)) {
5511         if (pp_context == NULL) {
5512             pp_context = calloc(1, sizeof(*pp_context));
5513             i965->codec_info->post_processing_context_init(ctx, pp_context, i965->pp_batch);
5514             i965->pp_context = pp_context;
5515         }
5516     }
5517
5518     return true;
5519 }
5520
5521 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
5522     PP_NULL,    /* VAProcFilterNone */
5523     PP_NV12_DN, /* VAProcFilterNoiseReduction */
5524     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
5525     PP_NULL,    /* VAProcFilterSharpening */
5526     PP_NULL,    /* VAProcFilterColorBalance */
5527 };
5528
5529 static const int proc_frame_to_pp_frame[3] = {
5530     I965_SURFACE_FLAG_FRAME,
5531     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
5532     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
5533 };
5534
/*
 * Bit flags describing which operations a processing request needs.
 * They are OR-ed together into pp_ops by the fast path.
 */
enum {
    PP_OP_CHANGE_FORMAT = 0x1,  /* source and target fourcc differ */
    PP_OP_CHANGE_SIZE   = 0x2,  /* source and target rectangle sizes differ */
    PP_OP_DEINTERLACE   = 0x4,  /* deinterlacing was requested */
    PP_OP_COMPLEX       = 0x8,  /* request needs the full pipeline */
};
5541
5542 static int
5543 pp_get_kernel_index(uint32_t src_fourcc, uint32_t dst_fourcc, uint32_t pp_ops,
5544     uint32_t filter_flags)
5545 {
5546     int pp_index = -1;
5547
5548     if (!dst_fourcc)
5549         dst_fourcc = src_fourcc;
5550
5551     switch (src_fourcc) {
5552     case VA_FOURCC_RGBX:
5553     case VA_FOURCC_RGBA:
5554     case VA_FOURCC_BGRX:
5555     case VA_FOURCC_BGRA:
5556         switch (dst_fourcc) {
5557         case VA_FOURCC_NV12:
5558             pp_index = PP_RGBX_LOAD_SAVE_NV12;
5559             break;
5560         }
5561         break;
5562     case VA_FOURCC_YUY2:
5563     case VA_FOURCC_UYVY:
5564         switch (dst_fourcc) {
5565         case VA_FOURCC_NV12:
5566             pp_index = PP_PA_LOAD_SAVE_NV12;
5567             break;
5568         case VA_FOURCC_I420:
5569         case VA_FOURCC_YV12:
5570             pp_index = PP_PA_LOAD_SAVE_PL3;
5571             break;
5572         case VA_FOURCC_YUY2:
5573         case VA_FOURCC_UYVY:
5574             pp_index = PP_PA_LOAD_SAVE_PA;
5575             break;
5576         }
5577         break;
5578     case VA_FOURCC_NV12:
5579         switch (dst_fourcc) {
5580         case VA_FOURCC_NV12:
5581             if (pp_ops & PP_OP_CHANGE_SIZE)
5582                 pp_index = avs_is_needed(filter_flags) ?
5583                     PP_NV12_AVS : PP_NV12_SCALING;
5584             else
5585                 pp_index = PP_NV12_LOAD_SAVE_N12;
5586             break;
5587         case VA_FOURCC_I420:
5588         case VA_FOURCC_YV12:
5589         case VA_FOURCC_IMC1:
5590         case VA_FOURCC_IMC3:
5591             pp_index = PP_NV12_LOAD_SAVE_PL3;
5592             break;
5593         case VA_FOURCC_YUY2:
5594         case VA_FOURCC_UYVY:
5595             pp_index = PP_NV12_LOAD_SAVE_PA;
5596             break;
5597         case VA_FOURCC_RGBX:
5598         case VA_FOURCC_RGBA:
5599         case VA_FOURCC_BGRX:
5600         case VA_FOURCC_BGRA:
5601             pp_index = PP_NV12_LOAD_SAVE_RGBX;
5602             break;
5603         }
5604         break;
5605     case VA_FOURCC_I420:
5606     case VA_FOURCC_YV12:
5607     case VA_FOURCC_IMC1:
5608     case VA_FOURCC_IMC3:
5609     case VA_FOURCC_YV16:
5610     case VA_FOURCC_411P:
5611     case VA_FOURCC_422H:
5612     case VA_FOURCC_422V:
5613     case VA_FOURCC_444P:
5614         switch (dst_fourcc) {
5615         case VA_FOURCC_NV12:
5616             pp_index = PP_PL3_LOAD_SAVE_N12;
5617             break;
5618         case VA_FOURCC_I420:
5619         case VA_FOURCC_YV12:
5620         case VA_FOURCC_IMC1:
5621         case VA_FOURCC_IMC3:
5622             pp_index = PP_PL3_LOAD_SAVE_PL3;
5623             break;
5624         case VA_FOURCC_YUY2:
5625         case VA_FOURCC_UYVY:
5626             pp_index = PP_PL3_LOAD_SAVE_PA;
5627             break;
5628         }
5629         break;
5630     }
5631     return pp_index;
5632 }
5633
5634 static VAStatus
5635 i965_proc_picture_fast(VADriverContextP ctx,
5636     struct i965_proc_context *proc_context, struct proc_state *proc_state)
5637 {
5638     struct i965_driver_data * const i965 = i965_driver_data(ctx);
5639     const VAProcPipelineParameterBuffer * const pipeline_param =
5640         (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
5641     struct object_surface *src_obj_surface, *dst_obj_surface;
5642     struct i965_surface src_surface, dst_surface;
5643     const VAProcFilterParameterBufferDeinterlacing *deint_params = NULL;
5644     VARectangle src_rect, dst_rect;
5645     VAStatus status;
5646     uint32_t i, filter_flags = 0, pp_ops = 0;
5647     int pp_index;
5648
5649     /* Validate pipeline parameters */
5650     if (pipeline_param->num_filters > 0 && !pipeline_param->filters)
5651         return VA_STATUS_ERROR_INVALID_PARAMETER;
5652
5653     for (i = 0; i < pipeline_param->num_filters; i++) {
5654         const VAProcFilterParameterBuffer *filter;
5655         struct object_buffer * const obj_buffer =
5656             BUFFER(pipeline_param->filters[i]);
5657
5658         assert(obj_buffer && obj_buffer->buffer_store);
5659         if (!obj_buffer || !obj_buffer->buffer_store)
5660             return VA_STATUS_ERROR_INVALID_PARAMETER;
5661
5662         filter = (VAProcFilterParameterBuffer *)
5663             obj_buffer->buffer_store->buffer;
5664         switch (filter->type) {
5665         case VAProcFilterDeinterlacing:
5666             pp_ops |= PP_OP_DEINTERLACE;
5667             deint_params = (VAProcFilterParameterBufferDeinterlacing *)filter;
5668             break;
5669         default:
5670             pp_ops |= PP_OP_COMPLEX;
5671             break;
5672         }
5673     }
5674     filter_flags |= pipeline_param->filter_flags & VA_FILTER_SCALING_MASK;
5675
5676     /* Validate source surface */
5677     src_obj_surface = SURFACE(pipeline_param->surface);
5678     if (!src_obj_surface)
5679         return VA_STATUS_ERROR_INVALID_SURFACE;
5680
5681     if (!src_obj_surface->fourcc)
5682         return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
5683
5684     if (pipeline_param->surface_region) {
5685         src_rect.x = pipeline_param->surface_region->x;
5686         src_rect.y = pipeline_param->surface_region->y;
5687         src_rect.width = pipeline_param->surface_region->width;
5688         src_rect.height = pipeline_param->surface_region->height;
5689     } else {
5690         src_rect.x = 0;
5691         src_rect.y = 0;
5692         src_rect.width = src_obj_surface->orig_width;
5693         src_rect.height = src_obj_surface->orig_height;
5694     }
5695
5696     src_surface.base  = &src_obj_surface->base;
5697     src_surface.type  = I965_SURFACE_TYPE_SURFACE;
5698     src_surface.flags = I965_SURFACE_FLAG_FRAME;
5699
5700     if (pp_ops & PP_OP_DEINTERLACE) {
5701         filter_flags |= !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD) ?
5702             VA_TOP_FIELD : VA_BOTTOM_FIELD;
5703         if (deint_params->algorithm != VAProcDeinterlacingBob)
5704             pp_ops |= PP_OP_COMPLEX;
5705     }
5706     else if (pipeline_param->filter_flags & (VA_TOP_FIELD | VA_BOTTOM_FIELD)) {
5707         filter_flags |= (pipeline_param->filter_flags & VA_TOP_FIELD) ?
5708             VA_TOP_FIELD : VA_BOTTOM_FIELD;
5709         pp_ops |= PP_OP_DEINTERLACE;
5710     }
5711     if (pp_ops & PP_OP_DEINTERLACE) // XXX: no bob-deinterlacing optimization yet
5712         pp_ops |= PP_OP_COMPLEX;
5713
5714     /* Validate target surface */
5715     dst_obj_surface = SURFACE(proc_state->current_render_target);
5716     if (!dst_obj_surface)
5717         return VA_STATUS_ERROR_INVALID_SURFACE;
5718
5719     if (dst_obj_surface->fourcc &&
5720         dst_obj_surface->fourcc != src_obj_surface->fourcc)
5721         pp_ops |= PP_OP_CHANGE_FORMAT;
5722
5723     if (pipeline_param->output_region) {
5724         dst_rect.x = pipeline_param->output_region->x;
5725         dst_rect.y = pipeline_param->output_region->y;
5726         dst_rect.width = pipeline_param->output_region->width;
5727         dst_rect.height = pipeline_param->output_region->height;
5728     } else {
5729         dst_rect.x = 0;
5730         dst_rect.y = 0;
5731         dst_rect.width = dst_obj_surface->orig_width;
5732         dst_rect.height = dst_obj_surface->orig_height;
5733     }
5734
5735     if (dst_rect.width != src_rect.width || dst_rect.height != src_rect.height)
5736         pp_ops |= PP_OP_CHANGE_SIZE;
5737
5738     dst_surface.base  = &dst_obj_surface->base;
5739     dst_surface.type  = I965_SURFACE_TYPE_SURFACE;
5740     dst_surface.flags = I965_SURFACE_FLAG_FRAME;
5741
5742     /* Validate "fast-path" processing capabilities */
5743     if (!IS_GEN7(i965->intel.device_info)) {
5744         if ((pp_ops & PP_OP_CHANGE_FORMAT) && (pp_ops & PP_OP_CHANGE_SIZE))
5745             return VA_STATUS_ERROR_UNIMPLEMENTED; // temporary surface is needed
5746     }
5747     if (pipeline_param->pipeline_flags & VA_PROC_PIPELINE_FAST) {
5748         filter_flags &= ~VA_FILTER_SCALING_MASK;
5749         filter_flags |= VA_FILTER_SCALING_FAST;
5750     }
5751     else {
5752         if (pp_ops & PP_OP_COMPLEX)
5753             return VA_STATUS_ERROR_UNIMPLEMENTED; // full pipeline is needed
5754         if ((filter_flags & VA_FILTER_SCALING_MASK) > VA_FILTER_SCALING_HQ)
5755             return VA_STATUS_ERROR_UNIMPLEMENTED;
5756     }
5757
5758     pp_index = pp_get_kernel_index(src_obj_surface->fourcc,
5759         dst_obj_surface->fourcc, pp_ops, filter_flags);
5760     if (pp_index < 0)
5761         return VA_STATUS_ERROR_UNIMPLEMENTED;
5762
5763     proc_context->pp_context.filter_flags = filter_flags;
5764     status = i965_post_processing_internal(ctx, &proc_context->pp_context,
5765         &src_surface, &src_rect, &dst_surface, &dst_rect, pp_index, NULL);
5766     intel_batchbuffer_flush(proc_context->pp_context.batch);
5767     return status;
5768 }
5769
5770 VAStatus 
5771 i965_proc_picture(VADriverContextP ctx, 
5772                   VAProfile profile, 
5773                   union codec_state *codec_state,
5774                   struct hw_context *hw_context)
5775 {
5776     struct i965_driver_data *i965 = i965_driver_data(ctx);
5777     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
5778     struct proc_state *proc_state = &codec_state->proc;
5779     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
5780     struct object_surface *obj_surface;
5781     struct i965_surface src_surface, dst_surface;
5782     VARectangle src_rect, dst_rect;
5783     VAStatus status;
5784     int i;
5785     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
5786     int num_tmp_surfaces = 0;
5787     unsigned int tiling = 0, swizzle = 0;
5788     int in_width, in_height;
5789
5790     status = i965_proc_picture_fast(ctx, proc_context, proc_state);
5791     if (status != VA_STATUS_ERROR_UNIMPLEMENTED)
5792         return status;
5793
5794     if (pipeline_param->surface == VA_INVALID_ID ||
5795         proc_state->current_render_target == VA_INVALID_ID) {
5796         status = VA_STATUS_ERROR_INVALID_SURFACE;
5797         goto error;
5798     }
5799
5800     obj_surface = SURFACE(pipeline_param->surface);
5801
5802     if (!obj_surface) {
5803         status = VA_STATUS_ERROR_INVALID_SURFACE;
5804         goto error;
5805     }
5806
5807     if (!obj_surface->bo) {
5808         status = VA_STATUS_ERROR_INVALID_VALUE; /* The input surface is created without valid content */
5809         goto error;
5810     }
5811
5812     if (pipeline_param->num_filters && !pipeline_param->filters) {
5813         status = VA_STATUS_ERROR_INVALID_PARAMETER;
5814         goto error;
5815     }
5816
5817     in_width = obj_surface->orig_width;
5818     in_height = obj_surface->orig_height;
5819     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
5820
5821     src_surface.base = (struct object_base *)obj_surface;
5822     src_surface.type = I965_SURFACE_TYPE_SURFACE;
5823     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
5824
5825     VASurfaceID out_surface_id = VA_INVALID_ID;
5826     if (obj_surface->fourcc != VA_FOURCC_NV12) {
5827         src_surface.base = (struct object_base *)obj_surface;
5828         src_surface.type = I965_SURFACE_TYPE_SURFACE;
5829         src_surface.flags = I965_SURFACE_FLAG_FRAME;
5830         src_rect.x = 0;
5831         src_rect.y = 0;
5832         src_rect.width = in_width;
5833         src_rect.height = in_height;
5834
5835         status = i965_CreateSurfaces(ctx,
5836                                      in_width,
5837                                      in_height,
5838                                      VA_RT_FORMAT_YUV420,
5839                                      1,
5840                                      &out_surface_id);
5841         assert(status == VA_STATUS_SUCCESS);
5842         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5843         obj_surface = SURFACE(out_surface_id);
5844         assert(obj_surface);
5845         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5846
5847         dst_surface.base = (struct object_base *)obj_surface;
5848         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5849         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
5850         dst_rect.x = 0;
5851         dst_rect.y = 0;
5852         dst_rect.width = in_width;
5853         dst_rect.height = in_height;
5854
5855         status = i965_image_processing(ctx,
5856                                        &src_surface,
5857                                        &src_rect,
5858                                        &dst_surface,
5859                                        &dst_rect);
5860         assert(status == VA_STATUS_SUCCESS);
5861
5862         src_surface.base = (struct object_base *)obj_surface;
5863         src_surface.type = I965_SURFACE_TYPE_SURFACE;
5864         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
5865     }
5866
5867     if (pipeline_param->surface_region) {
5868         src_rect.x = pipeline_param->surface_region->x;
5869         src_rect.y = pipeline_param->surface_region->y;
5870         src_rect.width = pipeline_param->surface_region->width;
5871         src_rect.height = pipeline_param->surface_region->height;
5872     } else {
5873         src_rect.x = 0;
5874         src_rect.y = 0;
5875         src_rect.width = in_width;
5876         src_rect.height = in_height;
5877     }
5878
5879     proc_context->pp_context.pipeline_param = pipeline_param;
5880
5881     for (i = 0; i < pipeline_param->num_filters; i++) {
5882         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
5883         VAProcFilterParameterBufferBase *filter_param = NULL;
5884         VAProcFilterType filter_type;
5885         int kernel_index;
5886
5887         if (!obj_buffer ||
5888             !obj_buffer->buffer_store ||
5889             !obj_buffer->buffer_store->buffer) {
5890             status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN;
5891             goto error;
5892         }
5893
5894         out_surface_id = VA_INVALID_ID;
5895         filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
5896         filter_type = filter_param->type;
5897         kernel_index = procfilter_to_pp_flag[filter_type];
5898
5899         if (kernel_index != PP_NULL &&
5900             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
5901             status = i965_CreateSurfaces(ctx,
5902                                          in_width,
5903                                          in_height,
5904                                          VA_RT_FORMAT_YUV420,
5905                                          1,
5906                                          &out_surface_id);
5907             assert(status == VA_STATUS_SUCCESS);
5908             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5909             obj_surface = SURFACE(out_surface_id);
5910             assert(obj_surface);
5911             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5912             dst_surface.base = (struct object_base *)obj_surface;
5913             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5914             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
5915                                                    &src_surface,
5916                                                    &src_rect,
5917                                                    &dst_surface,
5918                                                    &src_rect,
5919                                                    kernel_index,
5920                                                    filter_param);
5921
5922             if (status == VA_STATUS_SUCCESS) {
5923                 src_surface.base = dst_surface.base;
5924                 src_surface.type = dst_surface.type;
5925                 src_surface.flags = dst_surface.flags;
5926             }
5927         }
5928     }
5929
5930     proc_context->pp_context.pipeline_param = NULL;
5931     obj_surface = SURFACE(proc_state->current_render_target);
5932     
5933     if (!obj_surface) {
5934         status = VA_STATUS_ERROR_INVALID_SURFACE;
5935         goto error;
5936     }
5937
5938     if (pipeline_param->output_region) {
5939         dst_rect.x = pipeline_param->output_region->x;
5940         dst_rect.y = pipeline_param->output_region->y;
5941         dst_rect.width = pipeline_param->output_region->width;
5942         dst_rect.height = pipeline_param->output_region->height;
5943     } else {
5944         dst_rect.x = 0;
5945         dst_rect.y = 0;
5946         dst_rect.width = obj_surface->orig_width;
5947         dst_rect.height = obj_surface->orig_height;
5948     }
5949
5950     if (IS_GEN7(i965->intel.device_info) ||
5951         IS_GEN8(i965->intel.device_info) ||
5952         IS_GEN9(i965->intel.device_info)) {
5953         unsigned int saved_filter_flag;
5954         struct i965_post_processing_context *i965pp_context = i965->pp_context;
5955
5956         if (obj_surface->fourcc == 0) {
5957             i965_check_alloc_surface_bo(ctx, obj_surface, 1,
5958                                         VA_FOURCC_NV12,
5959                                         SUBSAMPLE_YUV420);
5960         }
5961
5962         i965_vpp_clear_surface(ctx, &proc_context->pp_context,
5963                                obj_surface,
5964                                pipeline_param->output_background_color);
5965
5966         intel_batchbuffer_flush(hw_context->batch);
5967
5968         saved_filter_flag = i965pp_context->filter_flags;
5969         i965pp_context->filter_flags = VA_FILTER_SCALING_HQ;
5970
5971         dst_surface.base = (struct object_base *)obj_surface;
5972         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5973         i965_image_processing(ctx, &src_surface, &src_rect, &dst_surface, &dst_rect);
5974
5975         i965pp_context->filter_flags = saved_filter_flag;
5976
5977         if (num_tmp_surfaces)
5978             i965_DestroySurfaces(ctx,
5979                              tmp_surfaces,
5980                              num_tmp_surfaces);
5981
5982         return VA_STATUS_SUCCESS;
5983     }
5984
5985     int csc_needed = 0;
5986     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC_NV12){
5987         csc_needed = 1;
5988         out_surface_id = VA_INVALID_ID;
5989         status = i965_CreateSurfaces(ctx,
5990                                      obj_surface->orig_width,
5991                                      obj_surface->orig_height,
5992                                      VA_RT_FORMAT_YUV420, 
5993                                      1,
5994                                      &out_surface_id);
5995         assert(status == VA_STATUS_SUCCESS);
5996         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5997         struct object_surface *csc_surface = SURFACE(out_surface_id);
5998         assert(csc_surface);
5999         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6000         dst_surface.base = (struct object_base *)csc_surface;
6001     } else {
6002         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6003         dst_surface.base = (struct object_base *)obj_surface;
6004     }
6005
6006     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6007     i965_vpp_clear_surface(ctx, &proc_context->pp_context, obj_surface, pipeline_param->output_background_color); 
6008
6009     // load/save doesn't support different origin offset for src and dst surface
6010     if (src_rect.width == dst_rect.width &&
6011         src_rect.height == dst_rect.height &&
6012         src_rect.x == dst_rect.x &&
6013         src_rect.y == dst_rect.y) {
6014         i965_post_processing_internal(ctx, &proc_context->pp_context,
6015                                       &src_surface,
6016                                       &src_rect,
6017                                       &dst_surface,
6018                                       &dst_rect,
6019                                       PP_NV12_LOAD_SAVE_N12,
6020                                       NULL);
6021     } else {
6022
6023         proc_context->pp_context.filter_flags = pipeline_param->filter_flags;
6024         i965_post_processing_internal(ctx, &proc_context->pp_context,
6025                                       &src_surface,
6026                                       &src_rect,
6027                                       &dst_surface,
6028                                       &dst_rect,
6029                                       avs_is_needed(pipeline_param->filter_flags) ? PP_NV12_AVS : PP_NV12_SCALING,
6030                                       NULL);
6031     }
6032
6033     if (csc_needed) {
6034         src_surface.base = dst_surface.base;
6035         src_surface.type = dst_surface.type;
6036         src_surface.flags = dst_surface.flags;
6037         dst_surface.base = (struct object_base *)obj_surface;
6038         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6039         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
6040     }
6041     
6042     if (num_tmp_surfaces)
6043         i965_DestroySurfaces(ctx,
6044                              tmp_surfaces,
6045                              num_tmp_surfaces);
6046
6047     intel_batchbuffer_flush(hw_context->batch);
6048
6049     return VA_STATUS_SUCCESS;
6050
6051 error:
6052     if (num_tmp_surfaces)
6053         i965_DestroySurfaces(ctx,
6054                              tmp_surfaces,
6055                              num_tmp_surfaces);
6056
6057     return status;
6058 }
6059
6060 static void
6061 i965_proc_context_destroy(void *hw_context)
6062 {
6063     struct i965_proc_context * const proc_context = hw_context;
6064     VADriverContextP const ctx = proc_context->driver_context;
6065
6066     i965_post_processing_context_finalize(ctx, &proc_context->pp_context);
6067     intel_batchbuffer_free(proc_context->base.batch);
6068     free(proc_context);
6069 }
6070
6071 struct hw_context *
6072 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
6073 {
6074     struct i965_driver_data *i965 = i965_driver_data(ctx);
6075     struct intel_driver_data *intel = intel_driver_data(ctx);
6076     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
6077
6078     proc_context->base.destroy = i965_proc_context_destroy;
6079     proc_context->base.run = i965_proc_picture;
6080     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
6081     proc_context->driver_context = ctx;
6082     i965->codec_info->post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
6083
6084     return (struct hw_context *)proc_context;
6085 }
6086
6087