OSDN Git Service

decode: release huffman_table from decode state
[android-x86/hardware-intel-common-vaapi.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41 #include "i965_yuv_coefs.h"
42 #include "intel_media.h"
43
44 #include "gen75_picture_process.h"
45
/*
 * Declared locally with extern; the definition is not in this part of the
 * file. Takes the driver context plus a source and a destination
 * object_surface and returns a VAStatus.
 * NOTE(review): presumably converts src_obj_surf into dst_obj_surf through
 * the VPP path -- confirm against the definition.
 */
46 extern VAStatus
47 vpp_surface_convert(VADriverContextP ctx,
48                     struct object_surface *src_obj_surf,
49                     struct object_surface *dst_obj_surf);
50
/* Evaluates to the has_vpp field of the codec_info attached to ctx. */
51 #define HAS_VPP(ctx) ((ctx)->codec_info->has_vpp)
52
/*
 * Padded size of one surface-state slot: the largest of the GEN6, GEN7 and
 * GEN8 padded sizes, so a single layout fits all three.
 */
#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN8,\
            MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))

/*
 * Offset of surface-state slot `index`, in the same units as the padded size.
 * The argument is parenthesized so that an expression such as `i + 1` is
 * multiplied as a whole instead of being split by operator precedence.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* The binding table is placed right after MAX_PP_SURFACES surface-state slots. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
58
/*
 * NOTE(review): these three constants presumably describe the pixel block
 * handled by one kernel invocation (16x8) and the required alignment of the
 * horizontal offset (4) -- their uses are outside this chunk, confirm there.
 */
59 #define GPU_ASM_BLOCK_WIDTH         16
60 #define GPU_ASM_BLOCK_HEIGHT        8
61 #define GPU_ASM_X_OFFSET_ALIGNMENT  4
62
/*
 * NOTE(review): looks like a driver-private status value kept apart from the
 * regular VA status codes; its consumers are not visible here -- confirm.
 */
63 #define VA_STATUS_SUCCESS_1                     0xFFFFFFFE
64
/*
 * Kernel arrays for the gen5 module table. Each array is declared as rows of
 * four uint32_t values and takes its contents from the *.g4b.gen5 file named
 * in the #include; the element count is therefore fixed by that file.
 */
65 static const uint32_t pp_null_gen5[][4] = {
66 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
67 };
68
69 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
70 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
71 };
72
73 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
74 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
75 };
76
77 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
78 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
79 };
80
81 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
82 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
83 };
84
85 static const uint32_t pp_nv12_scaling_gen5[][4] = {
86 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
87 };
88
89 static const uint32_t pp_nv12_avs_gen5[][4] = {
90 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
91 };
92
93 static const uint32_t pp_nv12_dndi_gen5[][4] = {
94 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
95 };
96
97 static const uint32_t pp_nv12_dn_gen5[][4] = {
98 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
99 };
100
101 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
102 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
103 };
104
105 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
106 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
107 };
108
109 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
110 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
111 };
112
113 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
114 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
115 };
116
117 static const uint32_t pp_pa_load_save_pa_gen5[][4] = {
118 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5"
119 };
120
121 static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
122 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
123 };
124
125 static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
126 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
127 };
128
/*
 * Forward declarations of the initialize callbacks referenced by the gen5 and
 * gen6 module tables below. All of them share one signature: the driver
 * context, the post-processing context, the source surface and rectangle,
 * the destination surface and rectangle, and an opaque filter_param pointer.
 * Each returns a VAStatus.
 */
129 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
130                                    const struct i965_surface *src_surface,
131                                    const VARectangle *src_rect,
132                                    struct i965_surface *dst_surface,
133                                    const VARectangle *dst_rect,
134                                    void *filter_param);
135 static VAStatus
136 pp_nv12_avs_initialize(VADriverContextP ctx,
137                        struct i965_post_processing_context *pp_context,
138                        const struct i965_surface *src_surface, const VARectangle *src_rect,
139                        struct i965_surface *dst_surface, const VARectangle *dst_rect,
140                        void *filter_param);
141 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
142                                            const struct i965_surface *src_surface,
143                                            const VARectangle *src_rect,
144                                            struct i965_surface *dst_surface,
145                                            const VARectangle *dst_rect,
146                                            void *filter_param);
147 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
148                                              const struct i965_surface *src_surface,
149                                              const VARectangle *src_rect,
150                                              struct i965_surface *dst_surface,
151                                              const VARectangle *dst_rect,
152                                              void *filter_param);
153 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
154                                                 const struct i965_surface *src_surface,
155                                                 const VARectangle *src_rect,
156                                                 struct i965_surface *dst_surface,
157                                                 const VARectangle *dst_rect,
158                                                 void *filter_param);
159 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
160                                         const struct i965_surface *src_surface,
161                                         const VARectangle *src_rect,
162                                         struct i965_surface *dst_surface,
163                                         const VARectangle *dst_rect,
164                                         void *filter_param);
165 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
166                                       const struct i965_surface *src_surface,
167                                       const VARectangle *src_rect,
168                                       struct i965_surface *dst_surface,
169                                       const VARectangle *dst_rect,
170                                       void *filter_param);
171
/*
 * Post-processing module table built from the *_gen5 kernel arrays.
 * Every entry has the same shape: an inner aggregate holding a label string,
 * a PP_* identifier, the kernel array, its sizeof, and NULL, followed by the
 * initialize callback used for that module.
 * NOTE(review): the table looks like it is indexed by the PP_* value, so the
 * entry order would have to track that enum -- confirm before reordering.
 */
172 static struct pp_module pp_modules_gen5[] = {
173     {
174         {
175             "NULL module (for testing)",
176             PP_NULL,
177             pp_null_gen5,
178             sizeof(pp_null_gen5),
179             NULL,
180         },
181
182         pp_null_initialize,
183     },
184
    /* Format load/save modules: all share pp_plx_load_save_plx_initialize. */
185     {
186         {
187             "NV12_NV12",
188             PP_NV12_LOAD_SAVE_N12,
189             pp_nv12_load_save_nv12_gen5,
190             sizeof(pp_nv12_load_save_nv12_gen5),
191             NULL,
192         },
193
194         pp_plx_load_save_plx_initialize,
195     },
196
197     {
198         {
199             "NV12_PL3",
200             PP_NV12_LOAD_SAVE_PL3,
201             pp_nv12_load_save_pl3_gen5,
202             sizeof(pp_nv12_load_save_pl3_gen5),
203             NULL,
204         },
205
206         pp_plx_load_save_plx_initialize,
207     },
208
209     {
210         {
211             "PL3_NV12",
212             PP_PL3_LOAD_SAVE_N12,
213             pp_pl3_load_save_nv12_gen5,
214             sizeof(pp_pl3_load_save_nv12_gen5),
215             NULL,
216         },
217
218         pp_plx_load_save_plx_initialize,
219     },
220
221     {
222         {
223             "PL3_PL3",
224             PP_PL3_LOAD_SAVE_PL3,
225             pp_pl3_load_save_pl3_gen5,
226             sizeof(pp_pl3_load_save_pl3_gen5),
227             NULL,
228         },
229
230         pp_plx_load_save_plx_initialize
231     },
232
    /* Scaling, AVS, DNDI and DN each have a dedicated initialize callback. */
233     {
234         {
235             "NV12 Scaling module",
236             PP_NV12_SCALING,
237             pp_nv12_scaling_gen5,
238             sizeof(pp_nv12_scaling_gen5),
239             NULL,
240         },
241
242         pp_nv12_scaling_initialize,
243     },
244
245     {
246         {
247             "NV12 AVS module",
248             PP_NV12_AVS,
249             pp_nv12_avs_gen5,
250             sizeof(pp_nv12_avs_gen5),
251             NULL,
252         },
253
254         pp_nv12_avs_initialize,
255     },
256
257     {
258         {
259             "NV12 DNDI module",
260             PP_NV12_DNDI,
261             pp_nv12_dndi_gen5,
262             sizeof(pp_nv12_dndi_gen5),
263             NULL,
264         },
265
266         pp_nv12_dndi_initialize,
267     },
268
269     {
270         {
271             "NV12 DN module",
272             PP_NV12_DN,
273             pp_nv12_dn_gen5,
274             sizeof(pp_nv12_dn_gen5),
275             NULL,
276         },
277
278         pp_nv12_dn_initialize,
279     },
280
    /* Remaining load/save modules, again on pp_plx_load_save_plx_initialize. */
281     {
282         {
283             "NV12_PA module",
284             PP_NV12_LOAD_SAVE_PA,
285             pp_nv12_load_save_pa_gen5,
286             sizeof(pp_nv12_load_save_pa_gen5),
287             NULL,
288         },
289
290         pp_plx_load_save_plx_initialize,
291     },
292
293     {
294         {
295             "PL3_PA module",
296             PP_PL3_LOAD_SAVE_PA,
297             pp_pl3_load_save_pa_gen5,
298             sizeof(pp_pl3_load_save_pa_gen5),
299             NULL,
300         },
301
302         pp_plx_load_save_plx_initialize,
303     },
304
305     {
306         {
307             "PA_NV12 module",
308             PP_PA_LOAD_SAVE_NV12,
309             pp_pa_load_save_nv12_gen5,
310             sizeof(pp_pa_load_save_nv12_gen5),
311             NULL,
312         },
313
314         pp_plx_load_save_plx_initialize,
315     },
316
317     {
318         {
319             "PA_PL3 module",
320             PP_PA_LOAD_SAVE_PL3,
321             pp_pa_load_save_pl3_gen5,
322             sizeof(pp_pa_load_save_pl3_gen5),
323             NULL,
324         },
325
326         pp_plx_load_save_plx_initialize,
327     },
328
329     {
330         {
331             "PA_PA module",
332             PP_PA_LOAD_SAVE_PA,
333             pp_pa_load_save_pa_gen5,
334             sizeof(pp_pa_load_save_pa_gen5),
335             NULL,
336         },
337
338         pp_plx_load_save_plx_initialize,
339     },
340
341     {
342         {
343             "RGBX_NV12 module",
344             PP_RGBX_LOAD_SAVE_NV12,
345             pp_rgbx_load_save_nv12_gen5,
346             sizeof(pp_rgbx_load_save_nv12_gen5),
347             NULL,
348         },
349
350         pp_plx_load_save_plx_initialize,
351     },
352
353     {
354         {
355             "NV12_RGBX module",
356             PP_NV12_LOAD_SAVE_RGBX,
357             pp_nv12_load_save_rgbx_gen5,
358             sizeof(pp_nv12_load_save_rgbx_gen5),
359             NULL,
360         },
361
362         pp_plx_load_save_plx_initialize,
363     },
364 };
365
/*
 * Kernel arrays for the gen6 module table, one per module, each filled from
 * the *.g6b file named in its #include (rows of four uint32_t values).
 */
366 static const uint32_t pp_null_gen6[][4] = {
367 #include "shaders/post_processing/gen5_6/null.g6b"
368 };
369
370 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
371 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
372 };
373
374 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
375 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
376 };
377
378 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
379 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
380 };
381
382 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
383 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
384 };
385
/*
 * The scaling array includes the same nv12_avs_nv12.g6b file as the AVS
 * array that follows it, whereas the gen5 scaling array includes a separate
 * nv12_scaling_nv12 file.
 * NOTE(review): presumably intentional (the gen6 table pairs this array with
 * gen6_nv12_scaling_initialize) -- confirm.
 */
386 static const uint32_t pp_nv12_scaling_gen6[][4] = {
387 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
388 };
389
390 static const uint32_t pp_nv12_avs_gen6[][4] = {
391 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
392 };
393
394 static const uint32_t pp_nv12_dndi_gen6[][4] = {
395 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
396 };
397
398 static const uint32_t pp_nv12_dn_gen6[][4] = {
399 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
400 };
401
402 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
403 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
404 };
405
406 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
407 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
408 };
409
410 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
411 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
412 };
413
414 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
415 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
416 };
417
418 static const uint32_t pp_pa_load_save_pa_gen6[][4] = {
419 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g6b"
420 };
421
422 static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
423 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
424 };
425
426 static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
427 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
428 };
429
/*
 * Post-processing module table built from the *_gen6 kernel arrays.
 * Entries follow the same shape and order as pp_modules_gen5: an inner
 * aggregate (label string, PP_* identifier, kernel array, its sizeof, NULL)
 * followed by the initialize callback. The callbacks match the gen5 table
 * except for the scaling entry, which uses gen6_nv12_scaling_initialize.
 */
430 static struct pp_module pp_modules_gen6[] = {
431     {
432         {
433             "NULL module (for testing)",
434             PP_NULL,
435             pp_null_gen6,
436             sizeof(pp_null_gen6),
437             NULL,
438         },
439
440         pp_null_initialize,
441     },
442
443     {
444         {
445             "NV12_NV12",
446             PP_NV12_LOAD_SAVE_N12,
447             pp_nv12_load_save_nv12_gen6,
448             sizeof(pp_nv12_load_save_nv12_gen6),
449             NULL,
450         },
451
452         pp_plx_load_save_plx_initialize,
453     },
454
455     {
456         {
457             "NV12_PL3",
458             PP_NV12_LOAD_SAVE_PL3,
459             pp_nv12_load_save_pl3_gen6,
460             sizeof(pp_nv12_load_save_pl3_gen6),
461             NULL,
462         },
463
464         pp_plx_load_save_plx_initialize,
465     },
466
467     {
468         {
469             "PL3_NV12",
470             PP_PL3_LOAD_SAVE_N12,
471             pp_pl3_load_save_nv12_gen6,
472             sizeof(pp_pl3_load_save_nv12_gen6),
473             NULL,
474         },
475
476         pp_plx_load_save_plx_initialize,
477     },
478
479     {
480         {
481             "PL3_PL3",
482             PP_PL3_LOAD_SAVE_PL3,
483             pp_pl3_load_save_pl3_gen6,
484             sizeof(pp_pl3_load_save_pl3_gen6),
485             NULL,
486         },
487
488         pp_plx_load_save_plx_initialize,
489     },
490
    /* Gen6-specific scaling callback; the gen5 table uses pp_nv12_scaling_initialize here. */
491     {
492         {
493             "NV12 Scaling module",
494             PP_NV12_SCALING,
495             pp_nv12_scaling_gen6,
496             sizeof(pp_nv12_scaling_gen6),
497             NULL,
498         },
499
500         gen6_nv12_scaling_initialize,
501     },
502
503     {
504         {
505             "NV12 AVS module",
506             PP_NV12_AVS,
507             pp_nv12_avs_gen6,
508             sizeof(pp_nv12_avs_gen6),
509             NULL,
510         },
511
512         pp_nv12_avs_initialize,
513     },
514
515     {
516         {
517             "NV12 DNDI module",
518             PP_NV12_DNDI,
519             pp_nv12_dndi_gen6,
520             sizeof(pp_nv12_dndi_gen6),
521             NULL,
522         },
523
524         pp_nv12_dndi_initialize,
525     },
526
527     {
528         {
529             "NV12 DN module",
530             PP_NV12_DN,
531             pp_nv12_dn_gen6,
532             sizeof(pp_nv12_dn_gen6),
533             NULL,
534         },
535
536         pp_nv12_dn_initialize,
537     },
538     {
539         {
540             "NV12_PA module",
541             PP_NV12_LOAD_SAVE_PA,
542             pp_nv12_load_save_pa_gen6,
543             sizeof(pp_nv12_load_save_pa_gen6),
544             NULL,
545         },
546
547         pp_plx_load_save_plx_initialize,
548     },
549
550     {
551         {
552             "PL3_PA module",
553             PP_PL3_LOAD_SAVE_PA,
554             pp_pl3_load_save_pa_gen6,
555             sizeof(pp_pl3_load_save_pa_gen6),
556             NULL,
557         },
558
559         pp_plx_load_save_plx_initialize,
560     },
561
562     {
563         {
564             "PA_NV12 module",
565             PP_PA_LOAD_SAVE_NV12,
566             pp_pa_load_save_nv12_gen6,
567             sizeof(pp_pa_load_save_nv12_gen6),
568             NULL,
569         },
570
571         pp_plx_load_save_plx_initialize,
572     },
573
574     {
575         {
576             "PA_PL3 module",
577             PP_PA_LOAD_SAVE_PL3,
578             pp_pa_load_save_pl3_gen6,
579             sizeof(pp_pa_load_save_pl3_gen6),
580             NULL,
581         },
582
583         pp_plx_load_save_plx_initialize,
584     },
585
586     {
587         {
588             "PA_PA module",
589             PP_PA_LOAD_SAVE_PA,
590             pp_pa_load_save_pa_gen6,
591             sizeof(pp_pa_load_save_pa_gen6),
592             NULL,
593         },
594
595         pp_plx_load_save_plx_initialize,
596     },
597
598     {
599         {
600             "RGBX_NV12 module",
601             PP_RGBX_LOAD_SAVE_NV12,
602             pp_rgbx_load_save_nv12_gen6,
603             sizeof(pp_rgbx_load_save_nv12_gen6),
604             NULL,
605         },
606
607         pp_plx_load_save_plx_initialize,
608     },
609
610     {
611         {
612             "NV12_RGBX module",
613             PP_NV12_LOAD_SAVE_RGBX,
614             pp_nv12_load_save_rgbx_gen6,
615             sizeof(pp_nv12_load_save_rgbx_gen6),
616             NULL,
617         },
618
619         pp_plx_load_save_plx_initialize,
620     },
621 };
622
/*
 * Kernel arrays for the gen7 module table, filled from the *.g7b files named
 * in the #include lines. pp_null_gen7 is declared with an empty initializer,
 * so no kernel words are supplied for the NULL module. The scaling and AVS
 * arrays both include avs.g7b.
 */
623 static const uint32_t pp_null_gen7[][4] = {
624 };
625
626 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
627 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
628 };
629
630 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
631 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
632 };
633
634 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
635 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
636 };
637
638 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
639 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
640 };
641
642 static const uint32_t pp_nv12_scaling_gen7[][4] = {
643 #include "shaders/post_processing/gen7/avs.g7b"
644 };
645
646 static const uint32_t pp_nv12_avs_gen7[][4] = {
647 #include "shaders/post_processing/gen7/avs.g7b"
648 };
649
650 static const uint32_t pp_nv12_dndi_gen7[][4] = {
651 #include "shaders/post_processing/gen7/dndi.g7b"
652 };
653
654 static const uint32_t pp_nv12_dn_gen7[][4] = {
655 #include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
656 };
657 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
658 #include "shaders/post_processing/gen7/pl2_to_pa.g7b"
659 };
660 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
661 #include "shaders/post_processing/gen7/pl3_to_pa.g7b"
662 };
663 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
664 #include "shaders/post_processing/gen7/pa_to_pl2.g7b"
665 };
666 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
667 #include "shaders/post_processing/gen7/pa_to_pl3.g7b"
668 };
669 static const uint32_t pp_pa_load_save_pa_gen7[][4] = {
670 #include "shaders/post_processing/gen7/pa_to_pa.g7b"
671 };
672 static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
673 #include "shaders/post_processing/gen7/rgbx_to_nv12.g7b"
674 };
675 static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
676 #include "shaders/post_processing/gen7/pl2_to_rgbx.g7b"
677 };
678
/*
 * Forward declarations of the gen7 initialize callbacks referenced by the
 * gen7 and gen75 module tables. They take the same arguments as the
 * gen5/gen6 callbacks: driver context, post-processing context, source
 * surface and rectangle, destination surface and rectangle, and an opaque
 * filter_param pointer. Each returns a VAStatus.
 */
679 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
680                                            const struct i965_surface *src_surface,
681                                            const VARectangle *src_rect,
682                                            struct i965_surface *dst_surface,
683                                            const VARectangle *dst_rect,
684                                            void *filter_param);
685 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
686                                              const struct i965_surface *src_surface,
687                                              const VARectangle *src_rect,
688                                              struct i965_surface *dst_surface,
689                                              const VARectangle *dst_rect,
690                                              void *filter_param);
691 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
692                                            const struct i965_surface *src_surface,
693                                            const VARectangle *src_rect,
694                                            struct i965_surface *dst_surface,
695                                            const VARectangle *dst_rect,
696                                            void *filter_param);
697
/*
 * Post-processing module table built from the *_gen7 kernel arrays.
 * Entry shape and order match the gen5/gen6 tables: an inner aggregate
 * (label string, PP_* identifier, kernel array, its sizeof, NULL) followed by
 * the initialize callback. Every load/save, scaling and AVS entry uses
 * gen7_pp_plx_avs_initialize; the NULL, DNDI and DN entries have their own
 * callbacks.
 */
698 static struct pp_module pp_modules_gen7[] = {
699     {
700         {
701             "NULL module (for testing)",
702             PP_NULL,
703             pp_null_gen7,
704             sizeof(pp_null_gen7),
705             NULL,
706         },
707
708         pp_null_initialize,
709     },
710
711     {
712         {
713             "NV12_NV12",
714             PP_NV12_LOAD_SAVE_N12,
715             pp_nv12_load_save_nv12_gen7,
716             sizeof(pp_nv12_load_save_nv12_gen7),
717             NULL,
718         },
719
720         gen7_pp_plx_avs_initialize,
721     },
722
723     {
724         {
725             "NV12_PL3",
726             PP_NV12_LOAD_SAVE_PL3,
727             pp_nv12_load_save_pl3_gen7,
728             sizeof(pp_nv12_load_save_pl3_gen7),
729             NULL,
730         },
731
732         gen7_pp_plx_avs_initialize,
733     },
734
735     {
736         {
737             "PL3_NV12",
738             PP_PL3_LOAD_SAVE_N12,
739             pp_pl3_load_save_nv12_gen7,
740             sizeof(pp_pl3_load_save_nv12_gen7),
741             NULL,
742         },
743
744         gen7_pp_plx_avs_initialize,
745     },
746
747     {
748         {
749             "PL3_PL3",
750             PP_PL3_LOAD_SAVE_PL3,
751             pp_pl3_load_save_pl3_gen7,
752             sizeof(pp_pl3_load_save_pl3_gen7),
753             NULL,
754         },
755
756         gen7_pp_plx_avs_initialize,
757     },
758
759     {
760         {
761             "NV12 Scaling module",
762             PP_NV12_SCALING,
763             pp_nv12_scaling_gen7,
764             sizeof(pp_nv12_scaling_gen7),
765             NULL,
766         },
767
768         gen7_pp_plx_avs_initialize,
769     },
770
771     {
772         {
773             "NV12 AVS module",
774             PP_NV12_AVS,
775             pp_nv12_avs_gen7,
776             sizeof(pp_nv12_avs_gen7),
777             NULL,
778         },
779
780         gen7_pp_plx_avs_initialize,
781     },
782
783     {
784         {
785             "NV12 DNDI module",
786             PP_NV12_DNDI,
787             pp_nv12_dndi_gen7,
788             sizeof(pp_nv12_dndi_gen7),
789             NULL,
790         },
791
792         gen7_pp_nv12_dndi_initialize,
793     },
794
795     {
796         {
797             "NV12 DN module",
798             PP_NV12_DN,
799             pp_nv12_dn_gen7,
800             sizeof(pp_nv12_dn_gen7),
801             NULL,
802         },
803
804         gen7_pp_nv12_dn_initialize,
805     },
806     {
807         {
808             "NV12_PA module",
809             PP_NV12_LOAD_SAVE_PA,
810             pp_nv12_load_save_pa_gen7,
811             sizeof(pp_nv12_load_save_pa_gen7),
812             NULL,
813         },
814
815         gen7_pp_plx_avs_initialize,
816     },
817
818     {
819         {
820             "PL3_PA module",
821             PP_PL3_LOAD_SAVE_PA,
822             pp_pl3_load_save_pa_gen7,
823             sizeof(pp_pl3_load_save_pa_gen7),
824             NULL,
825         },
826
827         gen7_pp_plx_avs_initialize,
828     },
829
830     {
831         {
832             "PA_NV12 module",
833             PP_PA_LOAD_SAVE_NV12,
834             pp_pa_load_save_nv12_gen7,
835             sizeof(pp_pa_load_save_nv12_gen7),
836             NULL,
837         },
838
839         gen7_pp_plx_avs_initialize,
840     },
841
842     {
843         {
844             "PA_PL3 module",
845             PP_PA_LOAD_SAVE_PL3,
846             pp_pa_load_save_pl3_gen7,
847             sizeof(pp_pa_load_save_pl3_gen7),
848             NULL,
849         },
850
851         gen7_pp_plx_avs_initialize,
852     },
853
854     {
855         {
856             "PA_PA module",
857             PP_PA_LOAD_SAVE_PA,
858             pp_pa_load_save_pa_gen7,
859             sizeof(pp_pa_load_save_pa_gen7),
860             NULL,
861         },
862
863         gen7_pp_plx_avs_initialize,
864     },
865
866     {
867         {
868             "RGBX_NV12 module",
869             PP_RGBX_LOAD_SAVE_NV12,
870             pp_rgbx_load_save_nv12_gen7,
871             sizeof(pp_rgbx_load_save_nv12_gen7),
872             NULL,
873         },
874
875         gen7_pp_plx_avs_initialize,
876     },
877
878     {
879         {
880             "NV12_RGBX module",
881             PP_NV12_LOAD_SAVE_RGBX,
882             pp_nv12_load_save_rgbx_gen7,
883             sizeof(pp_nv12_load_save_rgbx_gen7),
884             NULL,
885         },
886
887         gen7_pp_plx_avs_initialize,
888     },
889
890 };
891
/*
 * Kernel arrays for the gen75 module table, filled from the *.g75b files
 * named in the #include lines. Three arrays are left empty: pp_null_gen75
 * has no include at all, and the includes for pp_nv12_dndi_gen75 and
 * pp_nv12_dn_gen75 are commented out. The scaling and AVS arrays both
 * include avs.g75b.
 * NOTE(review): the empty DNDI/DN arrays suggest those filters are handled
 * by another path on this generation -- confirm before re-enabling them.
 */
892 static const uint32_t pp_null_gen75[][4] = {
893 };
894
895 static const uint32_t pp_nv12_load_save_nv12_gen75[][4] = {
896 #include "shaders/post_processing/gen7/pl2_to_pl2.g75b"
897 };
898
899 static const uint32_t pp_nv12_load_save_pl3_gen75[][4] = {
900 #include "shaders/post_processing/gen7/pl2_to_pl3.g75b"
901 };
902
903 static const uint32_t pp_pl3_load_save_nv12_gen75[][4] = {
904 #include "shaders/post_processing/gen7/pl3_to_pl2.g75b"
905 };
906
907 static const uint32_t pp_pl3_load_save_pl3_gen75[][4] = {
908 #include "shaders/post_processing/gen7/pl3_to_pl3.g75b"
909 };
910
911 static const uint32_t pp_nv12_scaling_gen75[][4] = {
912 #include "shaders/post_processing/gen7/avs.g75b"
913 };
914
915 static const uint32_t pp_nv12_avs_gen75[][4] = {
916 #include "shaders/post_processing/gen7/avs.g75b"
917 };
918
919 static const uint32_t pp_nv12_dndi_gen75[][4] = {
920 // #include "shaders/post_processing/gen7/dndi.g75b"
921 };
922
923 static const uint32_t pp_nv12_dn_gen75[][4] = {
924 // #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
925 };
926 static const uint32_t pp_nv12_load_save_pa_gen75[][4] = {
927 #include "shaders/post_processing/gen7/pl2_to_pa.g75b"
928 };
929 static const uint32_t pp_pl3_load_save_pa_gen75[][4] = {
930 #include "shaders/post_processing/gen7/pl3_to_pa.g75b"
931 };
932 static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
933 #include "shaders/post_processing/gen7/pa_to_pl2.g75b"
934 };
935 static const uint32_t pp_pa_load_save_pl3_gen75[][4] = {
936 #include "shaders/post_processing/gen7/pa_to_pl3.g75b"
937 };
938 static const uint32_t pp_pa_load_save_pa_gen75[][4] = {
939 #include "shaders/post_processing/gen7/pa_to_pa.g75b"
940 };
941 static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = {
942 #include "shaders/post_processing/gen7/rgbx_to_nv12.g75b"
943 };
944 static const uint32_t pp_nv12_load_save_rgbx_gen75[][4] = {
945 #include "shaders/post_processing/gen7/pl2_to_rgbx.g75b"
946 };
947
948 static struct pp_module pp_modules_gen75[] = {
949     {
950         {
951             "NULL module (for testing)",
952             PP_NULL,
953             pp_null_gen75,
954             sizeof(pp_null_gen75),
955             NULL,
956         },
957
958         pp_null_initialize,
959     },
960
961     {
962         {
963             "NV12_NV12",
964             PP_NV12_LOAD_SAVE_N12,
965             pp_nv12_load_save_nv12_gen75,
966             sizeof(pp_nv12_load_save_nv12_gen75),
967             NULL,
968         },
969
970         gen7_pp_plx_avs_initialize,
971     },
972
973     {
974         {
975             "NV12_PL3",
976             PP_NV12_LOAD_SAVE_PL3,
977             pp_nv12_load_save_pl3_gen75,
978             sizeof(pp_nv12_load_save_pl3_gen75),
979             NULL,
980         },
981
982         gen7_pp_plx_avs_initialize,
983     },
984
985     {
986         {
987             "PL3_NV12",
988             PP_PL3_LOAD_SAVE_N12,
989             pp_pl3_load_save_nv12_gen75,
990             sizeof(pp_pl3_load_save_nv12_gen75),
991             NULL,
992         },
993
994         gen7_pp_plx_avs_initialize,
995     },
996
997     {
998         {
999             "PL3_PL3",
1000             PP_PL3_LOAD_SAVE_PL3,
1001             pp_pl3_load_save_pl3_gen75,
1002             sizeof(pp_pl3_load_save_pl3_gen75),
1003             NULL,
1004         },
1005
1006         gen7_pp_plx_avs_initialize,
1007     },
1008
1009     {
1010         {
1011             "NV12 Scaling module",
1012             PP_NV12_SCALING,
1013             pp_nv12_scaling_gen75,
1014             sizeof(pp_nv12_scaling_gen75),
1015             NULL,
1016         },
1017
1018         gen7_pp_plx_avs_initialize,
1019     },
1020
1021     {
1022         {
1023             "NV12 AVS module",
1024             PP_NV12_AVS,
1025             pp_nv12_avs_gen75,
1026             sizeof(pp_nv12_avs_gen75),
1027             NULL,
1028         },
1029
1030         gen7_pp_plx_avs_initialize,
1031     },
1032
1033     {
1034         {
1035             "NV12 DNDI module",
1036             PP_NV12_DNDI,
1037             pp_nv12_dndi_gen75,
1038             sizeof(pp_nv12_dndi_gen75),
1039             NULL,
1040         },
1041
1042         gen7_pp_nv12_dn_initialize,
1043     },
1044
1045     {
1046         {
1047             "NV12 DN module",
1048             PP_NV12_DN,
1049             pp_nv12_dn_gen75,
1050             sizeof(pp_nv12_dn_gen75),
1051             NULL,
1052         },
1053
1054         gen7_pp_nv12_dn_initialize,
1055     },
1056
1057     {
1058         {
1059             "NV12_PA module",
1060             PP_NV12_LOAD_SAVE_PA,
1061             pp_nv12_load_save_pa_gen75,
1062             sizeof(pp_nv12_load_save_pa_gen75),
1063             NULL,
1064         },
1065
1066         gen7_pp_plx_avs_initialize,
1067     },
1068
1069     {
1070         {
1071             "PL3_PA module",
1072             PP_PL3_LOAD_SAVE_PA,
1073             pp_pl3_load_save_pa_gen75,
1074             sizeof(pp_pl3_load_save_pa_gen75),
1075             NULL,
1076         },
1077
1078         gen7_pp_plx_avs_initialize,
1079     },
1080
1081     {
1082         {
1083             "PA_NV12 module",
1084             PP_PA_LOAD_SAVE_NV12,
1085             pp_pa_load_save_nv12_gen75,
1086             sizeof(pp_pa_load_save_nv12_gen75),
1087             NULL,
1088         },
1089
1090         gen7_pp_plx_avs_initialize,
1091     },
1092
1093     {
1094         {
1095             "PA_PL3 module",
1096             PP_PA_LOAD_SAVE_PL3,
1097             pp_pa_load_save_pl3_gen75,
1098             sizeof(pp_pa_load_save_pl3_gen75),
1099             NULL,
1100         },
1101
1102         gen7_pp_plx_avs_initialize,
1103     },
1104
1105     {
1106         {
1107             "PA_PA module",
1108             PP_PA_LOAD_SAVE_PA,
1109             pp_pa_load_save_pa_gen75,
1110             sizeof(pp_pa_load_save_pa_gen75),
1111             NULL,
1112         },
1113
1114         gen7_pp_plx_avs_initialize,
1115     },
1116
1117     {
1118         {
1119             "RGBX_NV12 module",
1120             PP_RGBX_LOAD_SAVE_NV12,
1121             pp_rgbx_load_save_nv12_gen75,
1122             sizeof(pp_rgbx_load_save_nv12_gen75),
1123             NULL,
1124         },
1125
1126         gen7_pp_plx_avs_initialize,
1127     },
1128
1129     {
1130         {
1131             "NV12_RGBX module",
1132             PP_NV12_LOAD_SAVE_RGBX,
1133             pp_nv12_load_save_rgbx_gen75,
1134             sizeof(pp_nv12_load_save_rgbx_gen75),
1135             NULL,
1136         },
1137
1138         gen7_pp_plx_avs_initialize,
1139     },
1140
1141 };
1142
1143 static void
1144 pp_dndi_frame_store_reset(DNDIFrameStore *fs)
1145 {
1146     fs->obj_surface = NULL;
1147     fs->surface_id = VA_INVALID_ID;
1148     fs->is_scratch_surface = 0;
1149 }
1150
1151 static inline void
1152 pp_dndi_frame_store_swap(DNDIFrameStore *fs1, DNDIFrameStore *fs2)
1153 {
1154     const DNDIFrameStore tmpfs = *fs1;
1155     *fs1 = *fs2;
1156     *fs2 = tmpfs;
1157 }
1158
1159 static inline void
1160 pp_dndi_frame_store_clear(DNDIFrameStore *fs, VADriverContextP ctx)
1161 {
1162     if (fs->obj_surface && fs->is_scratch_surface) {
1163         VASurfaceID va_surface = fs->obj_surface->base.id;
1164         i965_DestroySurfaces(ctx, &va_surface, 1);
1165     }
1166     pp_dndi_frame_store_reset(fs);
1167 }
1168
1169 static void
1170 pp_dndi_context_init(struct pp_dndi_context *dndi_ctx)
1171 {
1172     int i;
1173
1174     memset(dndi_ctx, 0, sizeof(*dndi_ctx));
1175     for (i = 0; i < ARRAY_ELEMS(dndi_ctx->frame_store); i++)
1176         pp_dndi_frame_store_reset(&dndi_ctx->frame_store[i]);
1177 }
1178
1179 static VAStatus
1180 pp_dndi_context_init_surface_params(struct pp_dndi_context *dndi_ctx,
1181                                     struct object_surface *obj_surface,
1182                                     const VAProcPipelineParameterBuffer *pipe_params,
1183                                     const VAProcFilterParameterBufferDeinterlacing *deint_params)
1184 {
1185     DNDIFrameStore *fs;
1186
1187     dndi_ctx->is_di_enabled = 1;
1188     dndi_ctx->is_di_adv_enabled = 0;
1189     dndi_ctx->is_first_frame = 0;
1190     dndi_ctx->is_second_field = 0;
1191
1192     /* Check whether we are deinterlacing the second field */
1193     if (dndi_ctx->is_di_enabled) {
1194         const unsigned int tff =
1195             !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD_FIRST);
1196         const unsigned int is_top_field =
1197             !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
1198
1199         if ((tff ^ is_top_field) != 0) {
1200             fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1201             if (fs->surface_id != obj_surface->base.id) {
1202                 WARN_ONCE("invalid surface provided for second field\n");
1203                 return VA_STATUS_ERROR_INVALID_PARAMETER;
1204             }
1205             dndi_ctx->is_second_field = 1;
1206         }
1207     }
1208
1209     /* Check whether we are deinterlacing the first frame */
1210     if (dndi_ctx->is_di_enabled) {
1211         switch (deint_params->algorithm) {
1212         case VAProcDeinterlacingBob:
1213             dndi_ctx->is_first_frame = 1;
1214             break;
1215         case VAProcDeinterlacingMotionAdaptive:
1216         case VAProcDeinterlacingMotionCompensated:
1217             fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1218             if (fs->surface_id == VA_INVALID_ID)
1219                 dndi_ctx->is_first_frame = 1;
1220             else if (dndi_ctx->is_second_field) {
1221                 /* At this stage, we have already deinterlaced the
1222                    first field successfully. So, the first frame flag
1223                    is trigerred if the previous field was deinterlaced
1224                    without reference frame */
1225                 fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS];
1226                 if (fs->surface_id == VA_INVALID_ID)
1227                     dndi_ctx->is_first_frame = 1;
1228             } else {
1229                 if (pipe_params->num_forward_references < 1 ||
1230                     pipe_params->forward_references[0] == VA_INVALID_ID) {
1231                     WARN_ONCE("A forward temporal reference is needed for Motion adaptive/compensated deinterlacing !!!\n");
1232                     return VA_STATUS_ERROR_INVALID_PARAMETER;
1233                 }
1234             }
1235             dndi_ctx->is_di_adv_enabled = 1;
1236             break;
1237         default:
1238             WARN_ONCE("unsupported deinterlacing algorithm (%d)\n",
1239                       deint_params->algorithm);
1240             return VA_STATUS_ERROR_UNSUPPORTED_FILTER;
1241         }
1242     }
1243     return VA_STATUS_SUCCESS;
1244 }
1245
1246 static VAStatus
1247 pp_dndi_context_ensure_surfaces_storage(VADriverContextP ctx,
1248                                         struct i965_post_processing_context *pp_context,
1249                                         struct object_surface *src_surface, struct object_surface *dst_surface)
1250 {
1251     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1252     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
1253     unsigned int src_fourcc, dst_fourcc;
1254     unsigned int src_sampling, dst_sampling;
1255     unsigned int src_tiling, dst_tiling;
1256     unsigned int i, swizzle;
1257     VAStatus status;
1258
1259     /* Determine input surface info. Always use NV12 Y-tiled */
1260     if (src_surface->bo) {
1261         src_fourcc = src_surface->fourcc;
1262         src_sampling = src_surface->subsampling;
1263         dri_bo_get_tiling(src_surface->bo, &src_tiling, &swizzle);
1264         src_tiling = !!src_tiling;
1265     } else {
1266         src_fourcc = VA_FOURCC_NV12;
1267         src_sampling = SUBSAMPLE_YUV420;
1268         src_tiling = 1;
1269         status = i965_check_alloc_surface_bo(ctx, src_surface,
1270                                              src_tiling, src_fourcc, src_sampling);
1271         if (status != VA_STATUS_SUCCESS)
1272             return status;
1273     }
1274
1275     /* Determine output surface info. Always use NV12 Y-tiled */
1276     if (dst_surface->bo) {
1277         dst_fourcc   = dst_surface->fourcc;
1278         dst_sampling = dst_surface->subsampling;
1279         dri_bo_get_tiling(dst_surface->bo, &dst_tiling, &swizzle);
1280         dst_tiling = !!dst_tiling;
1281     } else {
1282         dst_fourcc = VA_FOURCC_NV12;
1283         dst_sampling = SUBSAMPLE_YUV420;
1284         dst_tiling = 1;
1285         status = i965_check_alloc_surface_bo(ctx, dst_surface,
1286                                              dst_tiling, dst_fourcc, dst_sampling);
1287         if (status != VA_STATUS_SUCCESS)
1288             return status;
1289     }
1290
1291     /* Create pipeline surfaces */
1292     for (i = 0; i < ARRAY_ELEMS(dndi_ctx->frame_store); i ++) {
1293         struct object_surface *obj_surface;
1294         VASurfaceID new_surface;
1295         unsigned int width, height;
1296
1297         if (dndi_ctx->frame_store[i].obj_surface &&
1298             dndi_ctx->frame_store[i].obj_surface->bo)
1299             continue; // user allocated surface, not VPP internal
1300
1301         if (dndi_ctx->frame_store[i].obj_surface) {
1302             obj_surface = dndi_ctx->frame_store[i].obj_surface;
1303             dndi_ctx->frame_store[i].is_scratch_surface = 0;
1304         } else {
1305             if (i <= DNDI_FRAME_IN_STMM) {
1306                 width = src_surface->orig_width;
1307                 height = src_surface->orig_height;
1308             } else {
1309                 width = dst_surface->orig_width;
1310                 height = dst_surface->orig_height;
1311             }
1312
1313             status = i965_CreateSurfaces(ctx, width, height, VA_RT_FORMAT_YUV420,
1314                                          1, &new_surface);
1315             if (status != VA_STATUS_SUCCESS)
1316                 return status;
1317
1318             obj_surface = SURFACE(new_surface);
1319             assert(obj_surface != NULL);
1320             dndi_ctx->frame_store[i].is_scratch_surface = 1;
1321         }
1322
1323         if (i <= DNDI_FRAME_IN_PREVIOUS) {
1324             status = i965_check_alloc_surface_bo(ctx, obj_surface,
1325                                                  src_tiling, src_fourcc, src_sampling);
1326         } else if (i == DNDI_FRAME_IN_STMM || i == DNDI_FRAME_OUT_STMM) {
1327             status = i965_check_alloc_surface_bo(ctx, obj_surface,
1328                                                  1, VA_FOURCC_Y800, SUBSAMPLE_YUV400);
1329         } else if (i >= DNDI_FRAME_OUT_CURRENT) {
1330             status = i965_check_alloc_surface_bo(ctx, obj_surface,
1331                                                  dst_tiling, dst_fourcc, dst_sampling);
1332         }
1333         if (status != VA_STATUS_SUCCESS)
1334             return status;
1335
1336         dndi_ctx->frame_store[i].obj_surface = obj_surface;
1337     }
1338     return VA_STATUS_SUCCESS;
1339 }
1340
1341 static VAStatus
1342 pp_dndi_context_ensure_surfaces(VADriverContextP ctx,
1343                                 struct i965_post_processing_context *pp_context,
1344                                 struct object_surface *src_surface, struct object_surface *dst_surface)
1345 {
1346     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1347     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
1348     DNDIFrameStore *ifs, *ofs;
1349     bool is_new_frame = false;
1350
1351     /* Update the previous input surface */
1352     is_new_frame = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].surface_id !=
1353                    src_surface->base.id;
1354     if (is_new_frame) {
1355         ifs = &dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS];
1356         ofs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1357         do {
1358             const VAProcPipelineParameterBuffer * const pipe_params =
1359                 pp_context->pipeline_param;
1360             struct object_surface *obj_surface;
1361
1362             if (pipe_params->num_forward_references < 1)
1363                 break;
1364             if (pipe_params->forward_references[0] == VA_INVALID_ID)
1365                 break;
1366
1367             obj_surface = SURFACE(pipe_params->forward_references[0]);
1368             if (!obj_surface || obj_surface->base.id == ifs->surface_id)
1369                 break;
1370
1371             pp_dndi_frame_store_clear(ifs, ctx);
1372             if (obj_surface->base.id == ofs->surface_id) {
1373                 *ifs = *ofs;
1374                 pp_dndi_frame_store_reset(ofs);
1375             } else {
1376                 ifs->obj_surface = obj_surface;
1377                 ifs->surface_id = obj_surface->base.id;
1378             }
1379         } while (0);
1380     }
1381
1382     /* Update the input surface */
1383     ifs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1384     pp_dndi_frame_store_clear(ifs, ctx);
1385     ifs->obj_surface = src_surface;
1386     ifs->surface_id = src_surface->base.id;
1387
1388     /* Update the Spatial Temporal Motion Measure (STMM) surfaces */
1389     if (is_new_frame)
1390         pp_dndi_frame_store_swap(&dndi_ctx->frame_store[DNDI_FRAME_IN_STMM],
1391                                  &dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM]);
1392
1393     /* Update the output surfaces */
1394     ofs = &dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT];
1395     if (dndi_ctx->is_di_adv_enabled && !dndi_ctx->is_first_frame) {
1396         pp_dndi_frame_store_swap(ofs,
1397                                  &dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS]);
1398         if (!dndi_ctx->is_second_field)
1399             ofs = &dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS];
1400     }
1401     pp_dndi_frame_store_clear(ofs, ctx);
1402     ofs->obj_surface = dst_surface;
1403     ofs->surface_id = dst_surface->base.id;
1404
1405     return VA_STATUS_SUCCESS;
1406 }
1407
1408 static int
1409 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
1410 {
1411     int fourcc;
1412
1413     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1414         struct object_image *obj_image = (struct object_image *)surface->base;
1415         fourcc = obj_image->image.format.fourcc;
1416     } else {
1417         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1418         fourcc = obj_surface->fourcc;
1419     }
1420
1421     return fourcc;
1422 }
1423
1424 static void
1425 pp_get_surface_size(VADriverContextP ctx, const struct i965_surface *surface, int *width, int *height)
1426 {
1427     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1428         struct object_image *obj_image = (struct object_image *)surface->base;
1429
1430         *width = obj_image->image.width;
1431         *height = obj_image->image.height;
1432     } else {
1433         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1434
1435         *width = obj_surface->orig_width;
1436         *height = obj_surface->orig_height;
1437     }
1438 }
1439
1440 static void
1441 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
1442 {
1443     switch (tiling) {
1444     case I915_TILING_NONE:
1445         ss->ss3.tiled_surface = 0;
1446         ss->ss3.tile_walk = 0;
1447         break;
1448     case I915_TILING_X:
1449         ss->ss3.tiled_surface = 1;
1450         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
1451         break;
1452     case I915_TILING_Y:
1453         ss->ss3.tiled_surface = 1;
1454         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
1455         break;
1456     }
1457 }
1458
1459 static void
1460 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
1461 {
1462     switch (tiling) {
1463     case I915_TILING_NONE:
1464         ss->ss2.tiled_surface = 0;
1465         ss->ss2.tile_walk = 0;
1466         break;
1467     case I915_TILING_X:
1468         ss->ss2.tiled_surface = 1;
1469         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1470         break;
1471     case I915_TILING_Y:
1472         ss->ss2.tiled_surface = 1;
1473         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1474         break;
1475     }
1476 }
1477
1478 static void
1479 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
1480 {
1481     switch (tiling) {
1482     case I915_TILING_NONE:
1483         ss->ss0.tiled_surface = 0;
1484         ss->ss0.tile_walk = 0;
1485         break;
1486     case I915_TILING_X:
1487         ss->ss0.tiled_surface = 1;
1488         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1489         break;
1490     case I915_TILING_Y:
1491         ss->ss0.tiled_surface = 1;
1492         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1493         break;
1494     }
1495 }
1496
1497 static void
1498 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
1499 {
1500     switch (tiling) {
1501     case I915_TILING_NONE:
1502         ss->ss2.tiled_surface = 0;
1503         ss->ss2.tile_walk = 0;
1504         break;
1505     case I915_TILING_X:
1506         ss->ss2.tiled_surface = 1;
1507         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1508         break;
1509     case I915_TILING_Y:
1510         ss->ss2.tiled_surface = 1;
1511         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1512         break;
1513     }
1514 }
1515
1516 static void
1517 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1518 {
1519     struct i965_interface_descriptor *desc;
1520     dri_bo *bo;
1521     int pp_index = pp_context->current_pp;
1522
1523     bo = pp_context->idrt.bo;
1524     dri_bo_map(bo, 1);
1525     assert(bo->virtual);
1526     desc = bo->virtual;
1527     memset(desc, 0, sizeof(*desc));
1528     desc->desc0.grf_reg_blocks = 10;
1529     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1530     desc->desc1.const_urb_entry_read_offset = 0;
1531     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
1532     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
1533     desc->desc2.sampler_count = 0;
1534     desc->desc3.binding_table_entry_count = 0;
1535     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
1536
1537     dri_bo_emit_reloc(bo,
1538                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1539                       desc->desc0.grf_reg_blocks,
1540                       offsetof(struct i965_interface_descriptor, desc0),
1541                       pp_context->pp_modules[pp_index].kernel.bo);
1542
1543     dri_bo_emit_reloc(bo,
1544                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1545                       desc->desc2.sampler_count << 2,
1546                       offsetof(struct i965_interface_descriptor, desc2),
1547                       pp_context->sampler_state_table.bo);
1548
1549     dri_bo_unmap(bo);
1550     pp_context->idrt.num_interface_descriptors++;
1551 }
1552
1553 static void
1554 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
1555 {
1556     struct i965_vfe_state *vfe_state;
1557     dri_bo *bo;
1558
1559     bo = pp_context->vfe_state.bo;
1560     dri_bo_map(bo, 1);
1561     assert(bo->virtual);
1562     vfe_state = bo->virtual;
1563     memset(vfe_state, 0, sizeof(*vfe_state));
1564     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
1565     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
1566     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
1567     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
1568     vfe_state->vfe1.children_present = 0;
1569     vfe_state->vfe2.interface_descriptor_base =
1570         pp_context->idrt.bo->offset >> 4; /* reloc */
1571     dri_bo_emit_reloc(bo,
1572                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1573                       0,
1574                       offsetof(struct i965_vfe_state, vfe2),
1575                       pp_context->idrt.bo);
1576     dri_bo_unmap(bo);
1577 }
1578
1579 static void
1580 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
1581 {
1582     unsigned char *constant_buffer;
1583     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1584
1585     assert(sizeof(*pp_static_parameter) == 128);
1586     dri_bo_map(pp_context->curbe.bo, 1);
1587     assert(pp_context->curbe.bo->virtual);
1588     constant_buffer = pp_context->curbe.bo->virtual;
1589     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1590     dri_bo_unmap(pp_context->curbe.bo);
1591 }
1592
1593 static void
1594 ironlake_pp_states_setup(VADriverContextP ctx,
1595                          struct i965_post_processing_context *pp_context)
1596 {
1597     ironlake_pp_interface_descriptor_table(pp_context);
1598     ironlake_pp_vfe_state(pp_context);
1599     ironlake_pp_upload_constants(pp_context);
1600 }
1601
1602 static void
1603 ironlake_pp_pipeline_select(VADriverContextP ctx,
1604                             struct i965_post_processing_context *pp_context)
1605 {
1606     struct intel_batchbuffer *batch = pp_context->batch;
1607
1608     BEGIN_BATCH(batch, 1);
1609     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1610     ADVANCE_BATCH(batch);
1611 }
1612
1613 static void
1614 ironlake_pp_urb_layout(VADriverContextP ctx,
1615                        struct i965_post_processing_context *pp_context)
1616 {
1617     struct intel_batchbuffer *batch = pp_context->batch;
1618     unsigned int vfe_fence, cs_fence;
1619
1620     vfe_fence = pp_context->urb.cs_start;
1621     cs_fence = pp_context->urb.size;
1622
1623     BEGIN_BATCH(batch, 3);
1624     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1625     OUT_BATCH(batch, 0);
1626     OUT_BATCH(batch,
1627               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
1628               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
1629     ADVANCE_BATCH(batch);
1630 }
1631
1632 static void
1633 ironlake_pp_state_base_address(VADriverContextP ctx,
1634                                struct i965_post_processing_context *pp_context)
1635 {
1636     struct intel_batchbuffer *batch = pp_context->batch;
1637
1638     BEGIN_BATCH(batch, 8);
1639     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1640     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1641     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1642     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1643     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1644     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1645     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1646     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1647     ADVANCE_BATCH(batch);
1648 }
1649
1650 static void
1651 ironlake_pp_state_pointers(VADriverContextP ctx,
1652                            struct i965_post_processing_context *pp_context)
1653 {
1654     struct intel_batchbuffer *batch = pp_context->batch;
1655
1656     BEGIN_BATCH(batch, 3);
1657     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1658     OUT_BATCH(batch, 0);
1659     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1660     ADVANCE_BATCH(batch);
1661 }
1662
1663 static void
1664 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1665                           struct i965_post_processing_context *pp_context)
1666 {
1667     struct intel_batchbuffer *batch = pp_context->batch;
1668
1669     BEGIN_BATCH(batch, 2);
1670     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1671     OUT_BATCH(batch,
1672               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1673               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1674     ADVANCE_BATCH(batch);
1675 }
1676
1677 static void
1678 ironlake_pp_constant_buffer(VADriverContextP ctx,
1679                             struct i965_post_processing_context *pp_context)
1680 {
1681     struct intel_batchbuffer *batch = pp_context->batch;
1682
1683     BEGIN_BATCH(batch, 2);
1684     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1685     OUT_RELOC(batch, pp_context->curbe.bo,
1686               I915_GEM_DOMAIN_INSTRUCTION, 0,
1687               pp_context->urb.size_cs_entry - 1);
1688     ADVANCE_BATCH(batch);
1689 }
1690
1691 static void
1692 ironlake_pp_object_walker(VADriverContextP ctx,
1693                           struct i965_post_processing_context *pp_context)
1694 {
1695     struct intel_batchbuffer *batch = pp_context->batch;
1696     int x, x_steps, y, y_steps;
1697     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1698
1699     x_steps = pp_context->pp_x_steps(pp_context->private_context);
1700     y_steps = pp_context->pp_y_steps(pp_context->private_context);
1701
1702     for (y = 0; y < y_steps; y++) {
1703         for (x = 0; x < x_steps; x++) {
1704             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1705                 BEGIN_BATCH(batch, 20);
1706                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1707                 OUT_BATCH(batch, 0);
1708                 OUT_BATCH(batch, 0); /* no indirect data */
1709                 OUT_BATCH(batch, 0);
1710
1711                 /* inline data grf 5-6 */
1712                 assert(sizeof(*pp_inline_parameter) == 64);
1713                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1714
1715                 ADVANCE_BATCH(batch);
1716             }
1717         }
1718     }
1719 }
1720
1721 static void
1722 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1723                            struct i965_post_processing_context *pp_context)
1724 {
1725     struct intel_batchbuffer *batch = pp_context->batch;
1726
1727     intel_batchbuffer_start_atomic(batch, 0x1000);
1728     intel_batchbuffer_emit_mi_flush(batch);
1729     ironlake_pp_pipeline_select(ctx, pp_context);
1730     ironlake_pp_state_base_address(ctx, pp_context);
1731     ironlake_pp_state_pointers(ctx, pp_context);
1732     ironlake_pp_urb_layout(ctx, pp_context);
1733     ironlake_pp_cs_urb_layout(ctx, pp_context);
1734     ironlake_pp_constant_buffer(ctx, pp_context);
1735     ironlake_pp_object_walker(ctx, pp_context);
1736     intel_batchbuffer_end_atomic(batch);
1737 }
1738
1739 // update u/v offset when the surface format are packed yuv
1740 static void i965_update_src_surface_static_parameter(
1741     VADriverContextP    ctx,
1742     struct i965_post_processing_context *pp_context,
1743     const struct i965_surface *surface)
1744 {
1745     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1746     int fourcc = pp_get_surface_fourcc(ctx, surface);
1747
1748     switch (fourcc) {
1749     case VA_FOURCC_YUY2:
1750         pp_static_parameter->grf1.source_packed_u_offset = 1;
1751         pp_static_parameter->grf1.source_packed_v_offset = 3;
1752         break;
1753     case VA_FOURCC_UYVY:
1754         pp_static_parameter->grf1.source_packed_y_offset = 1;
1755         pp_static_parameter->grf1.source_packed_v_offset = 2;
1756         break;
1757     case VA_FOURCC_BGRX:
1758     case VA_FOURCC_BGRA:
1759         pp_static_parameter->grf1.source_rgb_layout = 0;
1760         break;
1761     case VA_FOURCC_RGBX:
1762     case VA_FOURCC_RGBA:
1763         pp_static_parameter->grf1.source_rgb_layout = 1;
1764         break;
1765     default:
1766         break;
1767     }
1768
1769 }
1770
1771 static void i965_update_dst_surface_static_parameter(
1772     VADriverContextP    ctx,
1773     struct i965_post_processing_context *pp_context,
1774     const struct i965_surface *surface)
1775 {
1776     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1777     int fourcc = pp_get_surface_fourcc(ctx, surface);
1778
1779     switch (fourcc) {
1780     case VA_FOURCC_YUY2:
1781         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1782         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1783         break;
1784     case VA_FOURCC_UYVY:
1785         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1786         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1787         break;
1788     case VA_FOURCC_BGRX:
1789     case VA_FOURCC_BGRA:
1790         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1791         break;
1792     case VA_FOURCC_RGBX:
1793     case VA_FOURCC_RGBA:
1794         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
1795         break;
1796     default:
1797         break;
1798     }
1799
1800 }
1801
1802 static void
1803 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1804                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1805                           int width, int height, int pitch, int format,
1806                           int index, int is_target)
1807 {
1808     struct i965_surface_state *ss;
1809     dri_bo *ss_bo;
1810     unsigned int tiling;
1811     unsigned int swizzle;
1812
1813     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1814     ss_bo = pp_context->surface_state_binding_table.bo;
1815     assert(ss_bo);
1816
1817     dri_bo_map(ss_bo, True);
1818     assert(ss_bo->virtual);
1819     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1820     memset(ss, 0, sizeof(*ss));
1821     ss->ss0.surface_type = I965_SURFACE_2D;
1822     ss->ss0.surface_format = format;
1823     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1824     ss->ss2.width = width - 1;
1825     ss->ss2.height = height - 1;
1826     ss->ss3.pitch = pitch - 1;
1827     pp_set_surface_tiling(ss, tiling);
1828     dri_bo_emit_reloc(ss_bo,
1829                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1830                       surf_bo_offset,
1831                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1832                       surf_bo);
1833     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1834     dri_bo_unmap(ss_bo);
1835 }
1836
1837 static void
1838 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1839                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1840                            int width, int height, int wpitch,
1841                            int xoffset, int yoffset,
1842                            int format, int interleave_chroma,
1843                            int index)
1844 {
1845     struct i965_surface_state2 *ss2;
1846     dri_bo *ss2_bo;
1847     unsigned int tiling;
1848     unsigned int swizzle;
1849
1850     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1851     ss2_bo = pp_context->surface_state_binding_table.bo;
1852     assert(ss2_bo);
1853
1854     dri_bo_map(ss2_bo, True);
1855     assert(ss2_bo->virtual);
1856     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1857     memset(ss2, 0, sizeof(*ss2));
1858     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1859     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1860     ss2->ss1.width = width - 1;
1861     ss2->ss1.height = height - 1;
1862     ss2->ss2.pitch = wpitch - 1;
1863     ss2->ss2.interleave_chroma = interleave_chroma;
1864     ss2->ss2.surface_format = format;
1865     ss2->ss3.x_offset_for_cb = xoffset;
1866     ss2->ss3.y_offset_for_cb = yoffset;
1867     pp_set_surface2_tiling(ss2, tiling);
1868     dri_bo_emit_reloc(ss2_bo,
1869                       I915_GEM_DOMAIN_RENDER, 0,
1870                       surf_bo_offset,
1871                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1872                       surf_bo);
1873     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1874     dri_bo_unmap(ss2_bo);
1875 }
1876
1877 static void
1878 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1879                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1880                           int width, int height, int pitch, int format,
1881                           int index, int is_target)
1882 {
1883     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1884     struct gen7_surface_state *ss;
1885     dri_bo *ss_bo;
1886     unsigned int tiling;
1887     unsigned int swizzle;
1888
1889     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1890     ss_bo = pp_context->surface_state_binding_table.bo;
1891     assert(ss_bo);
1892
1893     dri_bo_map(ss_bo, True);
1894     assert(ss_bo->virtual);
1895     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1896     memset(ss, 0, sizeof(*ss));
1897     ss->ss0.surface_type = I965_SURFACE_2D;
1898     ss->ss0.surface_format = format;
1899     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1900     ss->ss2.width = width - 1;
1901     ss->ss2.height = height - 1;
1902     ss->ss3.pitch = pitch - 1;
1903     gen7_pp_set_surface_tiling(ss, tiling);
1904     if (IS_HASWELL(i965->intel.device_info))
1905         gen7_render_set_surface_scs(ss);
1906     dri_bo_emit_reloc(ss_bo,
1907                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1908                       surf_bo_offset,
1909                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1910                       surf_bo);
1911     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1912     dri_bo_unmap(ss_bo);
1913 }
1914
1915 static void
1916 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1917                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1918                            int width, int height, int wpitch,
1919                            int xoffset, int yoffset,
1920                            int format, int interleave_chroma,
1921                            int index)
1922 {
1923     struct gen7_surface_state2 *ss2;
1924     dri_bo *ss2_bo;
1925     unsigned int tiling;
1926     unsigned int swizzle;
1927
1928     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1929     ss2_bo = pp_context->surface_state_binding_table.bo;
1930     assert(ss2_bo);
1931
1932     dri_bo_map(ss2_bo, True);
1933     assert(ss2_bo->virtual);
1934     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1935     memset(ss2, 0, sizeof(*ss2));
1936     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1937     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1938     ss2->ss1.width = width - 1;
1939     ss2->ss1.height = height - 1;
1940     ss2->ss2.pitch = wpitch - 1;
1941     ss2->ss2.interleave_chroma = interleave_chroma;
1942     ss2->ss2.surface_format = format;
1943     ss2->ss3.x_offset_for_cb = xoffset;
1944     ss2->ss3.y_offset_for_cb = yoffset;
1945     gen7_pp_set_surface2_tiling(ss2, tiling);
1946     dri_bo_emit_reloc(ss2_bo,
1947                       I915_GEM_DOMAIN_RENDER, 0,
1948                       surf_bo_offset,
1949                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1950                       surf_bo);
1951     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1952     dri_bo_unmap(ss2_bo);
1953 }
1954
1955 static void
1956 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1957                                 const struct i965_surface *surface,
1958                                 int base_index, int is_target,
1959                                 int *width, int *height, int *pitch, int *offset)
1960 {
1961     struct object_surface *obj_surface;
1962     struct object_image *obj_image;
1963     dri_bo *bo;
1964     int fourcc = pp_get_surface_fourcc(ctx, surface);
1965     const int Y = 0;
1966     const int U = ((fourcc == VA_FOURCC_YV12) ||
1967                    (fourcc == VA_FOURCC_YV16))
1968                   ? 2 : 1;
1969     const int V = ((fourcc == VA_FOURCC_YV12) ||
1970                    (fourcc == VA_FOURCC_YV16))
1971                   ? 1 : 2;
1972     const int UV = 1;
1973     int interleaved_uv = fourcc == VA_FOURCC_NV12;
1974     int packed_yuv = (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY);
1975     int full_packed_format = (fourcc == VA_FOURCC_RGBA ||
1976                               fourcc == VA_FOURCC_RGBX ||
1977                               fourcc == VA_FOURCC_BGRA ||
1978                               fourcc == VA_FOURCC_BGRX);
1979     int scale_factor_of_1st_plane_width_in_byte = 1;
1980
1981     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1982         obj_surface = (struct object_surface *)surface->base;
1983         bo = obj_surface->bo;
1984         width[0] = obj_surface->orig_width;
1985         height[0] = obj_surface->orig_height;
1986         pitch[0] = obj_surface->width;
1987         offset[0] = 0;
1988
1989         if (full_packed_format) {
1990             scale_factor_of_1st_plane_width_in_byte = 4;
1991         } else if (packed_yuv) {
1992             scale_factor_of_1st_plane_width_in_byte =  2;
1993         } else if (interleaved_uv) {
1994             width[1] = obj_surface->orig_width;
1995             height[1] = obj_surface->orig_height / 2;
1996             pitch[1] = obj_surface->width;
1997             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1998         } else {
1999             width[1] = obj_surface->orig_width / 2;
2000             height[1] = obj_surface->orig_height / 2;
2001             pitch[1] = obj_surface->width / 2;
2002             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
2003             width[2] = obj_surface->orig_width / 2;
2004             height[2] = obj_surface->orig_height / 2;
2005             pitch[2] = obj_surface->width / 2;
2006             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
2007         }
2008     } else {
2009         obj_image = (struct object_image *)surface->base;
2010         bo = obj_image->bo;
2011         width[0] = obj_image->image.width;
2012         height[0] = obj_image->image.height;
2013         pitch[0] = obj_image->image.pitches[0];
2014         offset[0] = obj_image->image.offsets[0];
2015
2016         if (full_packed_format) {
2017             scale_factor_of_1st_plane_width_in_byte = 4;
2018         } else if (packed_yuv) {
2019             scale_factor_of_1st_plane_width_in_byte = 2;
2020         } else if (interleaved_uv) {
2021             width[1] = obj_image->image.width;
2022             height[1] = obj_image->image.height / 2;
2023             pitch[1] = obj_image->image.pitches[1];
2024             offset[1] = obj_image->image.offsets[1];
2025         } else {
2026             width[1] = obj_image->image.width / 2;
2027             height[1] = obj_image->image.height / 2;
2028             pitch[1] = obj_image->image.pitches[1];
2029             offset[1] = obj_image->image.offsets[1];
2030             width[2] = obj_image->image.width / 2;
2031             height[2] = obj_image->image.height / 2;
2032             pitch[2] = obj_image->image.pitches[2];
2033             offset[2] = obj_image->image.offsets[2];
2034             if (fourcc == VA_FOURCC_YV16) {
2035                 width[1] = obj_image->image.width / 2;
2036                 height[1] = obj_image->image.height;
2037                 width[2] = obj_image->image.width / 2;
2038                 height[2] = obj_image->image.height;
2039             }
2040         }
2041     }
2042
2043     /* Y surface */
2044     i965_pp_set_surface_state(ctx, pp_context,
2045                               bo, offset[Y],
2046                               ALIGN(width[Y] *scale_factor_of_1st_plane_width_in_byte, 4) / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
2047                               base_index, is_target);
2048
2049     if (!packed_yuv && !full_packed_format) {
2050         if (interleaved_uv) {
2051             i965_pp_set_surface_state(ctx, pp_context,
2052                                       bo, offset[UV],
2053                                       ALIGN(width[UV], 4) / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
2054                                       base_index + 1, is_target);
2055         } else {
2056             /* U surface */
2057             i965_pp_set_surface_state(ctx, pp_context,
2058                                       bo, offset[U],
2059                                       ALIGN(width[U], 4) / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
2060                                       base_index + 1, is_target);
2061
2062             /* V surface */
2063             i965_pp_set_surface_state(ctx, pp_context,
2064                                       bo, offset[V],
2065                                       ALIGN(width[V], 4) / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
2066                                       base_index + 2, is_target);
2067         }
2068     }
2069
2070 }
2071
2072 static void
2073 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2074                                      const struct i965_surface *surface,
2075                                      int base_index, int is_target,
2076                                      const VARectangle *rect,
2077                                      int *width, int *height, int *pitch, int *offset)
2078 {
2079     struct object_surface *obj_surface;
2080     struct object_image *obj_image;
2081     dri_bo *bo;
2082     int fourcc = pp_get_surface_fourcc(ctx, surface);
2083     const i965_fourcc_info *fourcc_info = get_fourcc_info(fourcc);
2084
2085     if (fourcc_info == NULL)
2086         return;
2087
2088     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
2089         obj_surface = (struct object_surface *)surface->base;
2090         bo = obj_surface->bo;
2091         width[0] = MIN(rect->x + rect->width, obj_surface->orig_width);
2092         height[0] = MIN(rect->y + rect->height, obj_surface->orig_height);
2093         pitch[0] = obj_surface->width;
2094         offset[0] = 0;
2095
2096         if (fourcc_info->num_planes == 1 && is_target)
2097             width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
2098
2099         width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width);
2100         height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height);
2101         pitch[1] = obj_surface->cb_cr_pitch;
2102         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
2103
2104         width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width);
2105         height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height);
2106         pitch[2] = obj_surface->cb_cr_pitch;
2107         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
2108     } else {
2109         int U = 0, V = 0;
2110
2111         /* FIXME: add support for ARGB/ABGR image */
2112         obj_image = (struct object_image *)surface->base;
2113         bo = obj_image->bo;
2114         width[0] = MIN(rect->x + rect->width, obj_image->image.width);
2115         height[0] = MIN(rect->y + rect->height, obj_image->image.height);
2116         pitch[0] = obj_image->image.pitches[0];
2117         offset[0] = obj_image->image.offsets[0];
2118
2119         if (fourcc_info->num_planes == 1) {
2120             if (is_target)
2121                 width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
2122         } else if (fourcc_info->num_planes == 2) {
2123             U = 1, V = 1;
2124         } else {
2125             assert(fourcc_info->num_components == 3);
2126
2127             U = fourcc_info->components[1].plane;
2128             V = fourcc_info->components[2].plane;
2129             assert((U == 1 && V == 2) ||
2130                    (U == 2 && V == 1));
2131         }
2132
2133         /* Always set width/height although they aren't used for fourcc_info->num_planes == 1 */
2134         width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor);
2135         height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor);
2136         pitch[1] = obj_image->image.pitches[U];
2137         offset[1] = obj_image->image.offsets[U];
2138
2139         width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor);
2140         height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor);
2141         pitch[2] = obj_image->image.pitches[V];
2142         offset[2] = obj_image->image.offsets[V];
2143     }
2144
2145     if (is_target) {
2146         gen7_pp_set_surface_state(ctx, pp_context,
2147                                   bo, 0,
2148                                   ALIGN(width[0], 4) / 4, height[0], pitch[0],
2149                                   I965_SURFACEFORMAT_R8_UINT,
2150                                   base_index, 1);
2151
2152         if (fourcc_info->num_planes == 2) {
2153             gen7_pp_set_surface_state(ctx, pp_context,
2154                                       bo, offset[1],
2155                                       ALIGN(width[1], 2) / 2, height[1], pitch[1],
2156                                       I965_SURFACEFORMAT_R8G8_SINT,
2157                                       base_index + 1, 1);
2158         } else if (fourcc_info->num_planes == 3) {
2159             gen7_pp_set_surface_state(ctx, pp_context,
2160                                       bo, offset[1],
2161                                       ALIGN(width[1], 4) / 4, height[1], pitch[1],
2162                                       I965_SURFACEFORMAT_R8_SINT,
2163                                       base_index + 1, 1);
2164             gen7_pp_set_surface_state(ctx, pp_context,
2165                                       bo, offset[2],
2166                                       ALIGN(width[2], 4) / 4, height[2], pitch[2],
2167                                       I965_SURFACEFORMAT_R8_SINT,
2168                                       base_index + 2, 1);
2169         }
2170
2171         if (fourcc_info->format == I965_COLOR_RGB) {
2172             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2173             /* the format is MSB: X-B-G-R */
2174             pp_static_parameter->grf2.save_avs_rgb_swap = 0;
2175             if ((fourcc == VA_FOURCC_BGRA) ||
2176                 (fourcc == VA_FOURCC_BGRX)) {
2177                 /* It is stored as MSB: X-R-G-B */
2178                 pp_static_parameter->grf2.save_avs_rgb_swap = 1;
2179             }
2180         }
2181     } else {
2182         int format0 = SURFACE_FORMAT_Y8_UNORM;
2183
2184         switch (fourcc) {
2185         case VA_FOURCC_YUY2:
2186             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
2187             break;
2188
2189         case VA_FOURCC_UYVY:
2190             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
2191             break;
2192
2193         default:
2194             break;
2195         }
2196
2197         if (fourcc_info->format == I965_COLOR_RGB) {
2198             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2199             /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
2200             format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
2201             pp_static_parameter->grf2.src_avs_rgb_swap = 0;
2202             if ((fourcc == VA_FOURCC_BGRA) ||
2203                 (fourcc == VA_FOURCC_BGRX)) {
2204                 pp_static_parameter->grf2.src_avs_rgb_swap = 1;
2205             }
2206         }
2207
2208         gen7_pp_set_surface2_state(ctx, pp_context,
2209                                    bo, offset[0],
2210                                    width[0], height[0], pitch[0],
2211                                    0, 0,
2212                                    format0, 0,
2213                                    base_index);
2214
2215         if (fourcc_info->num_planes == 2) {
2216             gen7_pp_set_surface2_state(ctx, pp_context,
2217                                        bo, offset[1],
2218                                        width[1], height[1], pitch[1],
2219                                        0, 0,
2220                                        SURFACE_FORMAT_R8B8_UNORM, 0,
2221                                        base_index + 1);
2222         } else if (fourcc_info->num_planes == 3) {
2223             gen7_pp_set_surface2_state(ctx, pp_context,
2224                                        bo, offset[1],
2225                                        width[1], height[1], pitch[1],
2226                                        0, 0,
2227                                        SURFACE_FORMAT_R8_UNORM, 0,
2228                                        base_index + 1);
2229             gen7_pp_set_surface2_state(ctx, pp_context,
2230                                        bo, offset[2],
2231                                        width[2], height[2], pitch[2],
2232                                        0, 0,
2233                                        SURFACE_FORMAT_R8_UNORM, 0,
2234                                        base_index + 2);
2235         }
2236     }
2237 }
2238
/* Horizontal step count for the "null" kernel: always 1; the argument is ignored. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2244
/* Vertical step count for the "null" kernel: always 1; the argument is ignored. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2250
/* Per-block hook for the "null" kernel: touches nothing and returns 0. */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
2256
2257 static VAStatus
2258 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2259                    const struct i965_surface *src_surface,
2260                    const VARectangle *src_rect,
2261                    struct i965_surface *dst_surface,
2262                    const VARectangle *dst_rect,
2263                    void *filter_param)
2264 {
2265     /* private function & data */
2266     pp_context->pp_x_steps = pp_null_x_steps;
2267     pp_context->pp_y_steps = pp_null_y_steps;
2268     pp_context->private_context = NULL;
2269     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
2270
2271     dst_surface->flags = src_surface->flags;
2272
2273     return VA_STATUS_SUCCESS;
2274 }
2275
/* Horizontal step count for the load/save kernel: always 1; the argument is ignored. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2281
2282 static int
2283 pp_load_save_y_steps(void *private_context)
2284 {
2285     struct pp_load_save_context *pp_load_save_context = private_context;
2286
2287     return pp_load_save_context->dest_h / 8;
2288 }
2289
2290 static int
2291 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2292 {
2293     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2294     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)pp_context->private_context;
2295
2296     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
2297     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
2298
2299     return 0;
2300 }
2301
2302 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
2303 {
2304     int i;
2305     /* x offset of dest surface must be dword aligned.
2306      * so we have to extend dst surface on left edge, and mask out pixels not interested
2307      */
2308     if (dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT) {
2309         pp_context->block_horizontal_mask_left = 0;
2310         for (i = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT; i < GPU_ASM_BLOCK_WIDTH; i++) {
2311             pp_context->block_horizontal_mask_left |= 1 << i;
2312         }
2313     } else {
2314         pp_context->block_horizontal_mask_left = 0xffff;
2315     }
2316
2317     int dst_width_adjust = dst_rect->width + dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
2318     if (dst_width_adjust % GPU_ASM_BLOCK_WIDTH) {
2319         pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust % GPU_ASM_BLOCK_WIDTH)) - 1;
2320     } else {
2321         pp_context->block_horizontal_mask_right = 0xffff;
2322     }
2323
2324     if (dst_rect->height % GPU_ASM_BLOCK_HEIGHT) {
2325         pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height % GPU_ASM_BLOCK_HEIGHT)) - 1;
2326     } else {
2327         pp_context->block_vertical_mask_bottom = 0xff;
2328     }
2329
2330 }
2331 static VAStatus
2332 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2333                                 const struct i965_surface *src_surface,
2334                                 const VARectangle *src_rect,
2335                                 struct i965_surface *dst_surface,
2336                                 const VARectangle *dst_rect,
2337                                 void *filter_param)
2338 {
2339     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->pp_load_save_context;
2340     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2341     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2342     int width[3], height[3], pitch[3], offset[3];
2343
2344     /* source surface */
2345     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
2346                                     width, height, pitch, offset);
2347
2348     /* destination surface */
2349     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
2350                                     width, height, pitch, offset);
2351
2352     /* private function & data */
2353     pp_context->pp_x_steps = pp_load_save_x_steps;
2354     pp_context->pp_y_steps = pp_load_save_y_steps;
2355     pp_context->private_context = &pp_context->pp_load_save_context;
2356     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
2357
2358     int dst_left_edge_extend = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;;
2359     pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
2360     pp_load_save_context->dest_y = dst_rect->y;
2361     pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
2362     pp_load_save_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2363
2364     pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
2365     pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
2366
2367     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
2368     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
2369
2370     // update u/v offset for packed yuv
2371     i965_update_src_surface_static_parameter(ctx, pp_context, src_surface);
2372     i965_update_dst_surface_static_parameter(ctx, pp_context, dst_surface);
2373
2374     dst_surface->flags = src_surface->flags;
2375
2376     return VA_STATUS_SUCCESS;
2377 }
2378
/* Horizontal step count for the scaling kernel: always 1; the argument is ignored. */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2384
2385 static int
2386 pp_scaling_y_steps(void *private_context)
2387 {
2388     struct pp_scaling_context *pp_scaling_context = private_context;
2389
2390     return pp_scaling_context->dest_h / 8;
2391 }
2392
2393 static int
2394 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2395 {
2396     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)pp_context->private_context;
2397     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2398     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2399     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2400     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2401
2402     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
2403     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
2404     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
2405     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
2406
2407     return 0;
2408 }
2409
2410 static VAStatus
2411 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2412                            const struct i965_surface *src_surface,
2413                            const VARectangle *src_rect,
2414                            struct i965_surface *dst_surface,
2415                            const VARectangle *dst_rect,
2416                            void *filter_param)
2417 {
2418     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->pp_scaling_context;
2419     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2420     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2421     struct object_surface *obj_surface;
2422     struct i965_sampler_state *sampler_state;
2423     int in_w, in_h, in_wpitch, in_hpitch;
2424     int out_w, out_h, out_wpitch, out_hpitch;
2425
2426     /* source surface */
2427     obj_surface = (struct object_surface *)src_surface->base;
2428     in_w = obj_surface->orig_width;
2429     in_h = obj_surface->orig_height;
2430     in_wpitch = obj_surface->width;
2431     in_hpitch = obj_surface->height;
2432
2433     /* source Y surface index 1 */
2434     i965_pp_set_surface_state(ctx, pp_context,
2435                               obj_surface->bo, 0,
2436                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2437                               1, 0);
2438
2439     /* source UV surface index 2 */
2440     i965_pp_set_surface_state(ctx, pp_context,
2441                               obj_surface->bo, in_wpitch * in_hpitch,
2442                               ALIGN(in_w, 2) / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2443                               2, 0);
2444
2445     /* destination surface */
2446     obj_surface = (struct object_surface *)dst_surface->base;
2447     out_w = obj_surface->orig_width;
2448     out_h = obj_surface->orig_height;
2449     out_wpitch = obj_surface->width;
2450     out_hpitch = obj_surface->height;
2451
2452     /* destination Y surface index 7 */
2453     i965_pp_set_surface_state(ctx, pp_context,
2454                               obj_surface->bo, 0,
2455                               ALIGN(out_w, 4) / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2456                               7, 1);
2457
2458     /* destination UV surface index 8 */
2459     i965_pp_set_surface_state(ctx, pp_context,
2460                               obj_surface->bo, out_wpitch * out_hpitch,
2461                               ALIGN(out_w, 4) / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2462                               8, 1);
2463
2464     /* sampler state */
2465     dri_bo_map(pp_context->sampler_state_table.bo, True);
2466     assert(pp_context->sampler_state_table.bo->virtual);
2467     sampler_state = pp_context->sampler_state_table.bo->virtual;
2468
2469     /* SIMD16 Y index 1 */
2470     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
2471     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2472     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2473     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2474     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2475
2476     /* SIMD16 UV index 2 */
2477     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
2478     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2479     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2480     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2481     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2482
2483     dri_bo_unmap(pp_context->sampler_state_table.bo);
2484
2485     /* private function & data */
2486     pp_context->pp_x_steps = pp_scaling_x_steps;
2487     pp_context->pp_y_steps = pp_scaling_y_steps;
2488     pp_context->private_context = &pp_context->pp_scaling_context;
2489     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
2490
2491     int dst_left_edge_extend = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
2492     float src_left_edge_extend = (float)dst_left_edge_extend * src_rect->width / dst_rect->width;
2493     pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
2494     pp_scaling_context->dest_y = dst_rect->y;
2495     pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2496     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
2497     pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend) / in_w;
2498     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
2499
2500     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2501
2502     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float)(src_rect->width + src_left_edge_extend) / in_w / (dst_rect->width + dst_left_edge_extend);
2503     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
2504     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
2505
2506     dst_surface->flags = src_surface->flags;
2507
2508     return VA_STATUS_SUCCESS;
2509 }
2510
2511 static int
2512 pp_avs_x_steps(void *private_context)
2513 {
2514     struct pp_avs_context *pp_avs_context = private_context;
2515
2516     return pp_avs_context->dest_w / 16;
2517 }
2518
/* Vertical step count for the AVS kernel: always 1; the argument is ignored. */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2524
2525 static int
2526 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2527 {
2528     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
2529     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2530     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2531     float src_x_steping, src_y_steping, video_step_delta;
2532     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
2533
2534     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
2535         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2536         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
2537     } else if (tmp_w >= pp_avs_context->dest_w) {
2538         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2539         pp_inline_parameter->grf6.video_step_delta = 0;
2540
2541         if (x == 0) {
2542             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
2543                                                                                                pp_avs_context->src_normalized_x;
2544         } else {
2545             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2546             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2547             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2548                                                                                                 16 * 15 * video_step_delta / 2;
2549         }
2550     } else {
2551         int n0, n1, n2, nls_left, nls_right;
2552         int factor_a = 5, factor_b = 4;
2553         float f;
2554
2555         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
2556         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
2557         n2 = tmp_w / (16 * factor_a);
2558         nls_left = n0 + n2;
2559         nls_right = n1 + n2;
2560         f = (float) n2 * 16 / tmp_w;
2561
2562         if (n0 < 5) {
2563             pp_inline_parameter->grf6.video_step_delta = 0.0;
2564
2565             if (x == 0) {
2566                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
2567                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2568             } else {
2569                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2570                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2571                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2572                                                                                                     16 * 15 * video_step_delta / 2;
2573             }
2574         } else {
2575             if (x < nls_left) {
2576                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
2577                 float a = f / (nls_left * 16 * factor_b);
2578                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
2579
2580                 pp_inline_parameter->grf6.video_step_delta = b;
2581
2582                 if (x == 0) {
2583                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2584                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
2585                 } else {
2586                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2587                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2588                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2589                                                                                                         16 * 15 * video_step_delta / 2;
2590                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
2591                 }
2592             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
2593                 /* scale the center linearly */
2594                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2595                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2596                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2597                                                                                                     16 * 15 * video_step_delta / 2;
2598                 pp_inline_parameter->grf6.video_step_delta = 0.0;
2599                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2600             } else {
2601                 float a = f / (nls_right * 16 * factor_b);
2602                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
2603
2604                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2605                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2606                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2607                                                                                                     16 * 15 * video_step_delta / 2;
2608                 pp_inline_parameter->grf6.video_step_delta = -b;
2609
2610                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
2611                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
2612                 else
2613                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
2614             }
2615         }
2616     }
2617
2618     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2619     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
2620     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2621     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
2622
2623     return 0;
2624 }
2625
2626 static const AVSConfig gen5_avs_config = {
2627     .coeff_frac_bits = 6,
2628     .coeff_epsilon = 1.0f / (1U << 6),
2629     .num_phases = 16,
2630     .num_luma_coeffs = 8,
2631     .num_chroma_coeffs = 4,
2632
2633     .coeff_range = {
2634         .lower_bound = {
2635             .y_k_h = { -0.25f, -0.5f, -1, 0, 0, -1, -0.5f, -0.25f },
2636             .y_k_v = { -0.25f, -0.5f, -1, 0, 0, -1, -0.5f, -0.25f },
2637             .uv_k_h = { -1, 0, 0, -1 },
2638             .uv_k_v = { -1, 0, 0, -1 },
2639         },
2640         .upper_bound = {
2641             .y_k_h = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2642             .y_k_v = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2643             .uv_k_h = { 1, 2, 2, 1 },
2644             .uv_k_v = { 1, 2, 2, 1 },
2645         },
2646     },
2647 };
2648
2649 static const AVSConfig gen6_avs_config = {
2650     .coeff_frac_bits = 6,
2651     .coeff_epsilon = 1.0f / (1U << 6),
2652     .num_phases = 16,
2653     .num_luma_coeffs = 8,
2654     .num_chroma_coeffs = 4,
2655
2656     .coeff_range = {
2657         .lower_bound = {
2658             .y_k_h = { -0.25f, -0.5f, -1, -2, -2, -1, -0.5f, -0.25f },
2659             .y_k_v = { -0.25f, -0.5f, -1, -2, -2, -1, -0.5f, -0.25f },
2660             .uv_k_h = { -1, 0, 0, -1 },
2661             .uv_k_v = { -1, 0, 0, -1 },
2662         },
2663         .upper_bound = {
2664             .y_k_h = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2665             .y_k_v = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2666             .uv_k_h = { 1, 2, 2, 1 },
2667             .uv_k_v = { 1, 2, 2, 1 },
2668         },
2669     },
2670 };
2671
2672 static VAStatus
2673 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2674                        const struct i965_surface *src_surface,
2675                        const VARectangle *src_rect,
2676                        struct i965_surface *dst_surface,
2677                        const VARectangle *dst_rect,
2678                        void *filter_param)
2679 {
2680     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
2681     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2682     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2683     struct object_surface *obj_surface;
2684     struct i965_sampler_8x8 *sampler_8x8;
2685     struct i965_sampler_8x8_state *sampler_8x8_state;
2686     int index;
2687     int in_w, in_h, in_wpitch, in_hpitch;
2688     int out_w, out_h, out_wpitch, out_hpitch;
2689     int i;
2690     AVSState * const avs = &pp_avs_context->state;
2691     float sx, sy;
2692
2693     const int nlas = (pp_context->filter_flags & VA_FILTER_SCALING_MASK) ==
2694                      VA_FILTER_SCALING_NL_ANAMORPHIC;
2695
2696     /* surface */
2697     obj_surface = (struct object_surface *)src_surface->base;
2698     in_w = obj_surface->orig_width;
2699     in_h = obj_surface->orig_height;
2700     in_wpitch = obj_surface->width;
2701     in_hpitch = obj_surface->height;
2702
2703     /* source Y surface index 1 */
2704     i965_pp_set_surface2_state(ctx, pp_context,
2705                                obj_surface->bo, 0,
2706                                in_w, in_h, in_wpitch,
2707                                0, 0,
2708                                SURFACE_FORMAT_Y8_UNORM, 0,
2709                                1);
2710
2711     /* source UV surface index 2 */
2712     i965_pp_set_surface2_state(ctx, pp_context,
2713                                obj_surface->bo, in_wpitch * in_hpitch,
2714                                in_w / 2, in_h / 2, in_wpitch,
2715                                0, 0,
2716                                SURFACE_FORMAT_R8B8_UNORM, 0,
2717                                2);
2718
2719     /* destination surface */
2720     obj_surface = (struct object_surface *)dst_surface->base;
2721     out_w = obj_surface->orig_width;
2722     out_h = obj_surface->orig_height;
2723     out_wpitch = obj_surface->width;
2724     out_hpitch = obj_surface->height;
2725     assert(out_w <= out_wpitch && out_h <= out_hpitch);
2726
2727     /* destination Y surface index 7 */
2728     i965_pp_set_surface_state(ctx, pp_context,
2729                               obj_surface->bo, 0,
2730                               ALIGN(out_w, 4) / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2731                               7, 1);
2732
2733     /* destination UV surface index 8 */
2734     i965_pp_set_surface_state(ctx, pp_context,
2735                               obj_surface->bo, out_wpitch * out_hpitch,
2736                               ALIGN(out_w, 4) / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2737                               8, 1);
2738
2739     /* sampler 8x8 state */
2740     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2741     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2742     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2743     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2744     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2745
2746     sx = (float)dst_rect->width / src_rect->width;
2747     sy = (float)dst_rect->height / src_rect->height;
2748     avs_update_coefficients(avs, sx, sy, pp_context->filter_flags);
2749
2750     assert(avs->config->num_phases == 16);
2751     for (i = 0; i <= 16; i++) {
2752         const AVSCoeffs * const coeffs = &avs->coeffs[i];
2753
2754         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 =
2755             intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
2756         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 =
2757             intel_format_convert(coeffs->y_k_h[1], 1, 6, 1);
2758         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 =
2759             intel_format_convert(coeffs->y_k_h[2], 1, 6, 1);
2760         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 =
2761             intel_format_convert(coeffs->y_k_h[3], 1, 6, 1);
2762         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 =
2763             intel_format_convert(coeffs->y_k_h[4], 1, 6, 1);
2764         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 =
2765             intel_format_convert(coeffs->y_k_h[5], 1, 6, 1);
2766         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 =
2767             intel_format_convert(coeffs->y_k_h[6], 1, 6, 1);
2768         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 =
2769             intel_format_convert(coeffs->y_k_h[7], 1, 6, 1);
2770
2771         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 =
2772             intel_format_convert(coeffs->uv_k_h[0], 1, 6, 1);
2773         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 =
2774             intel_format_convert(coeffs->uv_k_h[1], 1, 6, 1);
2775         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 =
2776             intel_format_convert(coeffs->uv_k_h[2], 1, 6, 1);
2777         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 =
2778             intel_format_convert(coeffs->uv_k_h[3], 1, 6, 1);
2779
2780         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 =
2781             intel_format_convert(coeffs->y_k_v[0], 1, 6, 1);
2782         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 =
2783             intel_format_convert(coeffs->y_k_v[1], 1, 6, 1);
2784         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 =
2785             intel_format_convert(coeffs->y_k_v[2], 1, 6, 1);
2786         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 =
2787             intel_format_convert(coeffs->y_k_v[3], 1, 6, 1);
2788         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 =
2789             intel_format_convert(coeffs->y_k_v[4], 1, 6, 1);
2790         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 =
2791             intel_format_convert(coeffs->y_k_v[5], 1, 6, 1);
2792         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 =
2793             intel_format_convert(coeffs->y_k_v[6], 1, 6, 1);
2794         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 =
2795             intel_format_convert(coeffs->y_k_v[7], 1, 6, 1);
2796
2797         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 =
2798             intel_format_convert(coeffs->uv_k_v[0], 1, 6, 1);
2799         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 =
2800             intel_format_convert(coeffs->uv_k_v[1], 1, 6, 1);
2801         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 =
2802             intel_format_convert(coeffs->uv_k_v[2], 1, 6, 1);
2803         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 =
2804             intel_format_convert(coeffs->uv_k_v[3], 1, 6, 1);
2805     }
2806
2807     /* Adaptive filter for all channels (DW4.15) */
2808     sampler_8x8_state->coefficients[0].dw4.table_1x_filter_c1 = 1U << 7;
2809
2810     sampler_8x8_state->dw136.default_sharpness_level =
2811         -avs_is_needed(pp_context->filter_flags);
2812     sampler_8x8_state->dw137.ilk.bypass_y_adaptive_filtering = 1;
2813     sampler_8x8_state->dw137.ilk.bypass_x_adaptive_filtering = 1;
2814     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2815
2816     /* sampler 8x8 */
2817     dri_bo_map(pp_context->sampler_state_table.bo, True);
2818     assert(pp_context->sampler_state_table.bo->virtual);
2819     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
2820     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2821
2822     /* sample_8x8 Y index 1 */
2823     index = 1;
2824     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2825     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2826     sampler_8x8[index].dw0.ief_bypass = 1;
2827     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2828     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2829     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2830     sampler_8x8[index].dw2.global_noise_estimation = 22;
2831     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2832     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2833     sampler_8x8[index].dw3.strong_edge_weight = 7;
2834     sampler_8x8[index].dw3.regular_weight = 2;
2835     sampler_8x8[index].dw3.non_edge_weight = 0;
2836     sampler_8x8[index].dw3.gain_factor = 40;
2837     sampler_8x8[index].dw4.steepness_boost = 0;
2838     sampler_8x8[index].dw4.steepness_threshold = 0;
2839     sampler_8x8[index].dw4.mr_boost = 0;
2840     sampler_8x8[index].dw4.mr_threshold = 5;
2841     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2842     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2843     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2844     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2845     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2846     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2847     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2848     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2849     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2850     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2851     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2852     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2853     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2854     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2855     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2856     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2857     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2858     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2859     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2860     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2861     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2862     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2863     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2864     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2865     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2866     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2867     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2868     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2869     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2870     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2871     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2872     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2873     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2874     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2875     sampler_8x8[index].dw13.limiter_boost = 0;
2876     sampler_8x8[index].dw13.minimum_limiter = 10;
2877     sampler_8x8[index].dw13.maximum_limiter = 11;
2878     sampler_8x8[index].dw14.clip_limiter = 130;
2879     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2880                       I915_GEM_DOMAIN_RENDER,
2881                       0,
2882                       0,
2883                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2884                       pp_context->sampler_state_table.bo_8x8);
2885
2886     /* sample_8x8 UV index 2 */
2887     index = 2;
2888     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2889     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2890     sampler_8x8[index].dw0.ief_bypass = 1;
2891     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2892     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2893     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2894     sampler_8x8[index].dw2.global_noise_estimation = 22;
2895     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2896     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2897     sampler_8x8[index].dw3.strong_edge_weight = 7;
2898     sampler_8x8[index].dw3.regular_weight = 2;
2899     sampler_8x8[index].dw3.non_edge_weight = 0;
2900     sampler_8x8[index].dw3.gain_factor = 40;
2901     sampler_8x8[index].dw4.steepness_boost = 0;
2902     sampler_8x8[index].dw4.steepness_threshold = 0;
2903     sampler_8x8[index].dw4.mr_boost = 0;
2904     sampler_8x8[index].dw4.mr_threshold = 5;
2905     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2906     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2907     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2908     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2909     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2910     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2911     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2912     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2913     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2914     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2915     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2916     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2917     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2918     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2919     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2920     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2921     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2922     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2923     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2924     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2925     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2926     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2927     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2928     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2929     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2930     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2931     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2932     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2933     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2934     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2935     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2936     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2937     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2938     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2939     sampler_8x8[index].dw13.limiter_boost = 0;
2940     sampler_8x8[index].dw13.minimum_limiter = 10;
2941     sampler_8x8[index].dw13.maximum_limiter = 11;
2942     sampler_8x8[index].dw14.clip_limiter = 130;
2943     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2944                       I915_GEM_DOMAIN_RENDER,
2945                       0,
2946                       0,
2947                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2948                       pp_context->sampler_state_table.bo_8x8);
2949
2950     dri_bo_unmap(pp_context->sampler_state_table.bo);
2951
2952     /* private function & data */
2953     pp_context->pp_x_steps = pp_avs_x_steps;
2954     pp_context->pp_y_steps = pp_avs_y_steps;
2955     pp_context->private_context = &pp_context->pp_avs_context;
2956     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2957
2958     int dst_left_edge_extend = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
2959     float src_left_edge_extend = (float)dst_left_edge_extend * src_rect->width / dst_rect->width;
2960     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
2961     pp_avs_context->dest_y = dst_rect->y;
2962     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2963     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
2964     pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend) / in_w;
2965     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2966     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
2967     pp_avs_context->src_h = src_rect->height;
2968
2969     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2970     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2971
2972     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float)(src_rect->width + src_left_edge_extend) / in_w / (dst_rect->width + dst_left_edge_extend);
2973     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2974     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2975     pp_inline_parameter->grf6.video_step_delta = 0.0;
2976
2977     dst_surface->flags = src_surface->flags;
2978
2979     return VA_STATUS_SUCCESS;
2980 }
2981
2982 static VAStatus
2983 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2984                              const struct i965_surface *src_surface,
2985                              const VARectangle *src_rect,
2986                              struct i965_surface *dst_surface,
2987                              const VARectangle *dst_rect,
2988                              void *filter_param)
2989 {
2990     return pp_nv12_avs_initialize(ctx, pp_context,
2991                                   src_surface,
2992                                   src_rect,
2993                                   dst_surface,
2994                                   dst_rect,
2995                                   filter_param);
2996 }
2997
2998 static int
2999 gen7_pp_avs_x_steps(void *private_context)
3000 {
3001     struct pp_avs_context *pp_avs_context = private_context;
3002
3003     return pp_avs_context->dest_w / 16;
3004 }
3005
3006 static int
3007 gen7_pp_avs_y_steps(void *private_context)
3008 {
3009     struct pp_avs_context *pp_avs_context = private_context;
3010
3011     return pp_avs_context->dest_h / 16;
3012 }
3013
3014 static int
3015 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3016 {
3017     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
3018     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3019
3020     pp_inline_parameter->grf9.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
3021     pp_inline_parameter->grf9.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
3022     pp_inline_parameter->grf9.constant_0 = 0xffffffff;
3023     pp_inline_parameter->grf9.sampler_load_main_video_x_scaling_step = pp_avs_context->horiz_range / pp_avs_context->src_w;
3024
3025     return 0;
3026 }
3027
3028 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx,
3029                                               struct i965_post_processing_context *pp_context,
3030                                               const struct i965_surface *surface)
3031 {
3032     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3033     int fourcc = pp_get_surface_fourcc(ctx, surface);
3034
3035     if (fourcc == VA_FOURCC_YUY2) {
3036         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3037         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3038         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3039     } else if (fourcc == VA_FOURCC_UYVY) {
3040         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
3041         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
3042         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
3043     }
3044 }
3045
3046 static VAStatus
3047 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3048                            const struct i965_surface *src_surface,
3049                            const VARectangle *src_rect,
3050                            struct i965_surface *dst_surface,
3051                            const VARectangle *dst_rect,
3052                            void *filter_param)
3053 {
3054     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
3055     struct i965_driver_data *i965 = i965_driver_data(ctx);
3056     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3057     struct gen7_sampler_8x8 *sampler_8x8;
3058     struct i965_sampler_8x8_state *sampler_8x8_state;
3059     int index, i;
3060     int width[3], height[3], pitch[3], offset[3];
3061     int src_width, src_height;
3062     AVSState * const avs = &pp_avs_context->state;
3063     float sx, sy;
3064     const float * yuv_to_rgb_coefs;
3065     size_t yuv_to_rgb_coefs_size;
3066
3067     /* source surface */
3068     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
3069                                          src_rect,
3070                                          width, height, pitch, offset);
3071     src_width = width[0];
3072     src_height = height[0];
3073
3074     /* destination surface */
3075     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
3076                                          dst_rect,
3077                                          width, height, pitch, offset);
3078
3079     /* sampler 8x8 state */
3080     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
3081     assert(pp_context->sampler_state_table.bo_8x8->virtual);
3082     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
3083     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
3084     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
3085
3086     sx = (float)dst_rect->width / src_rect->width;
3087     sy = (float)dst_rect->height / src_rect->height;
3088     avs_update_coefficients(avs, sx, sy, pp_context->filter_flags);
3089
3090     assert(avs->config->num_phases == 16);
3091     for (i = 0; i <= 16; i++) {
3092         const AVSCoeffs * const coeffs = &avs->coeffs[i];
3093
3094         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 =
3095             intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
3096         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 =
3097             intel_format_convert(coeffs->y_k_h[1], 1, 6, 1);
3098         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 =
3099             intel_format_convert(coeffs->y_k_h[2], 1, 6, 1);
3100         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 =
3101             intel_format_convert(coeffs->y_k_h[3], 1, 6, 1);
3102         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 =
3103             intel_format_convert(coeffs->y_k_h[4], 1, 6, 1);
3104         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 =
3105             intel_format_convert(coeffs->y_k_h[5], 1, 6, 1);
3106         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 =
3107             intel_format_convert(coeffs->y_k_h[6], 1, 6, 1);
3108         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 =
3109             intel_format_convert(coeffs->y_k_h[7], 1, 6, 1);
3110
3111         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 =
3112             intel_format_convert(coeffs->uv_k_h[0], 1, 6, 1);
3113         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 =
3114             intel_format_convert(coeffs->uv_k_h[1], 1, 6, 1);
3115         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 =
3116             intel_format_convert(coeffs->uv_k_h[2], 1, 6, 1);
3117         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 =
3118             intel_format_convert(coeffs->uv_k_h[3], 1, 6, 1);
3119
3120         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 =
3121             intel_format_convert(coeffs->y_k_v[0], 1, 6, 1);
3122         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 =
3123             intel_format_convert(coeffs->y_k_v[1], 1, 6, 1);
3124         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 =
3125             intel_format_convert(coeffs->y_k_v[2], 1, 6, 1);
3126         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 =
3127             intel_format_convert(coeffs->y_k_v[3], 1, 6, 1);
3128         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 =
3129             intel_format_convert(coeffs->y_k_v[4], 1, 6, 1);
3130         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 =
3131             intel_format_convert(coeffs->y_k_v[5], 1, 6, 1);
3132         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 =
3133             intel_format_convert(coeffs->y_k_v[6], 1, 6, 1);
3134         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 =
3135             intel_format_convert(coeffs->y_k_v[7], 1, 6, 1);
3136
3137         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 =
3138             intel_format_convert(coeffs->uv_k_v[0], 1, 6, 1);
3139         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 =
3140             intel_format_convert(coeffs->uv_k_v[1], 1, 6, 1);
3141         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 =
3142             intel_format_convert(coeffs->uv_k_v[2], 1, 6, 1);
3143         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 =
3144             intel_format_convert(coeffs->uv_k_v[3], 1, 6, 1);
3145     }
3146
3147     sampler_8x8_state->dw136.default_sharpness_level =
3148         -avs_is_needed(pp_context->filter_flags);
3149     if (IS_HASWELL(i965->intel.device_info)) {
3150         sampler_8x8_state->dw137.hsw.adaptive_filter_for_all_channel = 1;
3151         sampler_8x8_state->dw137.hsw.bypass_y_adaptive_filtering = 1;
3152         sampler_8x8_state->dw137.hsw.bypass_x_adaptive_filtering = 1;
3153     } else {
3154         sampler_8x8_state->coefficients[0].dw4.table_1x_filter_c1 = 1U << 7;
3155         sampler_8x8_state->dw137.ilk.bypass_y_adaptive_filtering = 1;
3156         sampler_8x8_state->dw137.ilk.bypass_x_adaptive_filtering = 1;
3157     }
3158     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
3159
3160     /* sampler 8x8 */
3161     dri_bo_map(pp_context->sampler_state_table.bo, True);
3162     assert(pp_context->sampler_state_table.bo->virtual);
3163     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
3164     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
3165
3166     /* sample_8x8 Y index 4 */
3167     index = 4;
3168     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3169     sampler_8x8[index].dw0.global_noise_estimation = 255;
3170     sampler_8x8[index].dw0.ief_bypass = 1;
3171
3172     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3173
3174     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3175     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3176     sampler_8x8[index].dw2.r5x_coefficient = 9;
3177     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3178     sampler_8x8[index].dw2.r5c_coefficient = 3;
3179
3180     sampler_8x8[index].dw3.r3x_coefficient = 27;
3181     sampler_8x8[index].dw3.r3c_coefficient = 5;
3182     sampler_8x8[index].dw3.gain_factor = 40;
3183     sampler_8x8[index].dw3.non_edge_weight = 1;
3184     sampler_8x8[index].dw3.regular_weight = 2;
3185     sampler_8x8[index].dw3.strong_edge_weight = 7;
3186     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3187
3188     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3189                       I915_GEM_DOMAIN_RENDER,
3190                       0,
3191                       0,
3192                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3193                       pp_context->sampler_state_table.bo_8x8);
3194
3195     /* sample_8x8 UV index 8 */
3196     index = 8;
3197     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3198     sampler_8x8[index].dw0.disable_8x8_filter = 0;
3199     sampler_8x8[index].dw0.global_noise_estimation = 255;
3200     sampler_8x8[index].dw0.ief_bypass = 1;
3201     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3202     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3203     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3204     sampler_8x8[index].dw2.r5x_coefficient = 9;
3205     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3206     sampler_8x8[index].dw2.r5c_coefficient = 3;
3207     sampler_8x8[index].dw3.r3x_coefficient = 27;
3208     sampler_8x8[index].dw3.r3c_coefficient = 5;
3209     sampler_8x8[index].dw3.gain_factor = 40;
3210     sampler_8x8[index].dw3.non_edge_weight = 1;
3211     sampler_8x8[index].dw3.regular_weight = 2;
3212     sampler_8x8[index].dw3.strong_edge_weight = 7;
3213     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3214
3215     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3216                       I915_GEM_DOMAIN_RENDER,
3217                       0,
3218                       0,
3219                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3220                       pp_context->sampler_state_table.bo_8x8);
3221
3222     /* sampler_8x8 V, index 12 */
3223     index = 12;
3224     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3225     sampler_8x8[index].dw0.disable_8x8_filter = 0;
3226     sampler_8x8[index].dw0.global_noise_estimation = 255;
3227     sampler_8x8[index].dw0.ief_bypass = 1;
3228     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3229     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3230     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3231     sampler_8x8[index].dw2.r5x_coefficient = 9;
3232     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3233     sampler_8x8[index].dw2.r5c_coefficient = 3;
3234     sampler_8x8[index].dw3.r3x_coefficient = 27;
3235     sampler_8x8[index].dw3.r3c_coefficient = 5;
3236     sampler_8x8[index].dw3.gain_factor = 40;
3237     sampler_8x8[index].dw3.non_edge_weight = 1;
3238     sampler_8x8[index].dw3.regular_weight = 2;
3239     sampler_8x8[index].dw3.strong_edge_weight = 7;
3240     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3241
3242     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3243                       I915_GEM_DOMAIN_RENDER,
3244                       0,
3245                       0,
3246                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3247                       pp_context->sampler_state_table.bo_8x8);
3248
3249     dri_bo_unmap(pp_context->sampler_state_table.bo);
3250
3251     /* private function & data */
3252     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
3253     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
3254     pp_context->private_context = &pp_context->pp_avs_context;
3255     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
3256
3257     int dst_left_edge_extend = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
3258     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
3259     pp_avs_context->dest_y = dst_rect->y;
3260     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
3261     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
3262     pp_avs_context->src_w = src_rect->width;
3263     pp_avs_context->src_h = src_rect->height;
3264     pp_avs_context->horiz_range = (float)src_rect->width / src_width;
3265
3266     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
3267     dw = MAX(dw, dst_rect->width + dst_left_edge_extend);
3268
3269     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3270     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
3271     if (IS_HASWELL(i965->intel.device_info))
3272         pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */
3273
3274     if (pp_static_parameter->grf2.avs_wa_enable) {
3275         int src_fourcc = pp_get_surface_fourcc(ctx, src_surface);
3276         if ((src_fourcc == VA_FOURCC_RGBA) ||
3277             (src_fourcc == VA_FOURCC_RGBX) ||
3278             (src_fourcc == VA_FOURCC_BGRA) ||
3279             (src_fourcc == VA_FOURCC_BGRX)) {
3280             pp_static_parameter->grf2.avs_wa_enable = 0;
3281         }
3282     }
3283
3284     pp_static_parameter->grf2.avs_wa_width = src_width;
3285     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * src_width);
3286     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * src_width);
3287     pp_static_parameter->grf2.alpha = 255;
3288
3289     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
3290     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height;
3291     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
3292                                                                    (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
3293     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
3294                                                                      (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
3295
3296     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
3297
3298     yuv_to_rgb_coefs = i915_color_standard_to_coefs(i915_filter_to_color_standard(src_surface->flags &
3299                                                                                   VA_SRC_COLOR_MASK),
3300                                                     &yuv_to_rgb_coefs_size);
3301     memcpy(&pp_static_parameter->grf7, yuv_to_rgb_coefs, yuv_to_rgb_coefs_size);
3302
3303     dst_surface->flags = src_surface->flags;
3304
3305     return VA_STATUS_SUCCESS;
3306 }
3307
/*
 * Horizontal step count for the DNDI kernel walker.
 *
 * Always 1: the private context is not consulted.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context; /* unused */

    return 1;
}
3313
3314 static int
3315 pp_dndi_y_steps(void *private_context)
3316 {
3317     struct pp_dndi_context *pp_dndi_context = private_context;
3318
3319     return pp_dndi_context->dest_h / 4;
3320 }
3321
3322 static int
3323 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3324 {
3325     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3326
3327     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3328     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3329
3330     return 0;
3331 }
3332
3333 static VAStatus
3334 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3335                         const struct i965_surface *src_surface,
3336                         const VARectangle *src_rect,
3337                         struct i965_surface *dst_surface,
3338                         const VARectangle *dst_rect,
3339                         void *filter_param)
3340 {
3341     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
3342     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3343     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3344     const VAProcPipelineParameterBuffer * const pipe_params =
3345         pp_context->pipeline_param;
3346     const VAProcFilterParameterBufferDeinterlacing * const deint_params =
3347         filter_param;
3348     struct object_surface * const src_obj_surface = (struct object_surface *)
3349                                                     src_surface->base;
3350     struct object_surface * const dst_obj_surface = (struct object_surface *)
3351                                                     dst_surface->base;
3352     struct object_surface *obj_surface;
3353     struct i965_sampler_dndi *sampler_dndi;
3354     int index, dndi_top_first;
3355     int w, h, orig_w, orig_h;
3356     VAStatus status;
3357
3358     status = pp_dndi_context_init_surface_params(dndi_ctx, src_obj_surface,
3359                                                  pipe_params, deint_params);
3360     if (status != VA_STATUS_SUCCESS)
3361         return status;
3362
3363     status = pp_dndi_context_ensure_surfaces(ctx, pp_context,
3364                                              src_obj_surface, dst_obj_surface);
3365     if (status != VA_STATUS_SUCCESS)
3366         return status;
3367
3368     status = pp_dndi_context_ensure_surfaces_storage(ctx, pp_context,
3369                                                      src_obj_surface, dst_obj_surface);
3370     if (status != VA_STATUS_SUCCESS)
3371         return status;
3372
3373     /* Current input surface (index = 4) */
3374     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].obj_surface;
3375     i965_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3376                                obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3377                                0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 4);
3378
3379     /* Previous input surface (index = 5) */
3380     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS].obj_surface;
3381     i965_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3382                                obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3383                                0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 5);
3384
3385     /* STMM input surface (index = 6) */
3386     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_STMM].obj_surface;
3387     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3388                               obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3389                               I965_SURFACEFORMAT_R8_UNORM, 6, 1);
3390
3391     /* Previous output surfaces (index = { 7, 8 }) */
3392     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS].obj_surface;
3393     w = obj_surface->width;
3394     h = obj_surface->height;
3395     orig_w = obj_surface->orig_width;
3396     orig_h = obj_surface->orig_height;
3397
3398     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3399                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 7, 1);
3400     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3401                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 8, 1);
3402
3403     /* Current output surfaces (index = { 10, 11 }) */
3404     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT].obj_surface;
3405     w = obj_surface->width;
3406     h = obj_surface->height;
3407     orig_w = obj_surface->orig_width;
3408     orig_h = obj_surface->orig_height;
3409
3410     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3411                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 10, 1);
3412     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3413                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 11, 1);
3414
3415     /* STMM output surface (index = 20) */
3416     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM].obj_surface;
3417     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3418                               obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3419                               I965_SURFACEFORMAT_R8_UNORM, 20, 1);
3420
3421     dndi_top_first = !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
3422
3423     /* sampler dndi */
3424     dri_bo_map(pp_context->sampler_state_table.bo, True);
3425     assert(pp_context->sampler_state_table.bo->virtual);
3426     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3427     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3428
3429     /* sample dndi index 1 */
3430     index = 0;
3431     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3432     sampler_dndi[index].dw0.denoise_history_delta = 7;          // 0-15, default is 8
3433     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
3434     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3435
3436     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3437     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3438     sampler_dndi[index].dw1.stmm_c2 = 1;
3439     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3440     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3441
3442     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
3443     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 1;    // 0-15
3444     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3445     sampler_dndi[index].dw2.good_neighbor_threshold = 12;                // 0-63
3446
3447     sampler_dndi[index].dw3.maximum_stmm = 150;
3448     sampler_dndi[index].dw3.multipler_for_vecm = 30;
3449     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3450     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3451     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3452
3453     sampler_dndi[index].dw4.sdi_delta = 5;
3454     sampler_dndi[index].dw4.sdi_threshold = 100;
3455     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3456     sampler_dndi[index].dw4.stmm_shift_up = 1;
3457     sampler_dndi[index].dw4.stmm_shift_down = 3;
3458     sampler_dndi[index].dw4.minimum_stmm = 118;
3459
3460     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3461     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3462     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3463     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
3464
3465     sampler_dndi[index].dw6.dn_enable = 1;
3466     sampler_dndi[index].dw6.di_enable = 1;
3467     sampler_dndi[index].dw6.di_partial = 0;
3468     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3469     sampler_dndi[index].dw6.dndi_stream_id = 0;
3470     sampler_dndi[index].dw6.dndi_first_frame = dndi_ctx->is_first_frame;
3471     sampler_dndi[index].dw6.progressive_dn = 0;
3472     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3473     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3474     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3475
3476     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3477     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3478     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3479     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3480
3481     dri_bo_unmap(pp_context->sampler_state_table.bo);
3482
3483     /* private function & data */
3484     pp_context->pp_x_steps = pp_dndi_x_steps;
3485     pp_context->pp_y_steps = pp_dndi_y_steps;
3486     pp_context->private_context = dndi_ctx;
3487     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
3488
3489     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3490     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
3491     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
3492     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
3493
3494     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3495     pp_inline_parameter->grf5.number_blocks = w / 16;
3496     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3497     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3498
3499     dndi_ctx->dest_w = w;
3500     dndi_ctx->dest_h = h;
3501
3502     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3503     return VA_STATUS_SUCCESS;
3504 }
3505
/*
 * Horizontal step count for the denoise (DN) kernel walker.
 *
 * Always 1: the private context is not consulted.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context; /* unused */

    return 1;
}
3511
3512 static int
3513 pp_dn_y_steps(void *private_context)
3514 {
3515     struct pp_dn_context *pp_dn_context = private_context;
3516
3517     return pp_dn_context->dest_h / 8;
3518 }
3519
3520 static int
3521 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3522 {
3523     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3524
3525     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3526     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
3527
3528     return 0;
3529 }
3530
3531 static VAStatus
3532 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3533                       const struct i965_surface *src_surface,
3534                       const VARectangle *src_rect,
3535                       struct i965_surface *dst_surface,
3536                       const VARectangle *dst_rect,
3537                       void *filter_param)
3538 {
3539     struct i965_driver_data *i965 = i965_driver_data(ctx);
3540     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
3541     struct object_surface *obj_surface;
3542     struct i965_sampler_dndi *sampler_dndi;
3543     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3544     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3545     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3546     int index;
3547     int w, h;
3548     int orig_w, orig_h;
3549     int dn_strength = 15;
3550     int dndi_top_first = 1;
3551     int dn_progressive = 0;
3552
3553     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3554         dndi_top_first = 1;
3555         dn_progressive = 1;
3556     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3557         dndi_top_first = 1;
3558         dn_progressive = 0;
3559     } else {
3560         dndi_top_first = 0;
3561         dn_progressive = 0;
3562     }
3563
3564     if (dn_filter_param) {
3565         float value = dn_filter_param->value;
3566
3567         if (value > 1.0)
3568             value = 1.0;
3569
3570         if (value < 0.0)
3571             value = 0.0;
3572
3573         dn_strength = (int)(value * 31.0F);
3574     }
3575
3576     /* surface */
3577     obj_surface = (struct object_surface *)src_surface->base;
3578     orig_w = obj_surface->orig_width;
3579     orig_h = obj_surface->orig_height;
3580     w = obj_surface->width;
3581     h = obj_surface->height;
3582
3583     if (pp_dn_context->stmm_bo == NULL) {
3584         pp_dn_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
3585                                               "STMM surface",
3586                                               w * h,
3587                                               4096);
3588         assert(pp_dn_context->stmm_bo);
3589     }
3590
3591     /* source UV surface index 2 */
3592     i965_pp_set_surface_state(ctx, pp_context,
3593                               obj_surface->bo, w * h,
3594                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3595                               2, 0);
3596
3597     /* source YUV surface index 4 */
3598     i965_pp_set_surface2_state(ctx, pp_context,
3599                                obj_surface->bo, 0,
3600                                orig_w, orig_h, w,
3601                                0, h,
3602                                SURFACE_FORMAT_PLANAR_420_8, 1,
3603                                4);
3604
3605     /* source STMM surface index 20 */
3606     i965_pp_set_surface_state(ctx, pp_context,
3607                               pp_dn_context->stmm_bo, 0,
3608                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3609                               20, 1);
3610
3611     /* destination surface */
3612     obj_surface = (struct object_surface *)dst_surface->base;
3613     orig_w = obj_surface->orig_width;
3614     orig_h = obj_surface->orig_height;
3615     w = obj_surface->width;
3616     h = obj_surface->height;
3617
3618     /* destination Y surface index 7 */
3619     i965_pp_set_surface_state(ctx, pp_context,
3620                               obj_surface->bo, 0,
3621                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3622                               7, 1);
3623
3624     /* destination UV surface index 8 */
3625     i965_pp_set_surface_state(ctx, pp_context,
3626                               obj_surface->bo, w * h,
3627                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3628                               8, 1);
3629     /* sampler dn */
3630     dri_bo_map(pp_context->sampler_state_table.bo, True);
3631     assert(pp_context->sampler_state_table.bo->virtual);
3632     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3633     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3634
3635     /* sample dndi index 1 */
3636     index = 0;
3637     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3638     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
3639     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3640     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3641
3642     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3643     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3644     sampler_dndi[index].dw1.stmm_c2 = 0;
3645     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3646     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3647
3648     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3649     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
3650     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3651     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
3652
3653     sampler_dndi[index].dw3.maximum_stmm = 128;
3654     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3655     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3656     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3657     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3658
3659     sampler_dndi[index].dw4.sdi_delta = 8;
3660     sampler_dndi[index].dw4.sdi_threshold = 128;
3661     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3662     sampler_dndi[index].dw4.stmm_shift_up = 0;
3663     sampler_dndi[index].dw4.stmm_shift_down = 0;
3664     sampler_dndi[index].dw4.minimum_stmm = 0;
3665
3666     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3667     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3668     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3669     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3670
3671     sampler_dndi[index].dw6.dn_enable = 1;
3672     sampler_dndi[index].dw6.di_enable = 0;
3673     sampler_dndi[index].dw6.di_partial = 0;
3674     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3675     sampler_dndi[index].dw6.dndi_stream_id = 1;
3676     sampler_dndi[index].dw6.dndi_first_frame = 1;
3677     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
3678     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3679     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3680     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3681
3682     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3683     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3684     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3685     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3686
3687     dri_bo_unmap(pp_context->sampler_state_table.bo);
3688
3689     /* private function & data */
3690     pp_context->pp_x_steps = pp_dn_x_steps;
3691     pp_context->pp_y_steps = pp_dn_y_steps;
3692     pp_context->private_context = &pp_context->pp_dn_context;
3693     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
3694
3695     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3696     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
3697     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
3698     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
3699
3700     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3701     pp_inline_parameter->grf5.number_blocks = w / 16;
3702     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3703     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3704
3705     pp_dn_context->dest_w = w;
3706     pp_dn_context->dest_h = h;
3707
3708     dst_surface->flags = src_surface->flags;
3709
3710     return VA_STATUS_SUCCESS;
3711 }
3712
3713 static int
3714 gen7_pp_dndi_x_steps(void *private_context)
3715 {
3716     struct pp_dndi_context *pp_dndi_context = private_context;
3717
3718     return pp_dndi_context->dest_w / 16;
3719 }
3720
3721 static int
3722 gen7_pp_dndi_y_steps(void *private_context)
3723 {
3724     struct pp_dndi_context *pp_dndi_context = private_context;
3725
3726     return pp_dndi_context->dest_h / 4;
3727 }
3728
3729 static int
3730 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3731 {
3732     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3733
3734     pp_inline_parameter->grf9.destination_block_horizontal_origin = x * 16;
3735     pp_inline_parameter->grf9.destination_block_vertical_origin = y * 4;
3736
3737     return 0;
3738 }
3739
3740 static VAStatus
3741 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3742                              const struct i965_surface *src_surface,
3743                              const VARectangle *src_rect,
3744                              struct i965_surface *dst_surface,
3745                              const VARectangle *dst_rect,
3746                              void *filter_param)
3747 {
3748     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
3749     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3750     const VAProcPipelineParameterBuffer * const pipe_params =
3751         pp_context->pipeline_param;
3752     const VAProcFilterParameterBufferDeinterlacing * const deint_params =
3753         filter_param;
3754     struct object_surface * const src_obj_surface = (struct object_surface *)
3755                                                     src_surface->base;
3756     struct object_surface * const dst_obj_surface = (struct object_surface *)
3757                                                     dst_surface->base;
3758     struct object_surface *obj_surface;
3759     struct gen7_sampler_dndi *sampler_dndi;
3760     int index, dndi_top_first;
3761     int w, h, orig_w, orig_h;
3762     VAStatus status;
3763
3764     status = pp_dndi_context_init_surface_params(dndi_ctx, src_obj_surface,
3765                                                  pipe_params, deint_params);
3766     if (status != VA_STATUS_SUCCESS)
3767         return status;
3768
3769     status = pp_dndi_context_ensure_surfaces(ctx, pp_context,
3770                                              src_obj_surface, dst_obj_surface);
3771     if (status != VA_STATUS_SUCCESS)
3772         return status;
3773
3774     status = pp_dndi_context_ensure_surfaces_storage(ctx, pp_context,
3775                                                      src_obj_surface, dst_obj_surface);
3776     if (status != VA_STATUS_SUCCESS)
3777         return status;
3778
3779     /* Current input surface (index = 3) */
3780     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].obj_surface;
3781     gen7_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3782                                obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3783                                0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 3);
3784
3785     /* Previous input surface (index = 4) */
3786     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS].obj_surface;
3787     gen7_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3788                                obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3789                                0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 4);
3790
3791     /* STMM input surface (index = 5) */
3792     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_STMM].obj_surface;
3793     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3794                               obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3795                               I965_SURFACEFORMAT_R8_UNORM, 5, 1);
3796
3797     /* Previous output surfaces (index = { 27, 28 }) */
3798     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS].obj_surface;
3799     w = obj_surface->width;
3800     h = obj_surface->height;
3801     orig_w = obj_surface->orig_width;
3802     orig_h = obj_surface->orig_height;
3803
3804     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3805                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 27, 1);
3806     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3807                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 28, 1);
3808
3809     /* Current output surfaces (index = { 30, 31 }) */
3810     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT].obj_surface;
3811     w = obj_surface->width;
3812     h = obj_surface->height;
3813     orig_w = obj_surface->orig_width;
3814     orig_h = obj_surface->orig_height;
3815
3816     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3817                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 30, 1);
3818     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3819                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 31, 1);
3820
3821     /* STMM output surface (index = 33) */
3822     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM].obj_surface;
3823     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3824                               obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3825                               I965_SURFACEFORMAT_R8_UNORM, 33, 1);
3826
3827     dndi_top_first = !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
3828
3829     /* sampler dndi */
3830     dri_bo_map(pp_context->sampler_state_table.bo, True);
3831     assert(pp_context->sampler_state_table.bo->virtual);
3832     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3833     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3834
3835     /* sample dndi index 0 */
3836     index = 0;
3837     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3838     sampler_dndi[index].dw0.dnmh_delt = 7;
3839     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
3840     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
3841     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
3842     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3843
3844     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3845     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3846     sampler_dndi[index].dw1.stmm_c2 = 2;
3847     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3848     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3849
3850     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
3851     sampler_dndi[index].dw2.bne_edge_th = 1;
3852     sampler_dndi[index].dw2.smooth_mv_th = 0;
3853     sampler_dndi[index].dw2.sad_tight_th = 5;
3854     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
3855     sampler_dndi[index].dw2.good_neighbor_th = 12;
3856
3857     sampler_dndi[index].dw3.maximum_stmm = 150;
3858     sampler_dndi[index].dw3.multipler_for_vecm = 30;
3859     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3860     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3861     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3862
3863     sampler_dndi[index].dw4.sdi_delta = 5;
3864     sampler_dndi[index].dw4.sdi_threshold = 100;
3865     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3866     sampler_dndi[index].dw4.stmm_shift_up = 1;
3867     sampler_dndi[index].dw4.stmm_shift_down = 3;
3868     sampler_dndi[index].dw4.minimum_stmm = 118;
3869
3870     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3871     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3872     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3873     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
3874     sampler_dndi[index].dw6.dn_enable = 0;
3875     sampler_dndi[index].dw6.di_enable = 1;
3876     sampler_dndi[index].dw6.di_partial = 0;
3877     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3878     sampler_dndi[index].dw6.dndi_stream_id = 1;
3879     sampler_dndi[index].dw6.dndi_first_frame = dndi_ctx->is_first_frame;
3880     sampler_dndi[index].dw6.progressive_dn = 0;
3881     sampler_dndi[index].dw6.mcdi_enable =
3882         (deint_params->algorithm == VAProcDeinterlacingMotionCompensated);
3883     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3884     sampler_dndi[index].dw6.cat_th1 = 0;
3885     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3886     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3887
3888     sampler_dndi[index].dw7.sad_tha = 5;
3889     sampler_dndi[index].dw7.sad_thb = 10;
3890     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3891     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
3892     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3893     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3894     sampler_dndi[index].dw7.neighborpixel_th = 10;
3895     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3896
3897     dri_bo_unmap(pp_context->sampler_state_table.bo);
3898
3899     /* private function & data */
3900     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3901     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3902     pp_context->private_context = dndi_ctx;
3903     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3904
3905     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3906     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3907     pp_static_parameter->grf1.di_top_field_first = 0;
3908     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3909
3910     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3911     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3912     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3913
3914     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3915     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3916
3917     dndi_ctx->dest_w = w;
3918     dndi_ctx->dest_h = h;
3919
3920     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3921     return VA_STATUS_SUCCESS;
3922 }
3923
3924 static int
3925 gen7_pp_dn_x_steps(void *private_context)
3926 {
3927     struct pp_dn_context *pp_dn_context = private_context;
3928
3929     return pp_dn_context->dest_w / 16;
3930 }
3931
3932 static int
3933 gen7_pp_dn_y_steps(void *private_context)
3934 {
3935     struct pp_dn_context *pp_dn_context = private_context;
3936
3937     return pp_dn_context->dest_h / 4;
3938 }
3939
3940 static int
3941 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3942 {
3943     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3944
3945     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3946     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3947
3948     return 0;
3949 }
3950
3951 static VAStatus
3952 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3953                            const struct i965_surface *src_surface,
3954                            const VARectangle *src_rect,
3955                            struct i965_surface *dst_surface,
3956                            const VARectangle *dst_rect,
3957                            void *filter_param)
3958 {
3959     struct i965_driver_data *i965 = i965_driver_data(ctx);
3960     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
3961     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3962     struct object_surface *obj_surface;
3963     struct gen7_sampler_dndi *sampler_dn;
3964     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3965     int index;
3966     int w, h;
3967     int orig_w, orig_h;
3968     int dn_strength = 15;
3969     int dndi_top_first = 1;
3970     int dn_progressive = 0;
3971
3972     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3973         dndi_top_first = 1;
3974         dn_progressive = 1;
3975     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3976         dndi_top_first = 1;
3977         dn_progressive = 0;
3978     } else {
3979         dndi_top_first = 0;
3980         dn_progressive = 0;
3981     }
3982
3983     if (dn_filter_param) {
3984         float value = dn_filter_param->value;
3985
3986         if (value > 1.0)
3987             value = 1.0;
3988
3989         if (value < 0.0)
3990             value = 0.0;
3991
3992         dn_strength = (int)(value * 31.0F);
3993     }
3994
3995     /* surface */
3996     obj_surface = (struct object_surface *)src_surface->base;
3997     orig_w = obj_surface->orig_width;
3998     orig_h = obj_surface->orig_height;
3999     w = obj_surface->width;
4000     h = obj_surface->height;
4001
4002     if (pp_dn_context->stmm_bo == NULL) {
4003         pp_dn_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
4004                                               "STMM surface",
4005                                               w * h,
4006                                               4096);
4007         assert(pp_dn_context->stmm_bo);
4008     }
4009
4010     /* source UV surface index 1 */
4011     gen7_pp_set_surface_state(ctx, pp_context,
4012                               obj_surface->bo, w * h,
4013                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4014                               1, 0);
4015
4016     /* source YUV surface index 3 */
4017     gen7_pp_set_surface2_state(ctx, pp_context,
4018                                obj_surface->bo, 0,
4019                                orig_w, orig_h, w,
4020                                0, h,
4021                                SURFACE_FORMAT_PLANAR_420_8, 1,
4022                                3);
4023
4024     /* source (temporal reference) YUV surface index 4 */
4025     gen7_pp_set_surface2_state(ctx, pp_context,
4026                                obj_surface->bo, 0,
4027                                orig_w, orig_h, w,
4028                                0, h,
4029                                SURFACE_FORMAT_PLANAR_420_8, 1,
4030                                4);
4031
4032     /* STMM / History Statistics input surface, index 5 */
4033     gen7_pp_set_surface_state(ctx, pp_context,
4034                               pp_dn_context->stmm_bo, 0,
4035                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4036                               33, 1);
4037
4038     /* destination surface */
4039     obj_surface = (struct object_surface *)dst_surface->base;
4040     orig_w = obj_surface->orig_width;
4041     orig_h = obj_surface->orig_height;
4042     w = obj_surface->width;
4043     h = obj_surface->height;
4044
4045     /* destination Y surface index 24 */
4046     gen7_pp_set_surface_state(ctx, pp_context,
4047                               obj_surface->bo, 0,
4048                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4049                               24, 1);
4050
4051     /* destination UV surface index 25 */
4052     gen7_pp_set_surface_state(ctx, pp_context,
4053                               obj_surface->bo, w * h,
4054                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4055                               25, 1);
4056
4057     /* sampler dn */
4058     dri_bo_map(pp_context->sampler_state_table.bo, True);
4059     assert(pp_context->sampler_state_table.bo->virtual);
4060     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
4061     sampler_dn = pp_context->sampler_state_table.bo->virtual;
4062
4063     /* sample dn index 1 */
4064     index = 0;
4065     sampler_dn[index].dw0.denoise_asd_threshold = 0;
4066     sampler_dn[index].dw0.dnmh_delt = 8;
4067     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
4068     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
4069     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
4070     sampler_dn[index].dw0.denoise_stad_threshold = 0;
4071
4072     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
4073     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
4074     sampler_dn[index].dw1.stmm_c2 = 0;
4075     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
4076     sampler_dn[index].dw1.temporal_difference_threshold = 16;
4077
4078     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
4079     sampler_dn[index].dw2.bne_edge_th = 1;
4080     sampler_dn[index].dw2.smooth_mv_th = 0;
4081     sampler_dn[index].dw2.sad_tight_th = 5;
4082     sampler_dn[index].dw2.cat_slope_minus1 = 9;
4083     sampler_dn[index].dw2.good_neighbor_th = 4;
4084
4085     sampler_dn[index].dw3.maximum_stmm = 128;
4086     sampler_dn[index].dw3.multipler_for_vecm = 2;
4087     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
4088     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
4089     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
4090
4091     sampler_dn[index].dw4.sdi_delta = 8;
4092     sampler_dn[index].dw4.sdi_threshold = 128;
4093     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
4094     sampler_dn[index].dw4.stmm_shift_up = 0;
4095     sampler_dn[index].dw4.stmm_shift_down = 0;
4096     sampler_dn[index].dw4.minimum_stmm = 0;
4097
4098     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
4099     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
4100     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
4101     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
4102
4103     sampler_dn[index].dw6.dn_enable = 1;
4104     sampler_dn[index].dw6.di_enable = 0;
4105     sampler_dn[index].dw6.di_partial = 0;
4106     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
4107     sampler_dn[index].dw6.dndi_stream_id = 1;
4108     sampler_dn[index].dw6.dndi_first_frame = 1;
4109     sampler_dn[index].dw6.progressive_dn = dn_progressive;
4110     sampler_dn[index].dw6.mcdi_enable = 0;
4111     sampler_dn[index].dw6.fmd_tear_threshold = 32;
4112     sampler_dn[index].dw6.cat_th1 = 0;
4113     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
4114     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
4115
4116     sampler_dn[index].dw7.sad_tha = 5;
4117     sampler_dn[index].dw7.sad_thb = 10;
4118     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
4119     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
4120     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
4121     sampler_dn[index].dw7.vdi_walker_enable = 0;
4122     sampler_dn[index].dw7.neighborpixel_th = 10;
4123     sampler_dn[index].dw7.column_width_minus1 = w / 16;
4124
4125     dri_bo_unmap(pp_context->sampler_state_table.bo);
4126
4127     /* private function & data */
4128     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
4129     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
4130     pp_context->private_context = &pp_context->pp_dn_context;
4131     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
4132
4133     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
4134     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
4135     pp_static_parameter->grf1.di_top_field_first = 0;
4136     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
4137
4138     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
4139     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
4140     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
4141
4142     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
4143     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
4144
4145     pp_dn_context->dest_w = w;
4146     pp_dn_context->dest_h = h;
4147
4148     dst_surface->flags = src_surface->flags;
4149
4150     return VA_STATUS_SUCCESS;
4151 }
4152
4153 static VAStatus
4154 ironlake_pp_initialize(
4155     VADriverContextP ctx,
4156     struct i965_post_processing_context *pp_context,
4157     const struct i965_surface *src_surface,
4158     const VARectangle *src_rect,
4159     struct i965_surface *dst_surface,
4160     const VARectangle *dst_rect,
4161     int pp_index,
4162     void *filter_param
4163 )
4164 {
4165     VAStatus va_status;
4166     struct i965_driver_data *i965 = i965_driver_data(ctx);
4167     struct pp_module *pp_module;
4168     dri_bo *bo;
4169     int static_param_size, inline_param_size;
4170
4171     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4172     bo = dri_bo_alloc(i965->intel.bufmgr,
4173                       "surface state & binding table",
4174                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4175                       4096);
4176     assert(bo);
4177     pp_context->surface_state_binding_table.bo = bo;
4178
4179     dri_bo_unreference(pp_context->curbe.bo);
4180     bo = dri_bo_alloc(i965->intel.bufmgr,
4181                       "constant buffer",
4182                       4096,
4183                       4096);
4184     assert(bo);
4185     pp_context->curbe.bo = bo;
4186
4187     dri_bo_unreference(pp_context->idrt.bo);
4188     bo = dri_bo_alloc(i965->intel.bufmgr,
4189                       "interface discriptor",
4190                       sizeof(struct i965_interface_descriptor),
4191                       4096);
4192     assert(bo);
4193     pp_context->idrt.bo = bo;
4194     pp_context->idrt.num_interface_descriptors = 0;
4195
4196     dri_bo_unreference(pp_context->sampler_state_table.bo);
4197     bo = dri_bo_alloc(i965->intel.bufmgr,
4198                       "sampler state table",
4199                       4096,
4200                       4096);
4201     assert(bo);
4202     dri_bo_map(bo, True);
4203     memset(bo->virtual, 0, bo->size);
4204     dri_bo_unmap(bo);
4205     pp_context->sampler_state_table.bo = bo;
4206
4207     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4208     bo = dri_bo_alloc(i965->intel.bufmgr,
4209                       "sampler 8x8 state ",
4210                       4096,
4211                       4096);
4212     assert(bo);
4213     pp_context->sampler_state_table.bo_8x8 = bo;
4214
4215     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4216     bo = dri_bo_alloc(i965->intel.bufmgr,
4217                       "sampler 8x8 state ",
4218                       4096,
4219                       4096);
4220     assert(bo);
4221     pp_context->sampler_state_table.bo_8x8_uv = bo;
4222
4223     dri_bo_unreference(pp_context->vfe_state.bo);
4224     bo = dri_bo_alloc(i965->intel.bufmgr,
4225                       "vfe state",
4226                       sizeof(struct i965_vfe_state),
4227                       4096);
4228     assert(bo);
4229     pp_context->vfe_state.bo = bo;
4230
4231     static_param_size = sizeof(struct pp_static_parameter);
4232     inline_param_size = sizeof(struct pp_inline_parameter);
4233
4234     memset(pp_context->pp_static_parameter, 0, static_param_size);
4235     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4236
4237     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4238     pp_context->current_pp = pp_index;
4239     pp_module = &pp_context->pp_modules[pp_index];
4240
4241     if (pp_module->initialize)
4242         va_status = pp_module->initialize(ctx, pp_context,
4243                                           src_surface,
4244                                           src_rect,
4245                                           dst_surface,
4246                                           dst_rect,
4247                                           filter_param);
4248     else
4249         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4250
4251     return va_status;
4252 }
4253
4254 static VAStatus
4255 ironlake_post_processing(
4256     VADriverContextP   ctx,
4257     struct i965_post_processing_context *pp_context,
4258     const struct i965_surface *src_surface,
4259     const VARectangle *src_rect,
4260     struct i965_surface *dst_surface,
4261     const VARectangle *dst_rect,
4262     int                pp_index,
4263     void *filter_param
4264 )
4265 {
4266     VAStatus va_status;
4267
4268     va_status = ironlake_pp_initialize(ctx, pp_context,
4269                                        src_surface,
4270                                        src_rect,
4271                                        dst_surface,
4272                                        dst_rect,
4273                                        pp_index,
4274                                        filter_param);
4275
4276     if (va_status == VA_STATUS_SUCCESS) {
4277         ironlake_pp_states_setup(ctx, pp_context);
4278         ironlake_pp_pipeline_setup(ctx, pp_context);
4279     }
4280
4281     return va_status;
4282 }
4283
4284 static VAStatus
4285 gen6_pp_initialize(
4286     VADriverContextP ctx,
4287     struct i965_post_processing_context *pp_context,
4288     const struct i965_surface *src_surface,
4289     const VARectangle *src_rect,
4290     struct i965_surface *dst_surface,
4291     const VARectangle *dst_rect,
4292     int pp_index,
4293     void *filter_param
4294 )
4295 {
4296     VAStatus va_status;
4297     struct i965_driver_data *i965 = i965_driver_data(ctx);
4298     struct pp_module *pp_module;
4299     dri_bo *bo;
4300     int static_param_size, inline_param_size;
4301
4302     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4303     bo = dri_bo_alloc(i965->intel.bufmgr,
4304                       "surface state & binding table",
4305                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4306                       4096);
4307     assert(bo);
4308     pp_context->surface_state_binding_table.bo = bo;
4309
4310     dri_bo_unreference(pp_context->curbe.bo);
4311     bo = dri_bo_alloc(i965->intel.bufmgr,
4312                       "constant buffer",
4313                       4096,
4314                       4096);
4315     assert(bo);
4316     pp_context->curbe.bo = bo;
4317
4318     dri_bo_unreference(pp_context->idrt.bo);
4319     bo = dri_bo_alloc(i965->intel.bufmgr,
4320                       "interface discriptor",
4321                       sizeof(struct gen6_interface_descriptor_data),
4322                       4096);
4323     assert(bo);
4324     pp_context->idrt.bo = bo;
4325     pp_context->idrt.num_interface_descriptors = 0;
4326
4327     dri_bo_unreference(pp_context->sampler_state_table.bo);
4328     bo = dri_bo_alloc(i965->intel.bufmgr,
4329                       "sampler state table",
4330                       4096,
4331                       4096);
4332     assert(bo);
4333     dri_bo_map(bo, True);
4334     memset(bo->virtual, 0, bo->size);
4335     dri_bo_unmap(bo);
4336     pp_context->sampler_state_table.bo = bo;
4337
4338     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4339     bo = dri_bo_alloc(i965->intel.bufmgr,
4340                       "sampler 8x8 state ",
4341                       4096,
4342                       4096);
4343     assert(bo);
4344     pp_context->sampler_state_table.bo_8x8 = bo;
4345
4346     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4347     bo = dri_bo_alloc(i965->intel.bufmgr,
4348                       "sampler 8x8 state ",
4349                       4096,
4350                       4096);
4351     assert(bo);
4352     pp_context->sampler_state_table.bo_8x8_uv = bo;
4353
4354     dri_bo_unreference(pp_context->vfe_state.bo);
4355     bo = dri_bo_alloc(i965->intel.bufmgr,
4356                       "vfe state",
4357                       sizeof(struct i965_vfe_state),
4358                       4096);
4359     assert(bo);
4360     pp_context->vfe_state.bo = bo;
4361
4362     if (IS_GEN7(i965->intel.device_info)) {
4363         static_param_size = sizeof(struct gen7_pp_static_parameter);
4364         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
4365     } else {
4366         static_param_size = sizeof(struct pp_static_parameter);
4367         inline_param_size = sizeof(struct pp_inline_parameter);
4368     }
4369
4370     memset(pp_context->pp_static_parameter, 0, static_param_size);
4371     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4372
4373     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4374     pp_context->current_pp = pp_index;
4375     pp_module = &pp_context->pp_modules[pp_index];
4376
4377     if (pp_module->initialize)
4378         va_status = pp_module->initialize(ctx, pp_context,
4379                                           src_surface,
4380                                           src_rect,
4381                                           dst_surface,
4382                                           dst_rect,
4383                                           filter_param);
4384     else
4385         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4386
4387     calculate_boundary_block_mask(pp_context, dst_rect);
4388
4389     return va_status;
4390 }
4391
4392
4393 static void
4394 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
4395                                    struct i965_post_processing_context *pp_context)
4396 {
4397     struct i965_driver_data *i965 = i965_driver_data(ctx);
4398     struct gen6_interface_descriptor_data *desc;
4399     dri_bo *bo;
4400     int pp_index = pp_context->current_pp;
4401
4402     bo = pp_context->idrt.bo;
4403     dri_bo_map(bo, True);
4404     assert(bo->virtual);
4405     desc = bo->virtual;
4406     memset(desc, 0, sizeof(*desc));
4407     desc->desc0.kernel_start_pointer =
4408         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
4409     desc->desc1.single_program_flow = 1;
4410     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
4411     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
4412     desc->desc2.sampler_state_pointer =
4413         pp_context->sampler_state_table.bo->offset >> 5;
4414     desc->desc3.binding_table_entry_count = 0;
4415     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
4416     desc->desc4.constant_urb_entry_read_offset = 0;
4417
4418     if (IS_GEN7(i965->intel.device_info))
4419         desc->desc4.constant_urb_entry_read_length = 8; /* grf 1-8 */
4420     else
4421         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
4422
4423     dri_bo_emit_reloc(bo,
4424                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4425                       0,
4426                       offsetof(struct gen6_interface_descriptor_data, desc0),
4427                       pp_context->pp_modules[pp_index].kernel.bo);
4428
4429     dri_bo_emit_reloc(bo,
4430                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4431                       desc->desc2.sampler_count << 2,
4432                       offsetof(struct gen6_interface_descriptor_data, desc2),
4433                       pp_context->sampler_state_table.bo);
4434
4435     dri_bo_unmap(bo);
4436     pp_context->idrt.num_interface_descriptors++;
4437 }
4438
4439 static void
4440 gen6_pp_upload_constants(VADriverContextP ctx,
4441                          struct i965_post_processing_context *pp_context)
4442 {
4443     struct i965_driver_data *i965 = i965_driver_data(ctx);
4444     unsigned char *constant_buffer;
4445     int param_size;
4446
4447     assert(sizeof(struct pp_static_parameter) == 128);
4448     assert(sizeof(struct gen7_pp_static_parameter) == 256);
4449
4450     if (IS_GEN7(i965->intel.device_info))
4451         param_size = sizeof(struct gen7_pp_static_parameter);
4452     else
4453         param_size = sizeof(struct pp_static_parameter);
4454
4455     dri_bo_map(pp_context->curbe.bo, 1);
4456     assert(pp_context->curbe.bo->virtual);
4457     constant_buffer = pp_context->curbe.bo->virtual;
4458     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
4459     dri_bo_unmap(pp_context->curbe.bo);
4460 }
4461
4462 static void
4463 gen6_pp_states_setup(VADriverContextP ctx,
4464                      struct i965_post_processing_context *pp_context)
4465 {
4466     gen6_pp_interface_descriptor_table(ctx, pp_context);
4467     gen6_pp_upload_constants(ctx, pp_context);
4468 }
4469
4470 static void
4471 gen6_pp_pipeline_select(VADriverContextP ctx,
4472                         struct i965_post_processing_context *pp_context)
4473 {
4474     struct intel_batchbuffer *batch = pp_context->batch;
4475
4476     BEGIN_BATCH(batch, 1);
4477     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
4478     ADVANCE_BATCH(batch);
4479 }
4480
4481 static void
4482 gen6_pp_state_base_address(VADriverContextP ctx,
4483                            struct i965_post_processing_context *pp_context)
4484 {
4485     struct intel_batchbuffer *batch = pp_context->batch;
4486
4487     BEGIN_BATCH(batch, 10);
4488     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
4489     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4490     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
4491     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4492     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4493     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4494     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4495     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4496     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4497     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4498     ADVANCE_BATCH(batch);
4499 }
4500
4501 static void
4502 gen6_pp_vfe_state(VADriverContextP ctx,
4503                   struct i965_post_processing_context *pp_context)
4504 {
4505     struct intel_batchbuffer *batch = pp_context->batch;
4506
4507     BEGIN_BATCH(batch, 8);
4508     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
4509     OUT_BATCH(batch, 0);
4510     OUT_BATCH(batch,
4511               (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 |
4512               pp_context->vfe_gpu_state.num_urb_entries << 8);
4513     OUT_BATCH(batch, 0);
4514     OUT_BATCH(batch,
4515               (pp_context->vfe_gpu_state.urb_entry_size) << 16 |
4516               /* URB Entry Allocation Size, in 256 bits unit */
4517               (pp_context->vfe_gpu_state.curbe_allocation_size));
4518     /* CURBE Allocation Size, in 256 bits unit */
4519     OUT_BATCH(batch, 0);
4520     OUT_BATCH(batch, 0);
4521     OUT_BATCH(batch, 0);
4522     ADVANCE_BATCH(batch);
4523 }
4524
4525 static void
4526 gen6_pp_curbe_load(VADriverContextP ctx,
4527                    struct i965_post_processing_context *pp_context)
4528 {
4529     struct intel_batchbuffer *batch = pp_context->batch;
4530     struct i965_driver_data *i965 = i965_driver_data(ctx);
4531     int param_size;
4532
4533     if (IS_GEN7(i965->intel.device_info))
4534         param_size = sizeof(struct gen7_pp_static_parameter);
4535     else
4536         param_size = sizeof(struct pp_static_parameter);
4537
4538     BEGIN_BATCH(batch, 4);
4539     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
4540     OUT_BATCH(batch, 0);
4541     OUT_BATCH(batch,
4542               param_size);
4543     OUT_RELOC(batch,
4544               pp_context->curbe.bo,
4545               I915_GEM_DOMAIN_INSTRUCTION, 0,
4546               0);
4547     ADVANCE_BATCH(batch);
4548 }
4549
4550 static void
4551 gen6_interface_descriptor_load(VADriverContextP ctx,
4552                                struct i965_post_processing_context *pp_context)
4553 {
4554     struct intel_batchbuffer *batch = pp_context->batch;
4555
4556     BEGIN_BATCH(batch, 4);
4557     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
4558     OUT_BATCH(batch, 0);
4559     OUT_BATCH(batch,
4560               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
4561     OUT_RELOC(batch,
4562               pp_context->idrt.bo,
4563               I915_GEM_DOMAIN_INSTRUCTION, 0,
4564               0);
4565     ADVANCE_BATCH(batch);
4566 }
4567
4568 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps)
4569 {
4570     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
4571
4572     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
4573     pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
4574     // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle
4575     pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
4576     pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4577     pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
4578
4579     /* 1 x N */
4580     if (x_steps == 1) {
4581         if (y == y_steps - 1) {
4582             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
4583         } else {
4584             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
4585         }
4586     }
4587
4588     /* M x 1 */
4589     if (y_steps == 1) {
4590         if (x == 0) { // all blocks in this group are on the left edge
4591             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
4592             pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left;
4593         } else if (x == x_steps - 1) {
4594             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
4595             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
4596         } else {
4597             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
4598             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4599             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
4600         }
4601     }
4602
4603 }
4604
4605 static void
4606 gen6_pp_object_walker(VADriverContextP ctx,
4607                       struct i965_post_processing_context *pp_context)
4608 {
4609     struct i965_driver_data *i965 = i965_driver_data(ctx);
4610     struct intel_batchbuffer *batch = pp_context->batch;
4611     int x, x_steps, y, y_steps;
4612     int param_size, command_length_in_dws;
4613     dri_bo *command_buffer;
4614     unsigned int *command_ptr;
4615
4616     if (IS_GEN7(i965->intel.device_info))
4617         param_size = sizeof(struct gen7_pp_inline_parameter);
4618     else
4619         param_size = sizeof(struct pp_inline_parameter);
4620
4621     x_steps = pp_context->pp_x_steps(pp_context->private_context);
4622     y_steps = pp_context->pp_y_steps(pp_context->private_context);
4623     command_length_in_dws = 6 + (param_size >> 2);
4624     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
4625                                   "command objects buffer",
4626                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
4627                                   4096);
4628
4629     dri_bo_map(command_buffer, 1);
4630     command_ptr = command_buffer->virtual;
4631
4632     for (y = 0; y < y_steps; y++) {
4633         for (x = 0; x < x_steps; x++) {
4634             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
4635                 // some common block parameter update goes here, apply to all pp functions
4636                 if (IS_GEN6(i965->intel.device_info))
4637                     update_block_mask_parameter(pp_context, x, y, x_steps, y_steps);
4638
4639                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
4640                 *command_ptr++ = 0;
4641                 *command_ptr++ = 0;
4642                 *command_ptr++ = 0;
4643                 *command_ptr++ = 0;
4644                 *command_ptr++ = 0;
4645                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
4646                 command_ptr += (param_size >> 2);
4647             }
4648         }
4649     }
4650
4651     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
4652         *command_ptr++ = 0;
4653
4654     *command_ptr = MI_BATCH_BUFFER_END;
4655
4656     dri_bo_unmap(command_buffer);
4657
4658     BEGIN_BATCH(batch, 2);
4659     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
4660     OUT_RELOC(batch, command_buffer,
4661               I915_GEM_DOMAIN_COMMAND, 0,
4662               0);
4663     ADVANCE_BATCH(batch);
4664
4665     dri_bo_unreference(command_buffer);
4666
4667     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
4668      * will cause control to pass back to ring buffer
4669      */
4670     intel_batchbuffer_end_atomic(batch);
4671     intel_batchbuffer_flush(batch);
4672     intel_batchbuffer_start_atomic(batch, 0x1000);
4673 }
4674
4675 static void
4676 gen6_pp_pipeline_setup(VADriverContextP ctx,
4677                        struct i965_post_processing_context *pp_context)
4678 {
4679     struct intel_batchbuffer *batch = pp_context->batch;
4680
4681     intel_batchbuffer_start_atomic(batch, 0x1000);
4682     intel_batchbuffer_emit_mi_flush(batch);
4683     gen6_pp_pipeline_select(ctx, pp_context);
4684     gen6_pp_state_base_address(ctx, pp_context);
4685     gen6_pp_vfe_state(ctx, pp_context);
4686     gen6_pp_curbe_load(ctx, pp_context);
4687     gen6_interface_descriptor_load(ctx, pp_context);
4688     gen6_pp_object_walker(ctx, pp_context);
4689     intel_batchbuffer_end_atomic(batch);
4690 }
4691
4692 static VAStatus
4693 gen6_post_processing(
4694     VADriverContextP ctx,
4695     struct i965_post_processing_context *pp_context,
4696     const struct i965_surface *src_surface,
4697     const VARectangle *src_rect,
4698     struct i965_surface *dst_surface,
4699     const VARectangle *dst_rect,
4700     int pp_index,
4701     void *filter_param
4702 )
4703 {
4704     VAStatus va_status;
4705
4706     va_status = gen6_pp_initialize(ctx, pp_context,
4707                                    src_surface,
4708                                    src_rect,
4709                                    dst_surface,
4710                                    dst_rect,
4711                                    pp_index,
4712                                    filter_param);
4713
4714     if (va_status == VA_STATUS_SUCCESS) {
4715         gen6_pp_states_setup(ctx, pp_context);
4716         gen6_pp_pipeline_setup(ctx, pp_context);
4717     }
4718
4719     if (va_status == VA_STATUS_SUCCESS_1)
4720         va_status = VA_STATUS_SUCCESS;
4721
4722     return va_status;
4723 }
4724
4725 static VAStatus
4726 i965_post_processing_internal(
4727     VADriverContextP   ctx,
4728     struct i965_post_processing_context *pp_context,
4729     const struct i965_surface *src_surface,
4730     const VARectangle *src_rect,
4731     struct i965_surface *dst_surface,
4732     const VARectangle *dst_rect,
4733     int                pp_index,
4734     void *filter_param
4735 )
4736 {
4737     VAStatus va_status;
4738
4739     if (pp_context && pp_context->intel_post_processing) {
4740         va_status = (pp_context->intel_post_processing)(ctx, pp_context,
4741                                                         src_surface, src_rect,
4742                                                         dst_surface, dst_rect,
4743                                                         pp_index, filter_param);
4744     } else {
4745         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4746     }
4747
4748     return va_status;
4749 }
4750
/*
 * Split a packed 0xAARRGGBB value into Y, U, V and alpha bytes.
 *
 * The colour conversion uses integer coefficients scaled by 1000, with
 * an offset of 16 for luma and 128 for both chroma components.  Integer
 * division truncates toward zero.  Alpha is passed through unchanged.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int alpha = (argb >> 24) & 0xff;
    const int red   = (argb >> 16) & 0xff;
    const int green = (argb >> 8) & 0xff;
    const int blue  = argb & 0xff;

    const int luma = (257 * red + 504 * green + 98 * blue) / 1000 + 16;
    const int cr   = (439 * red - 368 * green - 71 * blue) / 1000 + 128;
    const int cb   = (-148 * red - 291 * green + 439 * blue) / 1000 + 128;

    *y = luma;
    *v = cr;
    *u = cb;
    *a = alpha;
}
4767
4768 static void
4769 i965_vpp_clear_surface(VADriverContextP ctx,
4770                        struct i965_post_processing_context *pp_context,
4771                        struct object_surface *obj_surface,
4772                        unsigned int color)
4773 {
4774     struct i965_driver_data *i965 = i965_driver_data(ctx);
4775     struct intel_batchbuffer *batch = pp_context->batch;
4776     unsigned int blt_cmd, br13;
4777     unsigned int tiling = 0, swizzle = 0;
4778     int pitch;
4779     unsigned char y, u, v, a = 0;
4780     int region_width, region_height;
4781
4782     /* Currently only support NV12 surface */
4783     if (!obj_surface || obj_surface->fourcc != VA_FOURCC_NV12)
4784         return;
4785
4786     rgb_to_yuv(color, &y, &u, &v, &a);
4787
4788     if (a == 0)
4789         return;
4790
4791     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4792     blt_cmd = XY_COLOR_BLT_CMD;
4793     pitch = obj_surface->width;
4794
4795     if (tiling != I915_TILING_NONE) {
4796         assert(tiling == I915_TILING_Y);
4797         // blt_cmd |= XY_COLOR_BLT_DST_TILED;
4798         // pitch >>= 2;
4799     }
4800
4801     br13 = 0xf0 << 16;
4802     br13 |= BR13_8;
4803     br13 |= pitch;
4804
4805     if (IS_IRONLAKE(i965->intel.device_info)) {
4806         intel_batchbuffer_start_atomic(batch, 48);
4807         BEGIN_BATCH(batch, 12);
4808     } else {
4809         /* Will double-check the command if the new chipset is added */
4810         intel_batchbuffer_start_atomic_blt(batch, 48);
4811         BEGIN_BLT_BATCH(batch, 12);
4812     }
4813
4814     region_width = obj_surface->width;
4815     region_height = obj_surface->height;
4816
4817     OUT_BATCH(batch, blt_cmd);
4818     OUT_BATCH(batch, br13);
4819     OUT_BATCH(batch,
4820               0 << 16 |
4821               0);
4822     OUT_BATCH(batch,
4823               region_height << 16 |
4824               region_width);
4825     OUT_RELOC(batch, obj_surface->bo,
4826               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4827               0);
4828     OUT_BATCH(batch, y);
4829
4830     br13 = 0xf0 << 16;
4831     br13 |= BR13_565;
4832     br13 |= pitch;
4833
4834     region_width = obj_surface->width / 2;
4835     region_height = obj_surface->height / 2;
4836
4837     if (tiling == I915_TILING_Y) {
4838         region_height = ALIGN(obj_surface->height / 2, 32);
4839     }
4840
4841     OUT_BATCH(batch, blt_cmd);
4842     OUT_BATCH(batch, br13);
4843     OUT_BATCH(batch,
4844               0 << 16 |
4845               0);
4846     OUT_BATCH(batch,
4847               region_height << 16 |
4848               region_width);
4849     OUT_RELOC(batch, obj_surface->bo,
4850               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4851               obj_surface->width * obj_surface->y_cb_offset);
4852     OUT_BATCH(batch, v << 8 | u);
4853
4854     ADVANCE_BATCH(batch);
4855     intel_batchbuffer_end_atomic(batch);
4856 }
4857
4858 VAStatus
4859 i965_scaling_processing(
4860     VADriverContextP   ctx,
4861     struct object_surface *src_surface_obj,
4862     const VARectangle *src_rect,
4863     struct object_surface *dst_surface_obj,
4864     const VARectangle *dst_rect,
4865     unsigned int       va_flags)
4866 {
4867     VAStatus va_status = VA_STATUS_SUCCESS;
4868     struct i965_driver_data *i965 = i965_driver_data(ctx);
4869
4870     assert(src_surface_obj->fourcc == VA_FOURCC_NV12);
4871     assert(dst_surface_obj->fourcc == VA_FOURCC_NV12);
4872
4873     if (HAS_VPP(i965)) {
4874         struct i965_surface src_surface;
4875         struct i965_surface dst_surface;
4876         struct i965_post_processing_context *pp_context;
4877         unsigned int filter_flags;
4878
4879         _i965LockMutex(&i965->pp_mutex);
4880
4881         src_surface.base = (struct object_base *)src_surface_obj;
4882         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4883         src_surface.flags = I965_SURFACE_FLAG_FRAME;
4884         dst_surface.base = (struct object_base *)dst_surface_obj;
4885         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4886         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4887
4888         pp_context = i965->pp_context;
4889         filter_flags = pp_context->filter_flags;
4890         pp_context->filter_flags = va_flags;
4891
4892         va_status = i965_post_processing_internal(ctx, pp_context,
4893                                                   &src_surface, src_rect, &dst_surface, dst_rect,
4894                                                   avs_is_needed(va_flags) ? PP_NV12_AVS : PP_NV12_SCALING, NULL);
4895
4896         pp_context->filter_flags = filter_flags;
4897
4898         _i965UnlockMutex(&i965->pp_mutex);
4899     }
4900
4901     return va_status;
4902 }
4903
4904 VASurfaceID
4905 i965_post_processing(
4906     VADriverContextP   ctx,
4907     struct object_surface *obj_surface,
4908     const VARectangle *src_rect,
4909     const VARectangle *dst_rect,
4910     unsigned int       va_flags,
4911     int               *has_done_scaling,
4912     VARectangle *calibrated_rect
4913 )
4914 {
4915     struct i965_driver_data *i965 = i965_driver_data(ctx);
4916     VASurfaceID out_surface_id = VA_INVALID_ID;
4917     VASurfaceID tmp_id = VA_INVALID_ID;
4918
4919     *has_done_scaling = 0;
4920
4921     if (HAS_VPP(i965)) {
4922         VAStatus status;
4923         struct i965_surface src_surface;
4924         struct i965_surface dst_surface;
4925         struct i965_post_processing_context *pp_context;
4926
4927         /* Currently only support post processing for NV12 surface */
4928         if (obj_surface->fourcc != VA_FOURCC_NV12)
4929             return out_surface_id;
4930
4931         _i965LockMutex(&i965->pp_mutex);
4932
4933         pp_context = i965->pp_context;
4934         pp_context->filter_flags = va_flags;
4935         if (avs_is_needed(va_flags)) {
4936             VARectangle tmp_dst_rect;
4937
4938             if (out_surface_id != VA_INVALID_ID)
4939                 tmp_id = out_surface_id;
4940
4941             tmp_dst_rect.x = 0;
4942             tmp_dst_rect.y = 0;
4943             tmp_dst_rect.width = dst_rect->width;
4944             tmp_dst_rect.height = dst_rect->height;
4945             src_surface.base = (struct object_base *)obj_surface;
4946             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4947             src_surface.flags = I965_SURFACE_FLAG_FRAME;
4948
4949             status = i965_CreateSurfaces(ctx,
4950                                          dst_rect->width,
4951                                          dst_rect->height,
4952                                          VA_RT_FORMAT_YUV420,
4953                                          1,
4954                                          &out_surface_id);
4955             assert(status == VA_STATUS_SUCCESS);
4956             obj_surface = SURFACE(out_surface_id);
4957             assert(obj_surface);
4958             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4959             i965_vpp_clear_surface(ctx, pp_context, obj_surface, 0);
4960
4961             dst_surface.base = (struct object_base *)obj_surface;
4962             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4963             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4964
4965             i965_post_processing_internal(ctx, pp_context,
4966                                           &src_surface,
4967                                           src_rect,
4968                                           &dst_surface,
4969                                           &tmp_dst_rect,
4970                                           PP_NV12_AVS,
4971                                           NULL);
4972
4973             if (tmp_id != VA_INVALID_ID)
4974                 i965_DestroySurfaces(ctx, &tmp_id, 1);
4975
4976             *has_done_scaling = 1;
4977             calibrated_rect->x = 0;
4978             calibrated_rect->y = 0;
4979             calibrated_rect->width = dst_rect->width;
4980             calibrated_rect->height = dst_rect->height;
4981         }
4982
4983         _i965UnlockMutex(&i965->pp_mutex);
4984     }
4985
4986     return out_surface_id;
4987 }
4988
4989 static VAStatus
4990 i965_image_pl2_processing(VADriverContextP ctx,
4991                           const struct i965_surface *src_surface,
4992                           const VARectangle *src_rect,
4993                           struct i965_surface *dst_surface,
4994                           const VARectangle *dst_rect);
4995
4996 static VAStatus
4997 i965_image_plx_nv12_plx_processing(VADriverContextP ctx,
4998                                    VAStatus(*i965_image_plx_nv12_processing)(
4999                                        VADriverContextP,
5000                                        const struct i965_surface *,
5001                                        const VARectangle *,
5002                                        struct i965_surface *,
5003                                        const VARectangle *),
5004                                    const struct i965_surface *src_surface,
5005                                    const VARectangle *src_rect,
5006                                    struct i965_surface *dst_surface,
5007                                    const VARectangle *dst_rect)
5008 {
5009     struct i965_driver_data *i965 = i965_driver_data(ctx);
5010     VAStatus status;
5011     VASurfaceID tmp_surface_id = VA_INVALID_SURFACE;
5012     struct object_surface *obj_surface = NULL;
5013     struct i965_surface tmp_surface;
5014     int width, height;
5015
5016     pp_get_surface_size(ctx, dst_surface, &width, &height);
5017     status = i965_CreateSurfaces(ctx,
5018                                  width,
5019                                  height,
5020                                  VA_RT_FORMAT_YUV420,
5021                                  1,
5022                                  &tmp_surface_id);
5023     assert(status == VA_STATUS_SUCCESS);
5024     obj_surface = SURFACE(tmp_surface_id);
5025     assert(obj_surface);
5026     i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5027
5028     tmp_surface.base = (struct object_base *)obj_surface;
5029     tmp_surface.type = I965_SURFACE_TYPE_SURFACE;
5030     tmp_surface.flags = I965_SURFACE_FLAG_FRAME;
5031
5032     status = i965_image_plx_nv12_processing(ctx,
5033                                             src_surface,
5034                                             src_rect,
5035                                             &tmp_surface,
5036                                             dst_rect);
5037
5038     if (status == VA_STATUS_SUCCESS)
5039         status = i965_image_pl2_processing(ctx,
5040                                            &tmp_surface,
5041                                            dst_rect,
5042                                            dst_surface,
5043                                            dst_rect);
5044
5045     i965_DestroySurfaces(ctx,
5046                          &tmp_surface_id,
5047                          1);
5048
5049     return status;
5050 }
5051
5052
5053 static VAStatus
5054 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
5055                                const struct i965_surface *src_surface,
5056                                const VARectangle *src_rect,
5057                                struct i965_surface *dst_surface,
5058                                const VARectangle *dst_rect)
5059 {
5060     struct i965_driver_data *i965 = i965_driver_data(ctx);
5061     struct i965_post_processing_context *pp_context = i965->pp_context;
5062     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5063     VAStatus vaStatus;
5064
5065     switch (fourcc) {
5066     case VA_FOURCC_NV12:
5067         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5068                                                  src_surface,
5069                                                  src_rect,
5070                                                  dst_surface,
5071                                                  dst_rect,
5072                                                  PP_RGBX_LOAD_SAVE_NV12,
5073                                                  NULL);
5074         intel_batchbuffer_flush(pp_context->batch);
5075         break;
5076
5077     default:
5078         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5079                                                       i965_image_pl1_rgbx_processing,
5080                                                       src_surface,
5081                                                       src_rect,
5082                                                       dst_surface,
5083                                                       dst_rect);
5084         break;
5085     }
5086
5087     return vaStatus;
5088 }
5089
5090 static VAStatus
5091 i965_image_pl3_processing(VADriverContextP ctx,
5092                           const struct i965_surface *src_surface,
5093                           const VARectangle *src_rect,
5094                           struct i965_surface *dst_surface,
5095                           const VARectangle *dst_rect)
5096 {
5097     struct i965_driver_data *i965 = i965_driver_data(ctx);
5098     struct i965_post_processing_context *pp_context = i965->pp_context;
5099     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5100     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5101
5102     switch (fourcc) {
5103     case VA_FOURCC_NV12:
5104         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5105                                                  src_surface,
5106                                                  src_rect,
5107                                                  dst_surface,
5108                                                  dst_rect,
5109                                                  PP_PL3_LOAD_SAVE_N12,
5110                                                  NULL);
5111         intel_batchbuffer_flush(pp_context->batch);
5112         break;
5113
5114     case VA_FOURCC_IMC1:
5115     case VA_FOURCC_IMC3:
5116     case VA_FOURCC_YV12:
5117     case VA_FOURCC_I420:
5118         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5119                                                  src_surface,
5120                                                  src_rect,
5121                                                  dst_surface,
5122                                                  dst_rect,
5123                                                  PP_PL3_LOAD_SAVE_PL3,
5124                                                  NULL);
5125         intel_batchbuffer_flush(pp_context->batch);
5126         break;
5127
5128     case VA_FOURCC_YUY2:
5129     case VA_FOURCC_UYVY:
5130         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5131                                                  src_surface,
5132                                                  src_rect,
5133                                                  dst_surface,
5134                                                  dst_rect,
5135                                                  PP_PL3_LOAD_SAVE_PA,
5136                                                  NULL);
5137         intel_batchbuffer_flush(pp_context->batch);
5138         break;
5139
5140     default:
5141         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5142                                                       i965_image_pl3_processing,
5143                                                       src_surface,
5144                                                       src_rect,
5145                                                       dst_surface,
5146                                                       dst_rect);
5147         break;
5148     }
5149
5150     return vaStatus;
5151 }
5152
5153 static VAStatus
5154 i965_image_pl2_processing(VADriverContextP ctx,
5155                           const struct i965_surface *src_surface,
5156                           const VARectangle *src_rect,
5157                           struct i965_surface *dst_surface,
5158                           const VARectangle *dst_rect)
5159 {
5160     struct i965_driver_data *i965 = i965_driver_data(ctx);
5161     struct i965_post_processing_context *pp_context = i965->pp_context;
5162     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5163     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5164
5165     switch (fourcc) {
5166     case VA_FOURCC_NV12:
5167         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5168                                                  src_surface,
5169                                                  src_rect,
5170                                                  dst_surface,
5171                                                  dst_rect,
5172                                                  PP_NV12_LOAD_SAVE_N12,
5173                                                  NULL);
5174         break;
5175
5176     case VA_FOURCC_IMC1:
5177     case VA_FOURCC_IMC3:
5178     case VA_FOURCC_YV12:
5179     case VA_FOURCC_I420:
5180         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5181                                                  src_surface,
5182                                                  src_rect,
5183                                                  dst_surface,
5184                                                  dst_rect,
5185                                                  PP_NV12_LOAD_SAVE_PL3,
5186                                                  NULL);
5187         break;
5188
5189     case VA_FOURCC_YUY2:
5190     case VA_FOURCC_UYVY:
5191         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5192                                                  src_surface,
5193                                                  src_rect,
5194                                                  dst_surface,
5195                                                  dst_rect,
5196                                                  PP_NV12_LOAD_SAVE_PA,
5197                                                  NULL);
5198         break;
5199
5200     case VA_FOURCC_BGRX:
5201     case VA_FOURCC_BGRA:
5202     case VA_FOURCC_RGBX:
5203     case VA_FOURCC_RGBA:
5204         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5205                                                  src_surface,
5206                                                  src_rect,
5207                                                  dst_surface,
5208                                                  dst_rect,
5209                                                  PP_NV12_LOAD_SAVE_RGBX,
5210                                                  NULL);
5211         break;
5212
5213     default:
5214         return VA_STATUS_ERROR_UNIMPLEMENTED;
5215     }
5216
5217     intel_batchbuffer_flush(pp_context->batch);
5218
5219     return vaStatus;
5220 }
5221
5222 static VAStatus
5223 i965_image_pl1_processing(VADriverContextP ctx,
5224                           const struct i965_surface *src_surface,
5225                           const VARectangle *src_rect,
5226                           struct i965_surface *dst_surface,
5227                           const VARectangle *dst_rect)
5228 {
5229     struct i965_driver_data *i965 = i965_driver_data(ctx);
5230     struct i965_post_processing_context *pp_context = i965->pp_context;
5231     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5232     VAStatus vaStatus;
5233
5234     switch (fourcc) {
5235     case VA_FOURCC_NV12:
5236         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5237                                                  src_surface,
5238                                                  src_rect,
5239                                                  dst_surface,
5240                                                  dst_rect,
5241                                                  PP_PA_LOAD_SAVE_NV12,
5242                                                  NULL);
5243         intel_batchbuffer_flush(pp_context->batch);
5244         break;
5245
5246     case VA_FOURCC_YV12:
5247         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5248                                                  src_surface,
5249                                                  src_rect,
5250                                                  dst_surface,
5251                                                  dst_rect,
5252                                                  PP_PA_LOAD_SAVE_PL3,
5253                                                  NULL);
5254         intel_batchbuffer_flush(pp_context->batch);
5255         break;
5256
5257     case VA_FOURCC_YUY2:
5258     case VA_FOURCC_UYVY:
5259         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5260                                                  src_surface,
5261                                                  src_rect,
5262                                                  dst_surface,
5263                                                  dst_rect,
5264                                                  PP_PA_LOAD_SAVE_PA,
5265                                                  NULL);
5266         intel_batchbuffer_flush(pp_context->batch);
5267         break;
5268
5269     default:
5270         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5271                                                       i965_image_pl1_processing,
5272                                                       src_surface,
5273                                                       src_rect,
5274                                                       dst_surface,
5275                                                       dst_rect);
5276         break;
5277     }
5278
5279     return vaStatus;
5280 }
5281
5282 // it only support NV12 and P010 for vebox proc ctx
5283 static struct object_surface *derive_surface(VADriverContextP ctx,
5284                                              struct object_image *obj_image,
5285                                              struct object_surface *obj_surface)
5286 {
5287     VAImage * const image = &obj_image->image;
5288
5289     memset((void *)obj_surface, 0, sizeof(*obj_surface));
5290     obj_surface->fourcc = image->format.fourcc;
5291     obj_surface->orig_width = image->width;
5292     obj_surface->orig_height = image->height;
5293     obj_surface->width = image->pitches[0];
5294     obj_surface->height = image->height;
5295     obj_surface->y_cb_offset = image->offsets[1] / obj_surface->width;
5296     obj_surface->y_cr_offset = obj_surface->y_cb_offset;
5297     obj_surface->bo = obj_image->bo;
5298     obj_surface->subsampling = SUBSAMPLE_YUV420;
5299
5300     return obj_surface;
5301 }
5302
5303 static VAStatus
5304 vebox_processing_simple(VADriverContextP ctx,
5305                         struct i965_post_processing_context *pp_context,
5306                         struct object_surface *src_obj_surface,
5307                         struct object_surface *dst_obj_surface,
5308                         const VARectangle *rect)
5309 {
5310     struct i965_driver_data *i965 = i965_driver_data(ctx);
5311     VAProcPipelineParameterBuffer pipeline_param;
5312     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
5313
5314     if (pp_context->vebox_proc_ctx == NULL) {
5315         pp_context->vebox_proc_ctx = gen75_vebox_context_init(ctx);
5316     }
5317
5318     memset((void *)&pipeline_param, 0, sizeof(pipeline_param));
5319     pipeline_param.surface_region = rect;
5320     pipeline_param.output_region = rect;
5321     pipeline_param.filter_flags = 0;
5322     pipeline_param.num_filters  = 0;
5323
5324     pp_context->vebox_proc_ctx->pipeline_param = &pipeline_param;
5325     pp_context->vebox_proc_ctx->surface_input_object = src_obj_surface;
5326     pp_context->vebox_proc_ctx->surface_output_object = dst_obj_surface;
5327
5328     if (IS_GEN9(i965->intel.device_info))
5329         status = gen9_vebox_process_picture(ctx, pp_context->vebox_proc_ctx);
5330
5331     return status;
5332 }
5333
5334 static VAStatus
5335 i965_image_p010_processing(VADriverContextP ctx,
5336                            const struct i965_surface *src_surface,
5337                            const VARectangle *src_rect,
5338                            struct i965_surface *dst_surface,
5339                            const VARectangle *dst_rect)
5340 {
5341 #define HAS_VPP_P010(ctx)        ((ctx)->codec_info->has_vpp_p010 && \
5342                                      (ctx)->intel.has_bsd)
5343
5344     struct i965_driver_data *i965 = i965_driver_data(ctx);
5345     struct i965_post_processing_context *pp_context = i965->pp_context;
5346     struct object_surface *src_obj_surface = NULL, *dst_obj_surface = NULL;
5347     struct object_surface tmp_src_obj_surface, tmp_dst_obj_surface;
5348     struct object_surface *tmp_surface = NULL;
5349     VASurfaceID tmp_surface_id[3], out_surface_id = VA_INVALID_ID;
5350     int num_tmp_surfaces = 0;
5351     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5352     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5353     int vpp_post = 0;
5354
5355     if (HAS_VPP_P010(i965)) {
5356         vpp_post = 0;
5357         switch (fourcc) {
5358         case VA_FOURCC_NV12:
5359             if (src_rect->x != dst_rect->x ||
5360                 src_rect->y != dst_rect->y ||
5361                 src_rect->width != dst_rect->width ||
5362                 src_rect->height != dst_rect->height) {
5363                 vpp_post = 1;
5364             }
5365             break;
5366         case VA_FOURCC_P010:
5367             // don't support scaling while the fourcc of dst_surface is P010
5368             if (src_rect->x != dst_rect->x ||
5369                 src_rect->y != dst_rect->y ||
5370                 src_rect->width != dst_rect->width ||
5371                 src_rect->height != dst_rect->height) {
5372                 vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5373                 goto EXIT;
5374             }
5375             break;
5376         default:
5377             vpp_post = 1;
5378             break;
5379         }
5380
5381         if (src_surface->type == I965_SURFACE_TYPE_IMAGE) {
5382             src_obj_surface = derive_surface(ctx, (struct object_image *)src_surface->base,
5383                                              &tmp_src_obj_surface);
5384         } else
5385             src_obj_surface = (struct object_surface *)src_surface->base;
5386
5387         if (src_obj_surface == NULL) {
5388             vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED;
5389             goto EXIT;
5390         }
5391
5392         if (vpp_post == 1) {
5393             vaStatus = i965_CreateSurfaces(ctx,
5394                                            src_obj_surface->orig_width,
5395                                            src_obj_surface->orig_height,
5396                                            VA_RT_FORMAT_YUV420,
5397                                            1,
5398                                            &out_surface_id);
5399             assert(vaStatus == VA_STATUS_SUCCESS);
5400             tmp_surface_id[num_tmp_surfaces++] = out_surface_id;
5401             tmp_surface = SURFACE(out_surface_id);
5402             assert(tmp_surface);
5403             i965_check_alloc_surface_bo(ctx, tmp_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5404         }
5405
5406         if (tmp_surface != NULL)
5407             dst_obj_surface = tmp_surface;
5408         else {
5409             if (dst_surface->type == I965_SURFACE_TYPE_IMAGE) {
5410                 dst_obj_surface = derive_surface(ctx, (struct object_image *)dst_surface->base,
5411                                                  &tmp_dst_obj_surface);
5412             } else
5413                 dst_obj_surface = (struct object_surface *)dst_surface->base;
5414         }
5415
5416         if (dst_obj_surface == NULL) {
5417             vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED;
5418             goto EXIT;
5419         }
5420
5421         vaStatus = vebox_processing_simple(ctx,
5422                                            pp_context,
5423                                            src_obj_surface,
5424                                            dst_obj_surface,
5425                                            src_rect);
5426         if (vaStatus != VA_STATUS_SUCCESS)
5427             goto EXIT;
5428
5429         if (vpp_post == 1) {
5430             struct i965_surface src_surface_new;
5431
5432             if (tmp_surface != NULL) {
5433                 src_surface_new.base = (struct object_base *)tmp_surface;
5434                 src_surface_new.type = I965_SURFACE_TYPE_SURFACE;
5435                 src_surface_new.flags = I965_SURFACE_FLAG_FRAME;
5436             } else
5437                 memcpy((void *)&src_surface_new, (void *)src_surface, sizeof(src_surface_new));
5438
5439             vaStatus = i965_image_pl2_processing(ctx,
5440                                                  &src_surface_new,
5441                                                  src_rect,
5442                                                  dst_surface,
5443                                                  dst_rect);
5444         }
5445     }
5446
5447 EXIT:
5448     if (num_tmp_surfaces)
5449         i965_DestroySurfaces(ctx,
5450                              tmp_surface_id,
5451                              num_tmp_surfaces);
5452
5453     return vaStatus;
5454 }
5455
5456 VAStatus
5457 i965_image_processing(VADriverContextP ctx,
5458                       const struct i965_surface *src_surface,
5459                       const VARectangle *src_rect,
5460                       struct i965_surface *dst_surface,
5461                       const VARectangle *dst_rect)
5462 {
5463     struct i965_driver_data *i965 = i965_driver_data(ctx);
5464     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
5465
5466     if (HAS_VPP(i965)) {
5467         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
5468
5469         _i965LockMutex(&i965->pp_mutex);
5470
5471         switch (fourcc) {
5472         case VA_FOURCC_YV12:
5473         case VA_FOURCC_I420:
5474         case VA_FOURCC_IMC1:
5475         case VA_FOURCC_IMC3:
5476         case VA_FOURCC_422H:
5477         case VA_FOURCC_422V:
5478         case VA_FOURCC_411P:
5479         case VA_FOURCC_444P:
5480         case VA_FOURCC_YV16:
5481             status = i965_image_pl3_processing(ctx,
5482                                                src_surface,
5483                                                src_rect,
5484                                                dst_surface,
5485                                                dst_rect);
5486             break;
5487
5488         case  VA_FOURCC_NV12:
5489             status = i965_image_pl2_processing(ctx,
5490                                                src_surface,
5491                                                src_rect,
5492                                                dst_surface,
5493                                                dst_rect);
5494             break;
5495         case VA_FOURCC_YUY2:
5496         case VA_FOURCC_UYVY:
5497             status = i965_image_pl1_processing(ctx,
5498                                                src_surface,
5499                                                src_rect,
5500                                                dst_surface,
5501                                                dst_rect);
5502             break;
5503         case VA_FOURCC_BGRA:
5504         case VA_FOURCC_BGRX:
5505         case VA_FOURCC_RGBA:
5506         case VA_FOURCC_RGBX:
5507             status = i965_image_pl1_rgbx_processing(ctx,
5508                                                     src_surface,
5509                                                     src_rect,
5510                                                     dst_surface,
5511                                                     dst_rect);
5512             break;
5513         case VA_FOURCC_P010:
5514             status = i965_image_p010_processing(ctx,
5515                                                 src_surface,
5516                                                 src_rect,
5517                                                 dst_surface,
5518                                                 dst_rect);
5519             break;
5520         default:
5521             status = VA_STATUS_ERROR_UNIMPLEMENTED;
5522             break;
5523         }
5524
5525         _i965UnlockMutex(&i965->pp_mutex);
5526     }
5527
5528     return status;
5529 }
5530
5531 static void
5532 i965_post_processing_context_finalize(VADriverContextP ctx,
5533                                       struct i965_post_processing_context *pp_context)
5534 {
5535     int i;
5536
5537     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
5538     pp_context->surface_state_binding_table.bo = NULL;
5539
5540     dri_bo_unreference(pp_context->curbe.bo);
5541     pp_context->curbe.bo = NULL;
5542
5543     dri_bo_unreference(pp_context->sampler_state_table.bo);
5544     pp_context->sampler_state_table.bo = NULL;
5545
5546     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
5547     pp_context->sampler_state_table.bo_8x8 = NULL;
5548
5549     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
5550     pp_context->sampler_state_table.bo_8x8_uv = NULL;
5551
5552     dri_bo_unreference(pp_context->idrt.bo);
5553     pp_context->idrt.bo = NULL;
5554     pp_context->idrt.num_interface_descriptors = 0;
5555
5556     dri_bo_unreference(pp_context->vfe_state.bo);
5557     pp_context->vfe_state.bo = NULL;
5558
5559     for (i = 0; i < ARRAY_ELEMS(pp_context->pp_dndi_context.frame_store); i++)
5560         pp_dndi_frame_store_clear(&pp_context->pp_dndi_context.frame_store[i],
5561                                   ctx);
5562
5563     dri_bo_unreference(pp_context->pp_dn_context.stmm_bo);
5564     pp_context->pp_dn_context.stmm_bo = NULL;
5565
5566     for (i = 0; i < NUM_PP_MODULES; i++) {
5567         struct pp_module *pp_module = &pp_context->pp_modules[i];
5568
5569         dri_bo_unreference(pp_module->kernel.bo);
5570         pp_module->kernel.bo = NULL;
5571     }
5572
5573     free(pp_context->pp_static_parameter);
5574     free(pp_context->pp_inline_parameter);
5575     pp_context->pp_static_parameter = NULL;
5576     pp_context->pp_inline_parameter = NULL;
5577 }
5578
5579 void
5580 i965_post_processing_terminate(VADriverContextP ctx)
5581 {
5582     struct i965_driver_data *i965 = i965_driver_data(ctx);
5583     struct i965_post_processing_context *pp_context = i965->pp_context;
5584
5585     if (pp_context) {
5586         pp_context->finalize(ctx, pp_context);
5587         free(pp_context);
5588     }
5589
5590     i965->pp_context = NULL;
5591 }
5592
5593 #define VPP_CURBE_ALLOCATION_SIZE   32
5594
5595 void
5596 i965_post_processing_context_init(VADriverContextP ctx,
5597                                   void *data,
5598                                   struct intel_batchbuffer *batch)
5599 {
5600     struct i965_driver_data *i965 = i965_driver_data(ctx);
5601     int i;
5602     struct i965_post_processing_context *pp_context = data;
5603     const AVSConfig *avs_config;
5604
5605     if (IS_IRONLAKE(i965->intel.device_info)) {
5606         pp_context->urb.size = i965->intel.device_info->urb_size;
5607         pp_context->urb.num_vfe_entries = 32;
5608         pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
5609         pp_context->urb.num_cs_entries = 1;
5610         pp_context->urb.size_cs_entry = 2;
5611         pp_context->urb.vfe_start = 0;
5612         pp_context->urb.cs_start = pp_context->urb.vfe_start +
5613                                    pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
5614         assert(pp_context->urb.cs_start +
5615                pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);
5616         pp_context->intel_post_processing = ironlake_post_processing;
5617     } else {
5618         pp_context->vfe_gpu_state.max_num_threads = 60;
5619         pp_context->vfe_gpu_state.num_urb_entries = 59;
5620         pp_context->vfe_gpu_state.gpgpu_mode = 0;
5621         pp_context->vfe_gpu_state.urb_entry_size = 16 - 1;
5622         pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE;
5623         pp_context->intel_post_processing = gen6_post_processing;
5624     }
5625
5626     pp_context->finalize = i965_post_processing_context_finalize;
5627
5628     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
5629     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
5630     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
5631     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
5632
5633     if (IS_HASWELL(i965->intel.device_info))
5634         memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
5635     else if (IS_GEN7(i965->intel.device_info))
5636         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
5637     else if (IS_GEN6(i965->intel.device_info))
5638         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
5639     else if (IS_IRONLAKE(i965->intel.device_info))
5640         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
5641
5642     for (i = 0; i < NUM_PP_MODULES; i++) {
5643         struct pp_module *pp_module = &pp_context->pp_modules[i];
5644         dri_bo_unreference(pp_module->kernel.bo);
5645         if (pp_module->kernel.bin && pp_module->kernel.size) {
5646             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
5647                                                 pp_module->kernel.name,
5648                                                 pp_module->kernel.size,
5649                                                 4096);
5650             assert(pp_module->kernel.bo);
5651             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
5652         } else {
5653             pp_module->kernel.bo = NULL;
5654         }
5655     }
5656
5657     /* static & inline parameters */
5658     if (IS_GEN7(i965->intel.device_info)) {
5659         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
5660         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
5661     } else {
5662         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
5663         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
5664     }
5665
5666     pp_context->batch = batch;
5667     pp_dndi_context_init(&pp_context->pp_dndi_context);
5668
5669     avs_config = IS_IRONLAKE(i965->intel.device_info) ? &gen5_avs_config :
5670                  &gen6_avs_config;
5671     avs_init_state(&pp_context->pp_avs_context.state, avs_config);
5672 }
5673
5674 bool
5675 i965_post_processing_init(VADriverContextP ctx)
5676 {
5677     struct i965_driver_data *i965 = i965_driver_data(ctx);
5678     struct i965_post_processing_context *pp_context = i965->pp_context;
5679
5680     if (HAS_VPP(i965)) {
5681         if (pp_context == NULL) {
5682             pp_context = calloc(1, sizeof(*pp_context));
5683             assert(pp_context);
5684             i965->codec_info->post_processing_context_init(ctx, pp_context, i965->pp_batch);
5685             i965->pp_context = pp_context;
5686         }
5687     }
5688
5689     return true;
5690 }
5691
5692 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
5693     PP_NULL,    /* VAProcFilterNone */
5694     PP_NV12_DN, /* VAProcFilterNoiseReduction */
5695     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
5696     PP_NULL,    /* VAProcFilterSharpening */
5697     PP_NULL,    /* VAProcFilterColorBalance */
5698 };
5699
5700 static const int proc_frame_to_pp_frame[3] = {
5701     I965_SURFACE_FLAG_FRAME,
5702     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
5703     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
5704 };
5705
/* Bit flags describing which operations a processing request involves. */
enum {
    PP_OP_CHANGE_FORMAT = 0x1, /* source and target fourcc differ */
    PP_OP_CHANGE_SIZE   = 0x2, /* source and target rectangle sizes differ */
    PP_OP_DEINTERLACE   = 0x4, /* deinterlacing or field selection requested */
    PP_OP_COMPLEX       = 0x8, /* request is not handled by the fast path */
};
5712
5713 static int
5714 pp_get_kernel_index(uint32_t src_fourcc, uint32_t dst_fourcc, uint32_t pp_ops,
5715                     uint32_t filter_flags)
5716 {
5717     int pp_index = -1;
5718
5719     if (!dst_fourcc)
5720         dst_fourcc = src_fourcc;
5721
5722     switch (src_fourcc) {
5723     case VA_FOURCC_RGBX:
5724     case VA_FOURCC_RGBA:
5725     case VA_FOURCC_BGRX:
5726     case VA_FOURCC_BGRA:
5727         switch (dst_fourcc) {
5728         case VA_FOURCC_NV12:
5729             pp_index = PP_RGBX_LOAD_SAVE_NV12;
5730             break;
5731         }
5732         break;
5733     case VA_FOURCC_YUY2:
5734     case VA_FOURCC_UYVY:
5735         switch (dst_fourcc) {
5736         case VA_FOURCC_NV12:
5737             pp_index = PP_PA_LOAD_SAVE_NV12;
5738             break;
5739         case VA_FOURCC_I420:
5740         case VA_FOURCC_YV12:
5741             pp_index = PP_PA_LOAD_SAVE_PL3;
5742             break;
5743         case VA_FOURCC_YUY2:
5744         case VA_FOURCC_UYVY:
5745             pp_index = PP_PA_LOAD_SAVE_PA;
5746             break;
5747         }
5748         break;
5749     case VA_FOURCC_NV12:
5750         switch (dst_fourcc) {
5751         case VA_FOURCC_NV12:
5752             if (pp_ops & PP_OP_CHANGE_SIZE)
5753                 pp_index = avs_is_needed(filter_flags) ?
5754                            PP_NV12_AVS : PP_NV12_SCALING;
5755             else
5756                 pp_index = PP_NV12_LOAD_SAVE_N12;
5757             break;
5758         case VA_FOURCC_I420:
5759         case VA_FOURCC_YV12:
5760         case VA_FOURCC_IMC1:
5761         case VA_FOURCC_IMC3:
5762             pp_index = PP_NV12_LOAD_SAVE_PL3;
5763             break;
5764         case VA_FOURCC_YUY2:
5765         case VA_FOURCC_UYVY:
5766             pp_index = PP_NV12_LOAD_SAVE_PA;
5767             break;
5768         case VA_FOURCC_RGBX:
5769         case VA_FOURCC_RGBA:
5770         case VA_FOURCC_BGRX:
5771         case VA_FOURCC_BGRA:
5772             pp_index = PP_NV12_LOAD_SAVE_RGBX;
5773             break;
5774         }
5775         break;
5776     case VA_FOURCC_I420:
5777     case VA_FOURCC_YV12:
5778     case VA_FOURCC_IMC1:
5779     case VA_FOURCC_IMC3:
5780     case VA_FOURCC_YV16:
5781     case VA_FOURCC_411P:
5782     case VA_FOURCC_422H:
5783     case VA_FOURCC_422V:
5784     case VA_FOURCC_444P:
5785         switch (dst_fourcc) {
5786         case VA_FOURCC_NV12:
5787             pp_index = PP_PL3_LOAD_SAVE_N12;
5788             break;
5789         case VA_FOURCC_I420:
5790         case VA_FOURCC_YV12:
5791         case VA_FOURCC_IMC1:
5792         case VA_FOURCC_IMC3:
5793             pp_index = PP_PL3_LOAD_SAVE_PL3;
5794             break;
5795         case VA_FOURCC_YUY2:
5796         case VA_FOURCC_UYVY:
5797             pp_index = PP_PL3_LOAD_SAVE_PA;
5798             break;
5799         }
5800         break;
5801     }
5802     return pp_index;
5803 }
5804
5805 static VAStatus
5806 i965_proc_picture_fast(VADriverContextP ctx,
5807                        struct i965_proc_context *proc_context, struct proc_state *proc_state)
5808 {
5809     struct i965_driver_data * const i965 = i965_driver_data(ctx);
5810     const VAProcPipelineParameterBuffer * const pipeline_param =
5811         (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
5812     struct object_surface *src_obj_surface, *dst_obj_surface;
5813     struct i965_surface src_surface, dst_surface;
5814     const VAProcFilterParameterBufferDeinterlacing *deint_params = NULL;
5815     VARectangle src_rect, dst_rect;
5816     VAStatus status;
5817     uint32_t i, filter_flags = 0, pp_ops = 0;
5818     int pp_index;
5819
5820     /* Validate pipeline parameters */
5821     if (pipeline_param->num_filters > 0 && !pipeline_param->filters)
5822         return VA_STATUS_ERROR_INVALID_PARAMETER;
5823
5824     for (i = 0; i < pipeline_param->num_filters; i++) {
5825         const VAProcFilterParameterBuffer *filter;
5826         struct object_buffer * const obj_buffer =
5827             BUFFER(pipeline_param->filters[i]);
5828
5829         assert(obj_buffer && obj_buffer->buffer_store);
5830         if (!obj_buffer || !obj_buffer->buffer_store)
5831             return VA_STATUS_ERROR_INVALID_PARAMETER;
5832
5833         filter = (VAProcFilterParameterBuffer *)
5834                  obj_buffer->buffer_store->buffer;
5835         switch (filter->type) {
5836         case VAProcFilterDeinterlacing:
5837             pp_ops |= PP_OP_DEINTERLACE;
5838             deint_params = (VAProcFilterParameterBufferDeinterlacing *)filter;
5839             break;
5840         default:
5841             pp_ops |= PP_OP_COMPLEX;
5842             break;
5843         }
5844     }
5845     filter_flags |= pipeline_param->filter_flags & VA_FILTER_SCALING_MASK;
5846
5847     /* Validate source surface */
5848     src_obj_surface = SURFACE(pipeline_param->surface);
5849     if (!src_obj_surface)
5850         return VA_STATUS_ERROR_INVALID_SURFACE;
5851
5852     if (!src_obj_surface->fourcc)
5853         return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
5854
5855     if (!src_obj_surface->bo)
5856         return VA_STATUS_ERROR_INVALID_SURFACE;
5857
5858     if (pipeline_param->surface_region) {
5859         src_rect.x = pipeline_param->surface_region->x;
5860         src_rect.y = pipeline_param->surface_region->y;
5861         src_rect.width = pipeline_param->surface_region->width;
5862         src_rect.height = pipeline_param->surface_region->height;
5863     } else {
5864         src_rect.x = 0;
5865         src_rect.y = 0;
5866         src_rect.width = src_obj_surface->orig_width;
5867         src_rect.height = src_obj_surface->orig_height;
5868     }
5869
5870     src_surface.base  = &src_obj_surface->base;
5871     src_surface.type  = I965_SURFACE_TYPE_SURFACE;
5872     src_surface.flags = I965_SURFACE_FLAG_FRAME;
5873
5874     if (pp_ops & PP_OP_DEINTERLACE) {
5875         filter_flags |= !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD) ?
5876                         VA_TOP_FIELD : VA_BOTTOM_FIELD;
5877         if (deint_params->algorithm != VAProcDeinterlacingBob)
5878             pp_ops |= PP_OP_COMPLEX;
5879     } else if (pipeline_param->filter_flags & (VA_TOP_FIELD | VA_BOTTOM_FIELD)) {
5880         filter_flags |= (pipeline_param->filter_flags & VA_TOP_FIELD) ?
5881                         VA_TOP_FIELD : VA_BOTTOM_FIELD;
5882         pp_ops |= PP_OP_DEINTERLACE;
5883     }
5884     if (pp_ops & PP_OP_DEINTERLACE) // XXX: no bob-deinterlacing optimization yet
5885         pp_ops |= PP_OP_COMPLEX;
5886
5887     /* Validate target surface */
5888     dst_obj_surface = SURFACE(proc_state->current_render_target);
5889     if (!dst_obj_surface)
5890         return VA_STATUS_ERROR_INVALID_SURFACE;
5891
5892     if (!dst_obj_surface->bo)
5893         return VA_STATUS_ERROR_INVALID_SURFACE;
5894
5895     if (dst_obj_surface->fourcc &&
5896         dst_obj_surface->fourcc != src_obj_surface->fourcc)
5897         pp_ops |= PP_OP_CHANGE_FORMAT;
5898
5899     if (pipeline_param->output_region) {
5900         dst_rect.x = pipeline_param->output_region->x;
5901         dst_rect.y = pipeline_param->output_region->y;
5902         dst_rect.width = pipeline_param->output_region->width;
5903         dst_rect.height = pipeline_param->output_region->height;
5904     } else {
5905         dst_rect.x = 0;
5906         dst_rect.y = 0;
5907         dst_rect.width = dst_obj_surface->orig_width;
5908         dst_rect.height = dst_obj_surface->orig_height;
5909     }
5910
5911     if (dst_rect.width != src_rect.width || dst_rect.height != src_rect.height)
5912         pp_ops |= PP_OP_CHANGE_SIZE;
5913
5914     dst_surface.base  = &dst_obj_surface->base;
5915     dst_surface.type  = I965_SURFACE_TYPE_SURFACE;
5916     dst_surface.flags = I965_SURFACE_FLAG_FRAME;
5917
5918     /* Validate "fast-path" processing capabilities */
5919     if (!IS_GEN7(i965->intel.device_info)) {
5920         if ((pp_ops & PP_OP_CHANGE_FORMAT) && (pp_ops & PP_OP_CHANGE_SIZE))
5921             return VA_STATUS_ERROR_UNIMPLEMENTED; // temporary surface is needed
5922     }
5923     if (pipeline_param->pipeline_flags & VA_PROC_PIPELINE_FAST) {
5924         filter_flags &= ~VA_FILTER_SCALING_MASK;
5925         filter_flags |= VA_FILTER_SCALING_FAST;
5926     } else {
5927         if (pp_ops & PP_OP_COMPLEX)
5928             return VA_STATUS_ERROR_UNIMPLEMENTED; // full pipeline is needed
5929         if ((filter_flags & VA_FILTER_SCALING_MASK) > VA_FILTER_SCALING_HQ)
5930             return VA_STATUS_ERROR_UNIMPLEMENTED;
5931     }
5932
5933     pp_index = pp_get_kernel_index(src_obj_surface->fourcc,
5934                                    dst_obj_surface->fourcc, pp_ops, filter_flags);
5935     if (pp_index < 0)
5936         return VA_STATUS_ERROR_UNIMPLEMENTED;
5937
5938     proc_context->pp_context.filter_flags = filter_flags;
5939     status = i965_post_processing_internal(ctx, &proc_context->pp_context,
5940                                            &src_surface, &src_rect, &dst_surface, &dst_rect, pp_index, NULL);
5941     intel_batchbuffer_flush(proc_context->pp_context.batch);
5942     return status;
5943 }
5944
5945 VAStatus
5946 i965_proc_picture(VADriverContextP ctx,
5947                   VAProfile profile,
5948                   union codec_state *codec_state,
5949                   struct hw_context *hw_context)
5950 {
5951     struct i965_driver_data *i965 = i965_driver_data(ctx);
5952     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
5953     struct proc_state *proc_state = &codec_state->proc;
5954     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
5955     struct object_surface *obj_surface;
5956     struct i965_surface src_surface, dst_surface;
5957     VARectangle src_rect, dst_rect;
5958     VAStatus status;
5959     int i;
5960     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
5961     int num_tmp_surfaces = 0;
5962     unsigned int tiling = 0, swizzle = 0;
5963     int in_width, in_height;
5964
5965     if (pipeline_param->surface == VA_INVALID_ID ||
5966         proc_state->current_render_target == VA_INVALID_ID) {
5967         status = VA_STATUS_ERROR_INVALID_SURFACE;
5968         goto error;
5969     }
5970
5971     obj_surface = SURFACE(proc_state->current_render_target);
5972     if (!obj_surface)
5973         return VA_STATUS_ERROR_INVALID_SURFACE;
5974
5975     if (!obj_surface->bo) {
5976         unsigned int expected_format = obj_surface->expected_format;
5977         int fourcc = 0;
5978         int subsample = 0;
5979         int tiling = HAS_TILED_SURFACE(i965);
5980         switch (expected_format) {
5981         case VA_RT_FORMAT_YUV420:
5982             fourcc = VA_FOURCC_NV12;
5983             subsample = SUBSAMPLE_YUV420;
5984             break;
5985         case VA_RT_FORMAT_YUV420_10BPP:
5986             fourcc = VA_FOURCC_P010;
5987             subsample = SUBSAMPLE_YUV420;
5988             break;
5989         case VA_RT_FORMAT_RGB32:
5990             fourcc = VA_FOURCC_RGBA;
5991             subsample = SUBSAMPLE_RGBX;
5992             break;
5993         default:
5994             return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
5995         }
5996         i965_check_alloc_surface_bo(ctx, obj_surface, tiling, fourcc, subsample);
5997     }
5998
5999     obj_surface = SURFACE(pipeline_param->surface);
6000
6001     if (!obj_surface) {
6002         status = VA_STATUS_ERROR_INVALID_SURFACE;
6003         goto error;
6004     }
6005
6006     if (!obj_surface->bo) {
6007         status = VA_STATUS_ERROR_INVALID_VALUE; /* The input surface is created without valid content */
6008         goto error;
6009     }
6010
6011     if (pipeline_param->num_filters && !pipeline_param->filters) {
6012         status = VA_STATUS_ERROR_INVALID_PARAMETER;
6013         goto error;
6014     }
6015
6016     status = i965_proc_picture_fast(ctx, proc_context, proc_state);
6017     if (status != VA_STATUS_ERROR_UNIMPLEMENTED)
6018         return status;
6019
6020     in_width = obj_surface->orig_width;
6021     in_height = obj_surface->orig_height;
6022     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
6023
6024     src_surface.base = (struct object_base *)obj_surface;
6025     src_surface.type = I965_SURFACE_TYPE_SURFACE;
6026     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
6027
6028     VASurfaceID out_surface_id = VA_INVALID_ID;
6029     if (obj_surface->fourcc != VA_FOURCC_NV12) {
6030         src_surface.base = (struct object_base *)obj_surface;
6031         src_surface.type = I965_SURFACE_TYPE_SURFACE;
6032         src_surface.flags = I965_SURFACE_FLAG_FRAME;
6033         src_rect.x = 0;
6034         src_rect.y = 0;
6035         src_rect.width = in_width;
6036         src_rect.height = in_height;
6037
6038         status = i965_CreateSurfaces(ctx,
6039                                      in_width,
6040                                      in_height,
6041                                      VA_RT_FORMAT_YUV420,
6042                                      1,
6043                                      &out_surface_id);
6044         if (status != VA_STATUS_SUCCESS)
6045             goto error;
6046         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6047         obj_surface = SURFACE(out_surface_id);
6048         assert(obj_surface);
6049         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6050
6051         dst_surface.base = (struct object_base *)obj_surface;
6052         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6053         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
6054         dst_rect.x = 0;
6055         dst_rect.y = 0;
6056         dst_rect.width = in_width;
6057         dst_rect.height = in_height;
6058
6059         status = i965_image_processing(ctx,
6060                                        &src_surface,
6061                                        &src_rect,
6062                                        &dst_surface,
6063                                        &dst_rect);
6064         if (status != VA_STATUS_SUCCESS)
6065             goto error;
6066
6067         src_surface.base = (struct object_base *)obj_surface;
6068         src_surface.type = I965_SURFACE_TYPE_SURFACE;
6069         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
6070     }
6071
6072     if (pipeline_param->surface_region) {
6073         src_rect.x = pipeline_param->surface_region->x;
6074         src_rect.y = pipeline_param->surface_region->y;
6075         src_rect.width = pipeline_param->surface_region->width;
6076         src_rect.height = pipeline_param->surface_region->height;
6077     } else {
6078         src_rect.x = 0;
6079         src_rect.y = 0;
6080         src_rect.width = in_width;
6081         src_rect.height = in_height;
6082     }
6083
6084     proc_context->pp_context.pipeline_param = pipeline_param;
6085
6086     for (i = 0; i < pipeline_param->num_filters; i++) {
6087         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
6088         VAProcFilterParameterBufferBase *filter_param = NULL;
6089         VAProcFilterType filter_type;
6090         int kernel_index;
6091
6092         if (!obj_buffer ||
6093             !obj_buffer->buffer_store ||
6094             !obj_buffer->buffer_store->buffer) {
6095             status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN;
6096             goto error;
6097         }
6098
6099         out_surface_id = VA_INVALID_ID;
6100         filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
6101         filter_type = filter_param->type;
6102         kernel_index = procfilter_to_pp_flag[filter_type];
6103
6104         if (kernel_index != PP_NULL &&
6105             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
6106             status = i965_CreateSurfaces(ctx,
6107                                          in_width,
6108                                          in_height,
6109                                          VA_RT_FORMAT_YUV420,
6110                                          1,
6111                                          &out_surface_id);
6112             assert(status == VA_STATUS_SUCCESS);
6113             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6114             obj_surface = SURFACE(out_surface_id);
6115             assert(obj_surface);
6116             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6117             dst_surface.base = (struct object_base *)obj_surface;
6118             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6119             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
6120                                                    &src_surface,
6121                                                    &src_rect,
6122                                                    &dst_surface,
6123                                                    &src_rect,
6124                                                    kernel_index,
6125                                                    filter_param);
6126
6127             if (status == VA_STATUS_SUCCESS) {
6128                 src_surface.base = dst_surface.base;
6129                 src_surface.type = dst_surface.type;
6130                 src_surface.flags = dst_surface.flags;
6131             }
6132         }
6133     }
6134
6135     proc_context->pp_context.pipeline_param = NULL;
6136     obj_surface = SURFACE(proc_state->current_render_target);
6137
6138     if (!obj_surface) {
6139         status = VA_STATUS_ERROR_INVALID_SURFACE;
6140         goto error;
6141     }
6142
6143     if (pipeline_param->output_region) {
6144         dst_rect.x = pipeline_param->output_region->x;
6145         dst_rect.y = pipeline_param->output_region->y;
6146         dst_rect.width = pipeline_param->output_region->width;
6147         dst_rect.height = pipeline_param->output_region->height;
6148     } else {
6149         dst_rect.x = 0;
6150         dst_rect.y = 0;
6151         dst_rect.width = obj_surface->orig_width;
6152         dst_rect.height = obj_surface->orig_height;
6153     }
6154
6155     if (IS_GEN7(i965->intel.device_info) ||
6156         IS_GEN8(i965->intel.device_info) ||
6157         IS_GEN9(i965->intel.device_info)) {
6158         unsigned int saved_filter_flag;
6159         struct i965_post_processing_context *i965pp_context = i965->pp_context;
6160
6161         if (obj_surface->fourcc == 0) {
6162             i965_check_alloc_surface_bo(ctx, obj_surface, 1,
6163                                         VA_FOURCC_NV12,
6164                                         SUBSAMPLE_YUV420);
6165         }
6166
6167         i965_vpp_clear_surface(ctx, &proc_context->pp_context,
6168                                obj_surface,
6169                                pipeline_param->output_background_color);
6170
6171         intel_batchbuffer_flush(hw_context->batch);
6172
6173         saved_filter_flag = i965pp_context->filter_flags;
6174         i965pp_context->filter_flags = (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK);
6175
6176         dst_surface.base = (struct object_base *)obj_surface;
6177         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6178         i965_image_processing(ctx, &src_surface, &src_rect, &dst_surface, &dst_rect);
6179
6180         i965pp_context->filter_flags = saved_filter_flag;
6181
6182         if (num_tmp_surfaces)
6183             i965_DestroySurfaces(ctx,
6184                                  tmp_surfaces,
6185                                  num_tmp_surfaces);
6186
6187         return VA_STATUS_SUCCESS;
6188     }
6189
6190     int csc_needed = 0;
6191     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC_NV12) {
6192         csc_needed = 1;
6193         out_surface_id = VA_INVALID_ID;
6194         status = i965_CreateSurfaces(ctx,
6195                                      obj_surface->orig_width,
6196                                      obj_surface->orig_height,
6197                                      VA_RT_FORMAT_YUV420,
6198                                      1,
6199                                      &out_surface_id);
6200         assert(status == VA_STATUS_SUCCESS);
6201         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6202         struct object_surface *csc_surface = SURFACE(out_surface_id);
6203         assert(csc_surface);
6204         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6205         dst_surface.base = (struct object_base *)csc_surface;
6206     } else {
6207         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6208         dst_surface.base = (struct object_base *)obj_surface;
6209     }
6210
6211     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6212     i965_vpp_clear_surface(ctx, &proc_context->pp_context, obj_surface, pipeline_param->output_background_color);
6213
6214     // load/save doesn't support different origin offset for src and dst surface
6215     if (src_rect.width == dst_rect.width &&
6216         src_rect.height == dst_rect.height &&
6217         src_rect.x == dst_rect.x &&
6218         src_rect.y == dst_rect.y) {
6219         i965_post_processing_internal(ctx, &proc_context->pp_context,
6220                                       &src_surface,
6221                                       &src_rect,
6222                                       &dst_surface,
6223                                       &dst_rect,
6224                                       PP_NV12_LOAD_SAVE_N12,
6225                                       NULL);
6226     } else {
6227
6228         proc_context->pp_context.filter_flags = pipeline_param->filter_flags;
6229         i965_post_processing_internal(ctx, &proc_context->pp_context,
6230                                       &src_surface,
6231                                       &src_rect,
6232                                       &dst_surface,
6233                                       &dst_rect,
6234                                       avs_is_needed(pipeline_param->filter_flags) ? PP_NV12_AVS : PP_NV12_SCALING,
6235                                       NULL);
6236     }
6237
6238     if (csc_needed) {
6239         src_surface.base = dst_surface.base;
6240         src_surface.type = dst_surface.type;
6241         src_surface.flags = dst_surface.flags;
6242         dst_surface.base = (struct object_base *)obj_surface;
6243         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6244         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
6245     }
6246
6247     if (num_tmp_surfaces)
6248         i965_DestroySurfaces(ctx,
6249                              tmp_surfaces,
6250                              num_tmp_surfaces);
6251
6252     intel_batchbuffer_flush(hw_context->batch);
6253
6254     return VA_STATUS_SUCCESS;
6255
6256 error:
6257     if (num_tmp_surfaces)
6258         i965_DestroySurfaces(ctx,
6259                              tmp_surfaces,
6260                              num_tmp_surfaces);
6261
6262     return status;
6263 }
6264
6265 static void
6266 i965_proc_context_destroy(void *hw_context)
6267 {
6268     struct i965_proc_context * const proc_context = hw_context;
6269     VADriverContextP const ctx = proc_context->driver_context;
6270
6271     proc_context->pp_context.finalize(ctx, &proc_context->pp_context);
6272     intel_batchbuffer_free(proc_context->base.batch);
6273     free(proc_context);
6274 }
6275
6276 struct hw_context *
6277 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
6278 {
6279     struct i965_driver_data *i965 = i965_driver_data(ctx);
6280     struct intel_driver_data *intel = intel_driver_data(ctx);
6281     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
6282
6283     if (!proc_context)
6284         return NULL;
6285
6286     proc_context->base.destroy = i965_proc_context_destroy;
6287     proc_context->base.run = i965_proc_picture;
6288     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
6289     proc_context->driver_context = ctx;
6290     i965->codec_info->post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
6291
6292     return (struct hw_context *)proc_context;
6293 }
6294
6295