OSDN Git Service

Fix a typo
[android-x86/hardware-intel-common-vaapi.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41 #include "i965_yuv_coefs.h"
42 #include "intel_media.h"
43 #include "intel_gen_vppapi.h"
44
45 #include "gen75_picture_process.h"
46
/*
 * Prototype only: declared extern, and no definition appears in this part of
 * the file. Takes the driver context plus a source and a destination surface
 * object and returns a VAStatus.
 * NOTE(review): presumably converts src_obj_surf into dst_obj_surf - confirm
 * the exact semantics against the definition.
 */
47 extern VAStatus
48 vpp_surface_convert(VADriverContextP ctx,
49                     struct object_surface *src_obj_surf,
50                     struct object_surface *dst_obj_surf);
51
/* Evaluates to the has_vpp field of the codec_info attached to the driver context. */
#define HAS_VPP(ctx) ((ctx)->codec_info->has_vpp)

/* Largest of the Gen6, Gen7 and Gen8 padded surface-state sizes. */
#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN8,\
            MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))

/*
 * Offset of surface-state slot `index`, in units of SURFACE_STATE_PADDED_SIZE.
 * The argument is parenthesized so that an expression argument such as
 * SURFACE_STATE_OFFSET(i + 1) is scaled as a whole instead of expanding to
 * SURFACE_STATE_PADDED_SIZE * i + 1.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* Offset of slot number MAX_PP_SURFACES, i.e. just past MAX_PP_SURFACES slots. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)

/*
 * NOTE(review): presumably the block width/height and the x-offset alignment
 * used by the GPU kernels - confirm against the code that uses them.
 */
#define GPU_ASM_BLOCK_WIDTH         16
#define GPU_ASM_BLOCK_HEIGHT        8
#define GPU_ASM_X_OFFSET_ALIGNMENT  4

/* NOTE(review): extra status value; its meaning is not visible in this part of the file. */
#define VA_STATUS_SUCCESS_1                     0xFFFFFFFE
65
/*
 * Kernel binaries for the *_gen5 post-processing modules.
 * Each array is filled by #include-ing a pre-built shader file from
 * shaders/post_processing/gen5_6/ (suffix .g4b.gen5); every element is a row
 * of four uint32_t words. The arrays and their sizeof are referenced by the
 * pp_modules_gen5 table further down.
 */
66 static const uint32_t pp_null_gen5[][4] = {
67 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
68 };
69
70 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
71 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
72 };
73
74 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
75 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
76 };
77
78 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
79 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
80 };
81
82 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
83 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
84 };
85
86 static const uint32_t pp_nv12_scaling_gen5[][4] = {
87 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
88 };
89
90 static const uint32_t pp_nv12_avs_gen5[][4] = {
91 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
92 };
93
94 static const uint32_t pp_nv12_dndi_gen5[][4] = {
95 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
96 };
97
98 static const uint32_t pp_nv12_dn_gen5[][4] = {
99 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
100 };
101
102 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
103 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
104 };
105
106 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
107 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
108 };
109
110 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
111 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
112 };
113
114 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
115 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
116 };
117
118 static const uint32_t pp_pa_load_save_pa_gen5[][4] = {
119 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5"
120 };
121
122 static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
123 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
124 };
125
126 static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
127 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
128 };
129
/*
 * Forward declarations of the file-local initialize callbacks that the
 * pp_modules_gen5 and pp_modules_gen6 tables below store as function pointers.
 * All of them share one parameter list: the driver context, the
 * post-processing context, a source surface with its rectangle, a destination
 * surface with its rectangle, and an opaque filter_param pointer.
 * Each returns a VAStatus. The definitions are not part of this section.
 */
130 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
131                                    const struct i965_surface *src_surface,
132                                    const VARectangle *src_rect,
133                                    struct i965_surface *dst_surface,
134                                    const VARectangle *dst_rect,
135                                    void *filter_param);
136 static VAStatus
137 pp_nv12_avs_initialize(VADriverContextP ctx,
138                        struct i965_post_processing_context *pp_context,
139                        const struct i965_surface *src_surface, const VARectangle *src_rect,
140                        struct i965_surface *dst_surface, const VARectangle *dst_rect,
141                        void *filter_param);
142 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
143                                            const struct i965_surface *src_surface,
144                                            const VARectangle *src_rect,
145                                            struct i965_surface *dst_surface,
146                                            const VARectangle *dst_rect,
147                                            void *filter_param);
148 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
149                                              const struct i965_surface *src_surface,
150                                              const VARectangle *src_rect,
151                                              struct i965_surface *dst_surface,
152                                              const VARectangle *dst_rect,
153                                              void *filter_param);
154 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
155                                                 const struct i965_surface *src_surface,
156                                                 const VARectangle *src_rect,
157                                                 struct i965_surface *dst_surface,
158                                                 const VARectangle *dst_rect,
159                                                 void *filter_param);
160 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
161                                         const struct i965_surface *src_surface,
162                                         const VARectangle *src_rect,
163                                         struct i965_surface *dst_surface,
164                                         const VARectangle *dst_rect,
165                                         void *filter_param);
166 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
167                                       const struct i965_surface *src_surface,
168                                       const VARectangle *src_rect,
169                                       struct i965_surface *dst_surface,
170                                       const VARectangle *dst_rect,
171                                       void *filter_param);
172
/*
 * Table of post-processing modules built from the *_gen5 kernel binaries.
 * Every entry is initialized positionally: an inner aggregate holding a
 * description string, a PP_* identifier, the kernel binary array, its size in
 * bytes (sizeof of the array) and a NULL member, followed by the initialize
 * callback for that module.
 * All load/save (format conversion) entries share
 * pp_plx_load_save_plx_initialize; the NULL, scaling, AVS, DNDI and DN entries
 * each have a dedicated callback.
 * NOTE(review): the entry order presumably has to match the numeric order of
 * the PP_* identifiers so the table can be indexed by them - confirm against
 * the enum and the lookup code before reordering.
 */
173 static struct pp_module pp_modules_gen5[] = {
174     {
175         {
176             "NULL module (for testing)",
177             PP_NULL,
178             pp_null_gen5,
179             sizeof(pp_null_gen5),
180             NULL,
181         },
182
183         pp_null_initialize,
184     },
185
186     {
187         {
188             "NV12_NV12",
189             PP_NV12_LOAD_SAVE_N12,
190             pp_nv12_load_save_nv12_gen5,
191             sizeof(pp_nv12_load_save_nv12_gen5),
192             NULL,
193         },
194
195         pp_plx_load_save_plx_initialize,
196     },
197
198     {
199         {
200             "NV12_PL3",
201             PP_NV12_LOAD_SAVE_PL3,
202             pp_nv12_load_save_pl3_gen5,
203             sizeof(pp_nv12_load_save_pl3_gen5),
204             NULL,
205         },
206
207         pp_plx_load_save_plx_initialize,
208     },
209
210     {
211         {
212             "PL3_NV12",
213             PP_PL3_LOAD_SAVE_N12,
214             pp_pl3_load_save_nv12_gen5,
215             sizeof(pp_pl3_load_save_nv12_gen5),
216             NULL,
217         },
218
219         pp_plx_load_save_plx_initialize,
220     },
221
222     {
223         {
224             "PL3_PL3",
225             PP_PL3_LOAD_SAVE_PL3,
226             pp_pl3_load_save_pl3_gen5,
227             sizeof(pp_pl3_load_save_pl3_gen5),
228             NULL,
229         },
230
231         pp_plx_load_save_plx_initialize
232     },
233
234     {
235         {
236             "NV12 Scaling module",
237             PP_NV12_SCALING,
238             pp_nv12_scaling_gen5,
239             sizeof(pp_nv12_scaling_gen5),
240             NULL,
241         },
242
243         pp_nv12_scaling_initialize,
244     },
245
246     {
247         {
248             "NV12 AVS module",
249             PP_NV12_AVS,
250             pp_nv12_avs_gen5,
251             sizeof(pp_nv12_avs_gen5),
252             NULL,
253         },
254
255         pp_nv12_avs_initialize,
256     },
257
258     {
259         {
260             "NV12 DNDI module",
261             PP_NV12_DNDI,
262             pp_nv12_dndi_gen5,
263             sizeof(pp_nv12_dndi_gen5),
264             NULL,
265         },
266
267         pp_nv12_dndi_initialize,
268     },
269
270     {
271         {
272             "NV12 DN module",
273             PP_NV12_DN,
274             pp_nv12_dn_gen5,
275             sizeof(pp_nv12_dn_gen5),
276             NULL,
277         },
278
279         pp_nv12_dn_initialize,
280     },
281
282     {
283         {
284             "NV12_PA module",
285             PP_NV12_LOAD_SAVE_PA,
286             pp_nv12_load_save_pa_gen5,
287             sizeof(pp_nv12_load_save_pa_gen5),
288             NULL,
289         },
290
291         pp_plx_load_save_plx_initialize,
292     },
293
294     {
295         {
296             "PL3_PA module",
297             PP_PL3_LOAD_SAVE_PA,
298             pp_pl3_load_save_pa_gen5,
299             sizeof(pp_pl3_load_save_pa_gen5),
300             NULL,
301         },
302
303         pp_plx_load_save_plx_initialize,
304     },
305
306     {
307         {
308             "PA_NV12 module",
309             PP_PA_LOAD_SAVE_NV12,
310             pp_pa_load_save_nv12_gen5,
311             sizeof(pp_pa_load_save_nv12_gen5),
312             NULL,
313         },
314
315         pp_plx_load_save_plx_initialize,
316     },
317
318     {
319         {
320             "PA_PL3 module",
321             PP_PA_LOAD_SAVE_PL3,
322             pp_pa_load_save_pl3_gen5,
323             sizeof(pp_pa_load_save_pl3_gen5),
324             NULL,
325         },
326
327         pp_plx_load_save_plx_initialize,
328     },
329
330     {
331         {
332             "PA_PA module",
333             PP_PA_LOAD_SAVE_PA,
334             pp_pa_load_save_pa_gen5,
335             sizeof(pp_pa_load_save_pa_gen5),
336             NULL,
337         },
338
339         pp_plx_load_save_plx_initialize,
340     },
341
342     {
343         {
344             "RGBX_NV12 module",
345             PP_RGBX_LOAD_SAVE_NV12,
346             pp_rgbx_load_save_nv12_gen5,
347             sizeof(pp_rgbx_load_save_nv12_gen5),
348             NULL,
349         },
350
351         pp_plx_load_save_plx_initialize,
352     },
353
354     {
355         {
356             "NV12_RGBX module",
357             PP_NV12_LOAD_SAVE_RGBX,
358             pp_nv12_load_save_rgbx_gen5,
359             sizeof(pp_nv12_load_save_rgbx_gen5),
360             NULL,
361         },
362
363         pp_plx_load_save_plx_initialize,
364     },
365 };
366
/*
 * Kernel binaries for the *_gen6 post-processing modules.
 * Each array is filled by #include-ing a pre-built shader file from
 * shaders/post_processing/gen5_6/ (suffix .g6b); every element is a row of
 * four uint32_t words. The arrays and their sizeof are referenced by the
 * pp_modules_gen6 table further down.
 */
367 static const uint32_t pp_null_gen6[][4] = {
368 #include "shaders/post_processing/gen5_6/null.g6b"
369 };
370
371 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
372 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
373 };
374
375 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
376 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
377 };
378
379 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
380 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
381 };
382
383 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
384 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
385 };
386
/*
 * The scaling array includes the same shader file (nv12_avs_nv12.g6b) as
 * pp_nv12_avs_gen6 below, so both arrays hold identical contents.
 */
387 static const uint32_t pp_nv12_scaling_gen6[][4] = {
388 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
389 };
390
391 static const uint32_t pp_nv12_avs_gen6[][4] = {
392 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
393 };
394
395 static const uint32_t pp_nv12_dndi_gen6[][4] = {
396 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
397 };
398
399 static const uint32_t pp_nv12_dn_gen6[][4] = {
400 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
401 };
402
403 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
404 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
405 };
406
407 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
408 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
409 };
410
411 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
412 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
413 };
414
415 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
416 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
417 };
418
419 static const uint32_t pp_pa_load_save_pa_gen6[][4] = {
420 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g6b"
421 };
422
423 static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
424 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
425 };
426
427 static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
428 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
429 };
430
/*
 * Table of post-processing modules built from the *_gen6 kernel binaries.
 * Every entry is initialized positionally: an inner aggregate holding a
 * description string, a PP_* identifier, the kernel binary array, its size in
 * bytes (sizeof of the array) and a NULL member, followed by the initialize
 * callback for that module.
 * The callbacks are the same ones the pp_modules_gen5 table uses, except that
 * the scaling entry uses gen6_nv12_scaling_initialize.
 * NOTE(review): the entry order presumably has to match the numeric order of
 * the PP_* identifiers so the table can be indexed by them - confirm against
 * the enum and the lookup code before reordering.
 */
431 static struct pp_module pp_modules_gen6[] = {
432     {
433         {
434             "NULL module (for testing)",
435             PP_NULL,
436             pp_null_gen6,
437             sizeof(pp_null_gen6),
438             NULL,
439         },
440
441         pp_null_initialize,
442     },
443
444     {
445         {
446             "NV12_NV12",
447             PP_NV12_LOAD_SAVE_N12,
448             pp_nv12_load_save_nv12_gen6,
449             sizeof(pp_nv12_load_save_nv12_gen6),
450             NULL,
451         },
452
453         pp_plx_load_save_plx_initialize,
454     },
455
456     {
457         {
458             "NV12_PL3",
459             PP_NV12_LOAD_SAVE_PL3,
460             pp_nv12_load_save_pl3_gen6,
461             sizeof(pp_nv12_load_save_pl3_gen6),
462             NULL,
463         },
464
465         pp_plx_load_save_plx_initialize,
466     },
467
468     {
469         {
470             "PL3_NV12",
471             PP_PL3_LOAD_SAVE_N12,
472             pp_pl3_load_save_nv12_gen6,
473             sizeof(pp_pl3_load_save_nv12_gen6),
474             NULL,
475         },
476
477         pp_plx_load_save_plx_initialize,
478     },
479
480     {
481         {
482             "PL3_PL3",
483             PP_PL3_LOAD_SAVE_PL3,
484             pp_pl3_load_save_pl3_gen6,
485             sizeof(pp_pl3_load_save_pl3_gen6),
486             NULL,
487         },
488
489         pp_plx_load_save_plx_initialize,
490     },
491
492     {
493         {
494             "NV12 Scaling module",
495             PP_NV12_SCALING,
496             pp_nv12_scaling_gen6,
497             sizeof(pp_nv12_scaling_gen6),
498             NULL,
499         },
500
501         gen6_nv12_scaling_initialize,
502     },
503
504     {
505         {
506             "NV12 AVS module",
507             PP_NV12_AVS,
508             pp_nv12_avs_gen6,
509             sizeof(pp_nv12_avs_gen6),
510             NULL,
511         },
512
513         pp_nv12_avs_initialize,
514     },
515
516     {
517         {
518             "NV12 DNDI module",
519             PP_NV12_DNDI,
520             pp_nv12_dndi_gen6,
521             sizeof(pp_nv12_dndi_gen6),
522             NULL,
523         },
524
525         pp_nv12_dndi_initialize,
526     },
527
528     {
529         {
530             "NV12 DN module",
531             PP_NV12_DN,
532             pp_nv12_dn_gen6,
533             sizeof(pp_nv12_dn_gen6),
534             NULL,
535         },
536
537         pp_nv12_dn_initialize,
538     },
539     {
540         {
541             "NV12_PA module",
542             PP_NV12_LOAD_SAVE_PA,
543             pp_nv12_load_save_pa_gen6,
544             sizeof(pp_nv12_load_save_pa_gen6),
545             NULL,
546         },
547
548         pp_plx_load_save_plx_initialize,
549     },
550
551     {
552         {
553             "PL3_PA module",
554             PP_PL3_LOAD_SAVE_PA,
555             pp_pl3_load_save_pa_gen6,
556             sizeof(pp_pl3_load_save_pa_gen6),
557             NULL,
558         },
559
560         pp_plx_load_save_plx_initialize,
561     },
562
563     {
564         {
565             "PA_NV12 module",
566             PP_PA_LOAD_SAVE_NV12,
567             pp_pa_load_save_nv12_gen6,
568             sizeof(pp_pa_load_save_nv12_gen6),
569             NULL,
570         },
571
572         pp_plx_load_save_plx_initialize,
573     },
574
575     {
576         {
577             "PA_PL3 module",
578             PP_PA_LOAD_SAVE_PL3,
579             pp_pa_load_save_pl3_gen6,
580             sizeof(pp_pa_load_save_pl3_gen6),
581             NULL,
582         },
583
584         pp_plx_load_save_plx_initialize,
585     },
586
587     {
588         {
589             "PA_PA module",
590             PP_PA_LOAD_SAVE_PA,
591             pp_pa_load_save_pa_gen6,
592             sizeof(pp_pa_load_save_pa_gen6),
593             NULL,
594         },
595
596         pp_plx_load_save_plx_initialize,
597     },
598
599     {
600         {
601             "RGBX_NV12 module",
602             PP_RGBX_LOAD_SAVE_NV12,
603             pp_rgbx_load_save_nv12_gen6,
604             sizeof(pp_rgbx_load_save_nv12_gen6),
605             NULL,
606         },
607
608         pp_plx_load_save_plx_initialize,
609     },
610
611     {
612         {
613             "NV12_RGBX module",
614             PP_NV12_LOAD_SAVE_RGBX,
615             pp_nv12_load_save_rgbx_gen6,
616             sizeof(pp_nv12_load_save_rgbx_gen6),
617             NULL,
618         },
619
620         pp_plx_load_save_plx_initialize,
621     },
622 };
623
/*
 * Kernel binaries for the *_gen7 post-processing modules.
 * Each non-empty array is filled by #include-ing a pre-built shader file from
 * shaders/post_processing/gen7/ (suffix .g7b); every element is a row of four
 * uint32_t words. The arrays and their sizeof are referenced by the
 * pp_modules_gen7 table further down.
 */
/*
 * The NULL module has no kernel here: the initializer list is empty, so the
 * array has no elements.
 * NOTE(review): an empty initializer / zero-length array is a compiler
 * extension before C23 - confirm the supported toolchains accept it.
 */
624 static const uint32_t pp_null_gen7[][4] = {
625 };
626
627 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
628 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
629 };
630
631 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
632 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
633 };
634
635 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
636 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
637 };
638
639 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
640 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
641 };
642
/* Scaling and AVS include the same shader file (avs.g7b). */
643 static const uint32_t pp_nv12_scaling_gen7[][4] = {
644 #include "shaders/post_processing/gen7/avs.g7b"
645 };
646
647 static const uint32_t pp_nv12_avs_gen7[][4] = {
648 #include "shaders/post_processing/gen7/avs.g7b"
649 };
650
651 static const uint32_t pp_nv12_dndi_gen7[][4] = {
652 #include "shaders/post_processing/gen7/dndi.g7b"
653 };
654
655 static const uint32_t pp_nv12_dn_gen7[][4] = {
656 #include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
657 };
658 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
659 #include "shaders/post_processing/gen7/pl2_to_pa.g7b"
660 };
661 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
662 #include "shaders/post_processing/gen7/pl3_to_pa.g7b"
663 };
664 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
665 #include "shaders/post_processing/gen7/pa_to_pl2.g7b"
666 };
667 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
668 #include "shaders/post_processing/gen7/pa_to_pl3.g7b"
669 };
670 static const uint32_t pp_pa_load_save_pa_gen7[][4] = {
671 #include "shaders/post_processing/gen7/pa_to_pa.g7b"
672 };
673 static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
674 #include "shaders/post_processing/gen7/rgbx_to_nv12.g7b"
675 };
676 static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
677 #include "shaders/post_processing/gen7/pl2_to_rgbx.g7b"
678 };
679
/*
 * Forward declarations of the file-local gen7 initialize callbacks that the
 * pp_modules_gen7 and pp_modules_gen75 tables below store as function
 * pointers. They take the same parameter list as the other initialize
 * callbacks declared earlier in this file (driver context, post-processing
 * context, source surface and rectangle, destination surface and rectangle,
 * opaque filter_param) and return a VAStatus. The definitions are not part of
 * this section.
 */
680 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
681                                            const struct i965_surface *src_surface,
682                                            const VARectangle *src_rect,
683                                            struct i965_surface *dst_surface,
684                                            const VARectangle *dst_rect,
685                                            void *filter_param);
686 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
687                                              const struct i965_surface *src_surface,
688                                              const VARectangle *src_rect,
689                                              struct i965_surface *dst_surface,
690                                              const VARectangle *dst_rect,
691                                              void *filter_param);
692 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
693                                            const struct i965_surface *src_surface,
694                                            const VARectangle *src_rect,
695                                            struct i965_surface *dst_surface,
696                                            const VARectangle *dst_rect,
697                                            void *filter_param);
698
/*
 * Table of post-processing modules built from the *_gen7 kernel binaries.
 * Every entry is initialized positionally: an inner aggregate holding a
 * description string, a PP_* identifier, the kernel binary array, its size in
 * bytes (sizeof of the array) and a NULL member, followed by the initialize
 * callback for that module.
 * Every load/save entry as well as the scaling and AVS entries use
 * gen7_pp_plx_avs_initialize; DNDI and DN use their own gen7 callbacks and the
 * NULL module uses pp_null_initialize. Because pp_null_gen7 is an empty
 * array, the size stored for the NULL entry is 0.
 * NOTE(review): the entry order presumably has to match the numeric order of
 * the PP_* identifiers so the table can be indexed by them - confirm against
 * the enum and the lookup code before reordering.
 */
699 static struct pp_module pp_modules_gen7[] = {
700     {
701         {
702             "NULL module (for testing)",
703             PP_NULL,
704             pp_null_gen7,
705             sizeof(pp_null_gen7),
706             NULL,
707         },
708
709         pp_null_initialize,
710     },
711
712     {
713         {
714             "NV12_NV12",
715             PP_NV12_LOAD_SAVE_N12,
716             pp_nv12_load_save_nv12_gen7,
717             sizeof(pp_nv12_load_save_nv12_gen7),
718             NULL,
719         },
720
721         gen7_pp_plx_avs_initialize,
722     },
723
724     {
725         {
726             "NV12_PL3",
727             PP_NV12_LOAD_SAVE_PL3,
728             pp_nv12_load_save_pl3_gen7,
729             sizeof(pp_nv12_load_save_pl3_gen7),
730             NULL,
731         },
732
733         gen7_pp_plx_avs_initialize,
734     },
735
736     {
737         {
738             "PL3_NV12",
739             PP_PL3_LOAD_SAVE_N12,
740             pp_pl3_load_save_nv12_gen7,
741             sizeof(pp_pl3_load_save_nv12_gen7),
742             NULL,
743         },
744
745         gen7_pp_plx_avs_initialize,
746     },
747
748     {
749         {
750             "PL3_PL3",
751             PP_PL3_LOAD_SAVE_PL3,
752             pp_pl3_load_save_pl3_gen7,
753             sizeof(pp_pl3_load_save_pl3_gen7),
754             NULL,
755         },
756
757         gen7_pp_plx_avs_initialize,
758     },
759
760     {
761         {
762             "NV12 Scaling module",
763             PP_NV12_SCALING,
764             pp_nv12_scaling_gen7,
765             sizeof(pp_nv12_scaling_gen7),
766             NULL,
767         },
768
769         gen7_pp_plx_avs_initialize,
770     },
771
772     {
773         {
774             "NV12 AVS module",
775             PP_NV12_AVS,
776             pp_nv12_avs_gen7,
777             sizeof(pp_nv12_avs_gen7),
778             NULL,
779         },
780
781         gen7_pp_plx_avs_initialize,
782     },
783
784     {
785         {
786             "NV12 DNDI module",
787             PP_NV12_DNDI,
788             pp_nv12_dndi_gen7,
789             sizeof(pp_nv12_dndi_gen7),
790             NULL,
791         },
792
793         gen7_pp_nv12_dndi_initialize,
794     },
795
796     {
797         {
798             "NV12 DN module",
799             PP_NV12_DN,
800             pp_nv12_dn_gen7,
801             sizeof(pp_nv12_dn_gen7),
802             NULL,
803         },
804
805         gen7_pp_nv12_dn_initialize,
806     },
807     {
808         {
809             "NV12_PA module",
810             PP_NV12_LOAD_SAVE_PA,
811             pp_nv12_load_save_pa_gen7,
812             sizeof(pp_nv12_load_save_pa_gen7),
813             NULL,
814         },
815
816         gen7_pp_plx_avs_initialize,
817     },
818
819     {
820         {
821             "PL3_PA module",
822             PP_PL3_LOAD_SAVE_PA,
823             pp_pl3_load_save_pa_gen7,
824             sizeof(pp_pl3_load_save_pa_gen7),
825             NULL,
826         },
827
828         gen7_pp_plx_avs_initialize,
829     },
830
831     {
832         {
833             "PA_NV12 module",
834             PP_PA_LOAD_SAVE_NV12,
835             pp_pa_load_save_nv12_gen7,
836             sizeof(pp_pa_load_save_nv12_gen7),
837             NULL,
838         },
839
840         gen7_pp_plx_avs_initialize,
841     },
842
843     {
844         {
845             "PA_PL3 module",
846             PP_PA_LOAD_SAVE_PL3,
847             pp_pa_load_save_pl3_gen7,
848             sizeof(pp_pa_load_save_pl3_gen7),
849             NULL,
850         },
851
852         gen7_pp_plx_avs_initialize,
853     },
854
855     {
856         {
857             "PA_PA module",
858             PP_PA_LOAD_SAVE_PA,
859             pp_pa_load_save_pa_gen7,
860             sizeof(pp_pa_load_save_pa_gen7),
861             NULL,
862         },
863
864         gen7_pp_plx_avs_initialize,
865     },
866
867     {
868         {
869             "RGBX_NV12 module",
870             PP_RGBX_LOAD_SAVE_NV12,
871             pp_rgbx_load_save_nv12_gen7,
872             sizeof(pp_rgbx_load_save_nv12_gen7),
873             NULL,
874         },
875
876         gen7_pp_plx_avs_initialize,
877     },
878
879     {
880         {
881             "NV12_RGBX module",
882             PP_NV12_LOAD_SAVE_RGBX,
883             pp_nv12_load_save_rgbx_gen7,
884             sizeof(pp_nv12_load_save_rgbx_gen7),
885             NULL,
886         },
887
888         gen7_pp_plx_avs_initialize,
889     },
890
891 };
892
/*
 * Kernel binaries for the *_gen75 post-processing modules.
 * Each non-empty array is filled by #include-ing a pre-built shader file from
 * shaders/post_processing/gen7/ (suffix .g75b); every element is a row of
 * four uint32_t words. The arrays and their sizeof are referenced by the
 * pp_modules_gen75 table that follows.
 * Three arrays are empty: pp_null_gen75 has no initializer entries, and the
 * #include lines of pp_nv12_dndi_gen75 and pp_nv12_dn_gen75 are commented out.
 * NOTE(review): an empty initializer / zero-length array is a compiler
 * extension before C23 - confirm the supported toolchains accept it.
 */
893 static const uint32_t pp_null_gen75[][4] = {
894 };
895
896 static const uint32_t pp_nv12_load_save_nv12_gen75[][4] = {
897 #include "shaders/post_processing/gen7/pl2_to_pl2.g75b"
898 };
899
900 static const uint32_t pp_nv12_load_save_pl3_gen75[][4] = {
901 #include "shaders/post_processing/gen7/pl2_to_pl3.g75b"
902 };
903
904 static const uint32_t pp_pl3_load_save_nv12_gen75[][4] = {
905 #include "shaders/post_processing/gen7/pl3_to_pl2.g75b"
906 };
907
908 static const uint32_t pp_pl3_load_save_pl3_gen75[][4] = {
909 #include "shaders/post_processing/gen7/pl3_to_pl3.g75b"
910 };
911
/* Scaling and AVS include the same shader file (avs.g75b). */
912 static const uint32_t pp_nv12_scaling_gen75[][4] = {
913 #include "shaders/post_processing/gen7/avs.g75b"
914 };
915
916 static const uint32_t pp_nv12_avs_gen75[][4] = {
917 #include "shaders/post_processing/gen7/avs.g75b"
918 };
919
/* Shader include disabled: this array has no elements. */
920 static const uint32_t pp_nv12_dndi_gen75[][4] = {
921 // #include "shaders/post_processing/gen7/dndi.g75b"
922 };
923
/* Shader include disabled: this array has no elements. */
924 static const uint32_t pp_nv12_dn_gen75[][4] = {
925 // #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
926 };
927 static const uint32_t pp_nv12_load_save_pa_gen75[][4] = {
928 #include "shaders/post_processing/gen7/pl2_to_pa.g75b"
929 };
930 static const uint32_t pp_pl3_load_save_pa_gen75[][4] = {
931 #include "shaders/post_processing/gen7/pl3_to_pa.g75b"
932 };
933 static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
934 #include "shaders/post_processing/gen7/pa_to_pl2.g75b"
935 };
936 static const uint32_t pp_pa_load_save_pl3_gen75[][4] = {
937 #include "shaders/post_processing/gen7/pa_to_pl3.g75b"
938 };
939 static const uint32_t pp_pa_load_save_pa_gen75[][4] = {
940 #include "shaders/post_processing/gen7/pa_to_pa.g75b"
941 };
942 static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = {
943 #include "shaders/post_processing/gen7/rgbx_to_nv12.g75b"
944 };
945 static const uint32_t pp_nv12_load_save_rgbx_gen75[][4] = {
946 #include "shaders/post_processing/gen7/pl2_to_rgbx.g75b"
947 };
948
949 static struct pp_module pp_modules_gen75[] = {
950     {
951         {
952             "NULL module (for testing)",
953             PP_NULL,
954             pp_null_gen75,
955             sizeof(pp_null_gen75),
956             NULL,
957         },
958
959         pp_null_initialize,
960     },
961
962     {
963         {
964             "NV12_NV12",
965             PP_NV12_LOAD_SAVE_N12,
966             pp_nv12_load_save_nv12_gen75,
967             sizeof(pp_nv12_load_save_nv12_gen75),
968             NULL,
969         },
970
971         gen7_pp_plx_avs_initialize,
972     },
973
974     {
975         {
976             "NV12_PL3",
977             PP_NV12_LOAD_SAVE_PL3,
978             pp_nv12_load_save_pl3_gen75,
979             sizeof(pp_nv12_load_save_pl3_gen75),
980             NULL,
981         },
982
983         gen7_pp_plx_avs_initialize,
984     },
985
986     {
987         {
988             "PL3_NV12",
989             PP_PL3_LOAD_SAVE_N12,
990             pp_pl3_load_save_nv12_gen75,
991             sizeof(pp_pl3_load_save_nv12_gen75),
992             NULL,
993         },
994
995         gen7_pp_plx_avs_initialize,
996     },
997
998     {
999         {
1000             "PL3_PL3",
1001             PP_PL3_LOAD_SAVE_PL3,
1002             pp_pl3_load_save_pl3_gen75,
1003             sizeof(pp_pl3_load_save_pl3_gen75),
1004             NULL,
1005         },
1006
1007         gen7_pp_plx_avs_initialize,
1008     },
1009
1010     {
1011         {
1012             "NV12 Scaling module",
1013             PP_NV12_SCALING,
1014             pp_nv12_scaling_gen75,
1015             sizeof(pp_nv12_scaling_gen75),
1016             NULL,
1017         },
1018
1019         gen7_pp_plx_avs_initialize,
1020     },
1021
1022     {
1023         {
1024             "NV12 AVS module",
1025             PP_NV12_AVS,
1026             pp_nv12_avs_gen75,
1027             sizeof(pp_nv12_avs_gen75),
1028             NULL,
1029         },
1030
1031         gen7_pp_plx_avs_initialize,
1032     },
1033
1034     {
1035         {
1036             "NV12 DNDI module",
1037             PP_NV12_DNDI,
1038             pp_nv12_dndi_gen75,
1039             sizeof(pp_nv12_dndi_gen75),
1040             NULL,
1041         },
1042
1043         gen7_pp_nv12_dn_initialize,
1044     },
1045
1046     {
1047         {
1048             "NV12 DN module",
1049             PP_NV12_DN,
1050             pp_nv12_dn_gen75,
1051             sizeof(pp_nv12_dn_gen75),
1052             NULL,
1053         },
1054
1055         gen7_pp_nv12_dn_initialize,
1056     },
1057
1058     {
1059         {
1060             "NV12_PA module",
1061             PP_NV12_LOAD_SAVE_PA,
1062             pp_nv12_load_save_pa_gen75,
1063             sizeof(pp_nv12_load_save_pa_gen75),
1064             NULL,
1065         },
1066
1067         gen7_pp_plx_avs_initialize,
1068     },
1069
1070     {
1071         {
1072             "PL3_PA module",
1073             PP_PL3_LOAD_SAVE_PA,
1074             pp_pl3_load_save_pa_gen75,
1075             sizeof(pp_pl3_load_save_pa_gen75),
1076             NULL,
1077         },
1078
1079         gen7_pp_plx_avs_initialize,
1080     },
1081
1082     {
1083         {
1084             "PA_NV12 module",
1085             PP_PA_LOAD_SAVE_NV12,
1086             pp_pa_load_save_nv12_gen75,
1087             sizeof(pp_pa_load_save_nv12_gen75),
1088             NULL,
1089         },
1090
1091         gen7_pp_plx_avs_initialize,
1092     },
1093
1094     {
1095         {
1096             "PA_PL3 module",
1097             PP_PA_LOAD_SAVE_PL3,
1098             pp_pa_load_save_pl3_gen75,
1099             sizeof(pp_pa_load_save_pl3_gen75),
1100             NULL,
1101         },
1102
1103         gen7_pp_plx_avs_initialize,
1104     },
1105
1106     {
1107         {
1108             "PA_PA module",
1109             PP_PA_LOAD_SAVE_PA,
1110             pp_pa_load_save_pa_gen75,
1111             sizeof(pp_pa_load_save_pa_gen75),
1112             NULL,
1113         },
1114
1115         gen7_pp_plx_avs_initialize,
1116     },
1117
1118     {
1119         {
1120             "RGBX_NV12 module",
1121             PP_RGBX_LOAD_SAVE_NV12,
1122             pp_rgbx_load_save_nv12_gen75,
1123             sizeof(pp_rgbx_load_save_nv12_gen75),
1124             NULL,
1125         },
1126
1127         gen7_pp_plx_avs_initialize,
1128     },
1129
1130     {
1131         {
1132             "NV12_RGBX module",
1133             PP_NV12_LOAD_SAVE_RGBX,
1134             pp_nv12_load_save_rgbx_gen75,
1135             sizeof(pp_nv12_load_save_rgbx_gen75),
1136             NULL,
1137         },
1138
1139         gen7_pp_plx_avs_initialize,
1140     },
1141
1142 };
1143
1144 static void
1145 pp_dndi_frame_store_reset(DNDIFrameStore *fs)
1146 {
1147     fs->obj_surface = NULL;
1148     fs->surface_id = VA_INVALID_ID;
1149     fs->is_scratch_surface = 0;
1150 }
1151
1152 static inline void
1153 pp_dndi_frame_store_swap(DNDIFrameStore *fs1, DNDIFrameStore *fs2)
1154 {
1155     const DNDIFrameStore tmpfs = *fs1;
1156     *fs1 = *fs2;
1157     *fs2 = tmpfs;
1158 }
1159
1160 static inline void
1161 pp_dndi_frame_store_clear(DNDIFrameStore *fs, VADriverContextP ctx)
1162 {
1163     if (fs->obj_surface && fs->is_scratch_surface) {
1164         VASurfaceID va_surface = fs->obj_surface->base.id;
1165         i965_DestroySurfaces(ctx, &va_surface, 1);
1166     }
1167     pp_dndi_frame_store_reset(fs);
1168 }
1169
1170 static void
1171 pp_dndi_context_init(struct pp_dndi_context *dndi_ctx)
1172 {
1173     int i;
1174
1175     memset(dndi_ctx, 0, sizeof(*dndi_ctx));
1176     for (i = 0; i < ARRAY_ELEMS(dndi_ctx->frame_store); i++)
1177         pp_dndi_frame_store_reset(&dndi_ctx->frame_store[i]);
1178 }
1179
1180 static VAStatus
1181 pp_dndi_context_init_surface_params(struct pp_dndi_context *dndi_ctx,
1182                                     struct object_surface *obj_surface,
1183                                     const VAProcPipelineParameterBuffer *pipe_params,
1184                                     const VAProcFilterParameterBufferDeinterlacing *deint_params)
1185 {
1186     DNDIFrameStore *fs;
1187
1188     dndi_ctx->is_di_enabled = 1;
1189     dndi_ctx->is_di_adv_enabled = 0;
1190     dndi_ctx->is_first_frame = 0;
1191     dndi_ctx->is_second_field = 0;
1192
1193     /* Check whether we are deinterlacing the second field */
1194     if (dndi_ctx->is_di_enabled) {
1195         const unsigned int tff =
1196             !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD_FIRST);
1197         const unsigned int is_top_field =
1198             !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
1199
1200         if ((tff ^ is_top_field) != 0) {
1201             fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1202             if (fs->surface_id != obj_surface->base.id) {
1203                 WARN_ONCE("invalid surface provided for second field\n");
1204                 return VA_STATUS_ERROR_INVALID_PARAMETER;
1205             }
1206             dndi_ctx->is_second_field = 1;
1207         }
1208     }
1209
1210     /* Check whether we are deinterlacing the first frame */
1211     if (dndi_ctx->is_di_enabled) {
1212         switch (deint_params->algorithm) {
1213         case VAProcDeinterlacingBob:
1214             dndi_ctx->is_first_frame = 1;
1215             break;
1216         case VAProcDeinterlacingMotionAdaptive:
1217         case VAProcDeinterlacingMotionCompensated:
1218             fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1219             if (fs->surface_id == VA_INVALID_ID)
1220                 dndi_ctx->is_first_frame = 1;
1221             else if (dndi_ctx->is_second_field) {
1222                 /* At this stage, we have already deinterlaced the
1223                    first field successfully. So, the first frame flag
1224                    is trigerred if the previous field was deinterlaced
1225                    without reference frame */
1226                 fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS];
1227                 if (fs->surface_id == VA_INVALID_ID)
1228                     dndi_ctx->is_first_frame = 1;
1229             } else {
1230                 if (pipe_params->num_forward_references < 1 ||
1231                     pipe_params->forward_references[0] == VA_INVALID_ID) {
1232                     WARN_ONCE("A forward temporal reference is needed for Motion adaptive/compensated deinterlacing !!!\n");
1233                     return VA_STATUS_ERROR_INVALID_PARAMETER;
1234                 }
1235             }
1236             dndi_ctx->is_di_adv_enabled = 1;
1237             break;
1238         default:
1239             WARN_ONCE("unsupported deinterlacing algorithm (%d)\n",
1240                       deint_params->algorithm);
1241             return VA_STATUS_ERROR_UNSUPPORTED_FILTER;
1242         }
1243     }
1244     return VA_STATUS_SUCCESS;
1245 }
1246
1247 static VAStatus
1248 pp_dndi_context_ensure_surfaces_storage(VADriverContextP ctx,
1249                                         struct i965_post_processing_context *pp_context,
1250                                         struct object_surface *src_surface, struct object_surface *dst_surface)
1251 {
1252     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1253     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
1254     unsigned int src_fourcc, dst_fourcc;
1255     unsigned int src_sampling, dst_sampling;
1256     unsigned int src_tiling, dst_tiling;
1257     unsigned int i, swizzle;
1258     VAStatus status;
1259
1260     /* Determine input surface info. Always use NV12 Y-tiled */
1261     if (src_surface->bo) {
1262         src_fourcc = src_surface->fourcc;
1263         src_sampling = src_surface->subsampling;
1264         dri_bo_get_tiling(src_surface->bo, &src_tiling, &swizzle);
1265         src_tiling = !!src_tiling;
1266     } else {
1267         src_fourcc = VA_FOURCC_NV12;
1268         src_sampling = SUBSAMPLE_YUV420;
1269         src_tiling = 1;
1270         status = i965_check_alloc_surface_bo(ctx, src_surface,
1271                                              src_tiling, src_fourcc, src_sampling);
1272         if (status != VA_STATUS_SUCCESS)
1273             return status;
1274     }
1275
1276     /* Determine output surface info. Always use NV12 Y-tiled */
1277     if (dst_surface->bo) {
1278         dst_fourcc   = dst_surface->fourcc;
1279         dst_sampling = dst_surface->subsampling;
1280         dri_bo_get_tiling(dst_surface->bo, &dst_tiling, &swizzle);
1281         dst_tiling = !!dst_tiling;
1282     } else {
1283         dst_fourcc = VA_FOURCC_NV12;
1284         dst_sampling = SUBSAMPLE_YUV420;
1285         dst_tiling = 1;
1286         status = i965_check_alloc_surface_bo(ctx, dst_surface,
1287                                              dst_tiling, dst_fourcc, dst_sampling);
1288         if (status != VA_STATUS_SUCCESS)
1289             return status;
1290     }
1291
1292     /* Create pipeline surfaces */
1293     for (i = 0; i < ARRAY_ELEMS(dndi_ctx->frame_store); i ++) {
1294         struct object_surface *obj_surface;
1295         VASurfaceID new_surface;
1296         unsigned int width, height;
1297
1298         if (dndi_ctx->frame_store[i].obj_surface &&
1299             dndi_ctx->frame_store[i].obj_surface->bo)
1300             continue; // user allocated surface, not VPP internal
1301
1302         if (dndi_ctx->frame_store[i].obj_surface) {
1303             obj_surface = dndi_ctx->frame_store[i].obj_surface;
1304             dndi_ctx->frame_store[i].is_scratch_surface = 0;
1305         } else {
1306             if (i <= DNDI_FRAME_IN_STMM) {
1307                 width = src_surface->orig_width;
1308                 height = src_surface->orig_height;
1309             } else {
1310                 width = dst_surface->orig_width;
1311                 height = dst_surface->orig_height;
1312             }
1313
1314             status = i965_CreateSurfaces(ctx, width, height, VA_RT_FORMAT_YUV420,
1315                                          1, &new_surface);
1316             if (status != VA_STATUS_SUCCESS)
1317                 return status;
1318
1319             obj_surface = SURFACE(new_surface);
1320             assert(obj_surface != NULL);
1321             dndi_ctx->frame_store[i].is_scratch_surface = 1;
1322         }
1323
1324         if (i <= DNDI_FRAME_IN_PREVIOUS) {
1325             status = i965_check_alloc_surface_bo(ctx, obj_surface,
1326                                                  src_tiling, src_fourcc, src_sampling);
1327         } else if (i == DNDI_FRAME_IN_STMM || i == DNDI_FRAME_OUT_STMM) {
1328             status = i965_check_alloc_surface_bo(ctx, obj_surface,
1329                                                  1, VA_FOURCC_Y800, SUBSAMPLE_YUV400);
1330         } else if (i >= DNDI_FRAME_OUT_CURRENT) {
1331             status = i965_check_alloc_surface_bo(ctx, obj_surface,
1332                                                  dst_tiling, dst_fourcc, dst_sampling);
1333         }
1334         if (status != VA_STATUS_SUCCESS)
1335             return status;
1336
1337         dndi_ctx->frame_store[i].obj_surface = obj_surface;
1338     }
1339     return VA_STATUS_SUCCESS;
1340 }
1341
1342 static VAStatus
1343 pp_dndi_context_ensure_surfaces(VADriverContextP ctx,
1344                                 struct i965_post_processing_context *pp_context,
1345                                 struct object_surface *src_surface, struct object_surface *dst_surface)
1346 {
1347     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1348     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
1349     DNDIFrameStore *ifs, *ofs;
1350     bool is_new_frame = false;
1351
1352     /* Update the previous input surface */
1353     is_new_frame = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].surface_id !=
1354                    src_surface->base.id;
1355     if (is_new_frame) {
1356         ifs = &dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS];
1357         ofs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1358         do {
1359             const VAProcPipelineParameterBuffer * const pipe_params =
1360                 pp_context->pipeline_param;
1361             struct object_surface *obj_surface;
1362
1363             if (pipe_params->num_forward_references < 1)
1364                 break;
1365             if (pipe_params->forward_references[0] == VA_INVALID_ID)
1366                 break;
1367
1368             obj_surface = SURFACE(pipe_params->forward_references[0]);
1369             if (!obj_surface || obj_surface->base.id == ifs->surface_id)
1370                 break;
1371
1372             pp_dndi_frame_store_clear(ifs, ctx);
1373             if (obj_surface->base.id == ofs->surface_id) {
1374                 *ifs = *ofs;
1375                 pp_dndi_frame_store_reset(ofs);
1376             } else {
1377                 ifs->obj_surface = obj_surface;
1378                 ifs->surface_id = obj_surface->base.id;
1379             }
1380         } while (0);
1381     }
1382
1383     /* Update the input surface */
1384     ifs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1385     pp_dndi_frame_store_clear(ifs, ctx);
1386     ifs->obj_surface = src_surface;
1387     ifs->surface_id = src_surface->base.id;
1388
1389     /* Update the Spatial Temporal Motion Measure (STMM) surfaces */
1390     if (is_new_frame)
1391         pp_dndi_frame_store_swap(&dndi_ctx->frame_store[DNDI_FRAME_IN_STMM],
1392                                  &dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM]);
1393
1394     /* Update the output surfaces */
1395     ofs = &dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT];
1396     if (dndi_ctx->is_di_adv_enabled && !dndi_ctx->is_first_frame) {
1397         pp_dndi_frame_store_swap(ofs,
1398                                  &dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS]);
1399         if (!dndi_ctx->is_second_field)
1400             ofs = &dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS];
1401     }
1402     pp_dndi_frame_store_clear(ofs, ctx);
1403     ofs->obj_surface = dst_surface;
1404     ofs->surface_id = dst_surface->base.id;
1405
1406     return VA_STATUS_SUCCESS;
1407 }
1408
1409 int
1410 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
1411 {
1412     int fourcc;
1413
1414     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1415         struct object_image *obj_image = (struct object_image *)surface->base;
1416         fourcc = obj_image->image.format.fourcc;
1417     } else {
1418         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1419         fourcc = obj_surface->fourcc;
1420     }
1421
1422     return fourcc;
1423 }
1424
1425 static void
1426 pp_get_surface_size(VADriverContextP ctx, const struct i965_surface *surface, int *width, int *height)
1427 {
1428     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1429         struct object_image *obj_image = (struct object_image *)surface->base;
1430
1431         *width = obj_image->image.width;
1432         *height = obj_image->image.height;
1433     } else {
1434         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1435
1436         *width = obj_surface->orig_width;
1437         *height = obj_surface->orig_height;
1438     }
1439 }
1440
1441 static void
1442 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
1443 {
1444     switch (tiling) {
1445     case I915_TILING_NONE:
1446         ss->ss3.tiled_surface = 0;
1447         ss->ss3.tile_walk = 0;
1448         break;
1449     case I915_TILING_X:
1450         ss->ss3.tiled_surface = 1;
1451         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
1452         break;
1453     case I915_TILING_Y:
1454         ss->ss3.tiled_surface = 1;
1455         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
1456         break;
1457     }
1458 }
1459
1460 static void
1461 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
1462 {
1463     switch (tiling) {
1464     case I915_TILING_NONE:
1465         ss->ss2.tiled_surface = 0;
1466         ss->ss2.tile_walk = 0;
1467         break;
1468     case I915_TILING_X:
1469         ss->ss2.tiled_surface = 1;
1470         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1471         break;
1472     case I915_TILING_Y:
1473         ss->ss2.tiled_surface = 1;
1474         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1475         break;
1476     }
1477 }
1478
1479 static void
1480 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
1481 {
1482     switch (tiling) {
1483     case I915_TILING_NONE:
1484         ss->ss0.tiled_surface = 0;
1485         ss->ss0.tile_walk = 0;
1486         break;
1487     case I915_TILING_X:
1488         ss->ss0.tiled_surface = 1;
1489         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1490         break;
1491     case I915_TILING_Y:
1492         ss->ss0.tiled_surface = 1;
1493         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1494         break;
1495     }
1496 }
1497
1498 static void
1499 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
1500 {
1501     switch (tiling) {
1502     case I915_TILING_NONE:
1503         ss->ss2.tiled_surface = 0;
1504         ss->ss2.tile_walk = 0;
1505         break;
1506     case I915_TILING_X:
1507         ss->ss2.tiled_surface = 1;
1508         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1509         break;
1510     case I915_TILING_Y:
1511         ss->ss2.tiled_surface = 1;
1512         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1513         break;
1514     }
1515 }
1516
1517 static void
1518 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1519 {
1520     struct i965_interface_descriptor *desc;
1521     dri_bo *bo;
1522     int pp_index = pp_context->current_pp;
1523
1524     bo = pp_context->idrt.bo;
1525     dri_bo_map(bo, 1);
1526     assert(bo->virtual);
1527     desc = bo->virtual;
1528     memset(desc, 0, sizeof(*desc));
1529     desc->desc0.grf_reg_blocks = 10;
1530     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1531     desc->desc1.const_urb_entry_read_offset = 0;
1532     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
1533     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
1534     desc->desc2.sampler_count = 0;
1535     desc->desc3.binding_table_entry_count = 0;
1536     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
1537
1538     dri_bo_emit_reloc(bo,
1539                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1540                       desc->desc0.grf_reg_blocks,
1541                       offsetof(struct i965_interface_descriptor, desc0),
1542                       pp_context->pp_modules[pp_index].kernel.bo);
1543
1544     dri_bo_emit_reloc(bo,
1545                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1546                       desc->desc2.sampler_count << 2,
1547                       offsetof(struct i965_interface_descriptor, desc2),
1548                       pp_context->sampler_state_table.bo);
1549
1550     dri_bo_unmap(bo);
1551     pp_context->idrt.num_interface_descriptors++;
1552 }
1553
1554 static void
1555 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
1556 {
1557     struct i965_vfe_state *vfe_state;
1558     dri_bo *bo;
1559
1560     bo = pp_context->vfe_state.bo;
1561     dri_bo_map(bo, 1);
1562     assert(bo->virtual);
1563     vfe_state = bo->virtual;
1564     memset(vfe_state, 0, sizeof(*vfe_state));
1565     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
1566     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
1567     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
1568     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
1569     vfe_state->vfe1.children_present = 0;
1570     vfe_state->vfe2.interface_descriptor_base =
1571         pp_context->idrt.bo->offset >> 4; /* reloc */
1572     dri_bo_emit_reloc(bo,
1573                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1574                       0,
1575                       offsetof(struct i965_vfe_state, vfe2),
1576                       pp_context->idrt.bo);
1577     dri_bo_unmap(bo);
1578 }
1579
1580 static void
1581 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
1582 {
1583     unsigned char *constant_buffer;
1584     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1585
1586     assert(sizeof(*pp_static_parameter) == 128);
1587     dri_bo_map(pp_context->curbe.bo, 1);
1588     assert(pp_context->curbe.bo->virtual);
1589     constant_buffer = pp_context->curbe.bo->virtual;
1590     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1591     dri_bo_unmap(pp_context->curbe.bo);
1592 }
1593
1594 static void
1595 ironlake_pp_states_setup(VADriverContextP ctx,
1596                          struct i965_post_processing_context *pp_context)
1597 {
1598     ironlake_pp_interface_descriptor_table(pp_context);
1599     ironlake_pp_vfe_state(pp_context);
1600     ironlake_pp_upload_constants(pp_context);
1601 }
1602
1603 static void
1604 ironlake_pp_pipeline_select(VADriverContextP ctx,
1605                             struct i965_post_processing_context *pp_context)
1606 {
1607     struct intel_batchbuffer *batch = pp_context->batch;
1608
1609     BEGIN_BATCH(batch, 1);
1610     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1611     ADVANCE_BATCH(batch);
1612 }
1613
1614 static void
1615 ironlake_pp_urb_layout(VADriverContextP ctx,
1616                        struct i965_post_processing_context *pp_context)
1617 {
1618     struct intel_batchbuffer *batch = pp_context->batch;
1619     unsigned int vfe_fence, cs_fence;
1620
1621     vfe_fence = pp_context->urb.cs_start;
1622     cs_fence = pp_context->urb.size;
1623
1624     BEGIN_BATCH(batch, 3);
1625     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1626     OUT_BATCH(batch, 0);
1627     OUT_BATCH(batch,
1628               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
1629               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
1630     ADVANCE_BATCH(batch);
1631 }
1632
1633 static void
1634 ironlake_pp_state_base_address(VADriverContextP ctx,
1635                                struct i965_post_processing_context *pp_context)
1636 {
1637     struct intel_batchbuffer *batch = pp_context->batch;
1638
1639     BEGIN_BATCH(batch, 8);
1640     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1641     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1642     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1643     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1644     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1645     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1646     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1647     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1648     ADVANCE_BATCH(batch);
1649 }
1650
1651 static void
1652 ironlake_pp_state_pointers(VADriverContextP ctx,
1653                            struct i965_post_processing_context *pp_context)
1654 {
1655     struct intel_batchbuffer *batch = pp_context->batch;
1656
1657     BEGIN_BATCH(batch, 3);
1658     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1659     OUT_BATCH(batch, 0);
1660     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1661     ADVANCE_BATCH(batch);
1662 }
1663
1664 static void
1665 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1666                           struct i965_post_processing_context *pp_context)
1667 {
1668     struct intel_batchbuffer *batch = pp_context->batch;
1669
1670     BEGIN_BATCH(batch, 2);
1671     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1672     OUT_BATCH(batch,
1673               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1674               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1675     ADVANCE_BATCH(batch);
1676 }
1677
1678 static void
1679 ironlake_pp_constant_buffer(VADriverContextP ctx,
1680                             struct i965_post_processing_context *pp_context)
1681 {
1682     struct intel_batchbuffer *batch = pp_context->batch;
1683
1684     BEGIN_BATCH(batch, 2);
1685     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1686     OUT_RELOC(batch, pp_context->curbe.bo,
1687               I915_GEM_DOMAIN_INSTRUCTION, 0,
1688               pp_context->urb.size_cs_entry - 1);
1689     ADVANCE_BATCH(batch);
1690 }
1691
1692 static void
1693 ironlake_pp_object_walker(VADriverContextP ctx,
1694                           struct i965_post_processing_context *pp_context)
1695 {
1696     struct intel_batchbuffer *batch = pp_context->batch;
1697     int x, x_steps, y, y_steps;
1698     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1699
1700     x_steps = pp_context->pp_x_steps(pp_context->private_context);
1701     y_steps = pp_context->pp_y_steps(pp_context->private_context);
1702
1703     for (y = 0; y < y_steps; y++) {
1704         for (x = 0; x < x_steps; x++) {
1705             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1706                 BEGIN_BATCH(batch, 20);
1707                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1708                 OUT_BATCH(batch, 0);
1709                 OUT_BATCH(batch, 0); /* no indirect data */
1710                 OUT_BATCH(batch, 0);
1711
1712                 /* inline data grf 5-6 */
1713                 assert(sizeof(*pp_inline_parameter) == 64);
1714                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1715
1716                 ADVANCE_BATCH(batch);
1717             }
1718         }
1719     }
1720 }
1721
1722 static void
1723 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1724                            struct i965_post_processing_context *pp_context)
1725 {
1726     struct intel_batchbuffer *batch = pp_context->batch;
1727
1728     intel_batchbuffer_start_atomic(batch, 0x1000);
1729     intel_batchbuffer_emit_mi_flush(batch);
1730     ironlake_pp_pipeline_select(ctx, pp_context);
1731     ironlake_pp_state_base_address(ctx, pp_context);
1732     ironlake_pp_state_pointers(ctx, pp_context);
1733     ironlake_pp_urb_layout(ctx, pp_context);
1734     ironlake_pp_cs_urb_layout(ctx, pp_context);
1735     ironlake_pp_constant_buffer(ctx, pp_context);
1736     ironlake_pp_object_walker(ctx, pp_context);
1737     intel_batchbuffer_end_atomic(batch);
1738 }
1739
1740 // update u/v offset when the surface format are packed yuv
1741 static void i965_update_src_surface_static_parameter(
1742     VADriverContextP    ctx,
1743     struct i965_post_processing_context *pp_context,
1744     const struct i965_surface *surface)
1745 {
1746     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1747     int fourcc = pp_get_surface_fourcc(ctx, surface);
1748
1749     switch (fourcc) {
1750     case VA_FOURCC_YUY2:
1751         pp_static_parameter->grf1.source_packed_u_offset = 1;
1752         pp_static_parameter->grf1.source_packed_v_offset = 3;
1753         break;
1754     case VA_FOURCC_UYVY:
1755         pp_static_parameter->grf1.source_packed_y_offset = 1;
1756         pp_static_parameter->grf1.source_packed_v_offset = 2;
1757         break;
1758     case VA_FOURCC_BGRX:
1759     case VA_FOURCC_BGRA:
1760         pp_static_parameter->grf1.source_rgb_layout = 0;
1761         break;
1762     case VA_FOURCC_RGBX:
1763     case VA_FOURCC_RGBA:
1764         pp_static_parameter->grf1.source_rgb_layout = 1;
1765         break;
1766     default:
1767         break;
1768     }
1769
1770 }
1771
1772 static void i965_update_dst_surface_static_parameter(
1773     VADriverContextP    ctx,
1774     struct i965_post_processing_context *pp_context,
1775     const struct i965_surface *surface)
1776 {
1777     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1778     int fourcc = pp_get_surface_fourcc(ctx, surface);
1779
1780     switch (fourcc) {
1781     case VA_FOURCC_YUY2:
1782         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1783         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1784         break;
1785     case VA_FOURCC_UYVY:
1786         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1787         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1788         break;
1789     case VA_FOURCC_BGRX:
1790     case VA_FOURCC_BGRA:
1791         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1792         break;
1793     case VA_FOURCC_RGBX:
1794     case VA_FOURCC_RGBA:
1795         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
1796         break;
1797     default:
1798         break;
1799     }
1800
1801 }
1802
1803 static void
1804 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1805                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1806                           int width, int height, int pitch, int format,
1807                           int index, int is_target)
1808 {
1809     struct i965_surface_state *ss;
1810     dri_bo *ss_bo;
1811     unsigned int tiling;
1812     unsigned int swizzle;
1813
1814     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1815     ss_bo = pp_context->surface_state_binding_table.bo;
1816     assert(ss_bo);
1817
1818     dri_bo_map(ss_bo, True);
1819     assert(ss_bo->virtual);
1820     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1821     memset(ss, 0, sizeof(*ss));
1822     ss->ss0.surface_type = I965_SURFACE_2D;
1823     ss->ss0.surface_format = format;
1824     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1825     ss->ss2.width = width - 1;
1826     ss->ss2.height = height - 1;
1827     ss->ss3.pitch = pitch - 1;
1828     pp_set_surface_tiling(ss, tiling);
1829     dri_bo_emit_reloc(ss_bo,
1830                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1831                       surf_bo_offset,
1832                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1833                       surf_bo);
1834     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1835     dri_bo_unmap(ss_bo);
1836 }
1837
1838 static void
1839 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1840                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1841                            int width, int height, int wpitch,
1842                            int xoffset, int yoffset,
1843                            int format, int interleave_chroma,
1844                            int index)
1845 {
1846     struct i965_surface_state2 *ss2;
1847     dri_bo *ss2_bo;
1848     unsigned int tiling;
1849     unsigned int swizzle;
1850
1851     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1852     ss2_bo = pp_context->surface_state_binding_table.bo;
1853     assert(ss2_bo);
1854
1855     dri_bo_map(ss2_bo, True);
1856     assert(ss2_bo->virtual);
1857     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1858     memset(ss2, 0, sizeof(*ss2));
1859     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1860     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1861     ss2->ss1.width = width - 1;
1862     ss2->ss1.height = height - 1;
1863     ss2->ss2.pitch = wpitch - 1;
1864     ss2->ss2.interleave_chroma = interleave_chroma;
1865     ss2->ss2.surface_format = format;
1866     ss2->ss3.x_offset_for_cb = xoffset;
1867     ss2->ss3.y_offset_for_cb = yoffset;
1868     pp_set_surface2_tiling(ss2, tiling);
1869     dri_bo_emit_reloc(ss2_bo,
1870                       I915_GEM_DOMAIN_RENDER, 0,
1871                       surf_bo_offset,
1872                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1873                       surf_bo);
1874     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1875     dri_bo_unmap(ss2_bo);
1876 }
1877
1878 static void
1879 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1880                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1881                           int width, int height, int pitch, int format,
1882                           int index, int is_target)
1883 {
1884     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1885     struct gen7_surface_state *ss;
1886     dri_bo *ss_bo;
1887     unsigned int tiling;
1888     unsigned int swizzle;
1889
1890     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1891     ss_bo = pp_context->surface_state_binding_table.bo;
1892     assert(ss_bo);
1893
1894     dri_bo_map(ss_bo, True);
1895     assert(ss_bo->virtual);
1896     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1897     memset(ss, 0, sizeof(*ss));
1898     ss->ss0.surface_type = I965_SURFACE_2D;
1899     ss->ss0.surface_format = format;
1900     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1901     ss->ss2.width = width - 1;
1902     ss->ss2.height = height - 1;
1903     ss->ss3.pitch = pitch - 1;
1904     gen7_pp_set_surface_tiling(ss, tiling);
1905     if (IS_HASWELL(i965->intel.device_info))
1906         gen7_render_set_surface_scs(ss);
1907     dri_bo_emit_reloc(ss_bo,
1908                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1909                       surf_bo_offset,
1910                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1911                       surf_bo);
1912     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1913     dri_bo_unmap(ss_bo);
1914 }
1915
1916 static void
1917 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1918                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1919                            int width, int height, int wpitch,
1920                            int xoffset, int yoffset,
1921                            int format, int interleave_chroma,
1922                            int index)
1923 {
1924     struct gen7_surface_state2 *ss2;
1925     dri_bo *ss2_bo;
1926     unsigned int tiling;
1927     unsigned int swizzle;
1928
1929     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1930     ss2_bo = pp_context->surface_state_binding_table.bo;
1931     assert(ss2_bo);
1932
1933     dri_bo_map(ss2_bo, True);
1934     assert(ss2_bo->virtual);
1935     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1936     memset(ss2, 0, sizeof(*ss2));
1937     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1938     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1939     ss2->ss1.width = width - 1;
1940     ss2->ss1.height = height - 1;
1941     ss2->ss2.pitch = wpitch - 1;
1942     ss2->ss2.interleave_chroma = interleave_chroma;
1943     ss2->ss2.surface_format = format;
1944     ss2->ss3.x_offset_for_cb = xoffset;
1945     ss2->ss3.y_offset_for_cb = yoffset;
1946     gen7_pp_set_surface2_tiling(ss2, tiling);
1947     dri_bo_emit_reloc(ss2_bo,
1948                       I915_GEM_DOMAIN_RENDER, 0,
1949                       surf_bo_offset,
1950                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1951                       surf_bo);
1952     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1953     dri_bo_unmap(ss2_bo);
1954 }
1955
1956 static void
1957 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1958                                 const struct i965_surface *surface,
1959                                 int base_index, int is_target,
1960                                 int *width, int *height, int *pitch, int *offset)
1961 {
1962     struct object_surface *obj_surface;
1963     struct object_image *obj_image;
1964     dri_bo *bo;
1965     int fourcc = pp_get_surface_fourcc(ctx, surface);
1966     const int Y = 0;
1967     const int U = ((fourcc == VA_FOURCC_YV12) ||
1968                    (fourcc == VA_FOURCC_YV16))
1969                   ? 2 : 1;
1970     const int V = ((fourcc == VA_FOURCC_YV12) ||
1971                    (fourcc == VA_FOURCC_YV16))
1972                   ? 1 : 2;
1973     const int UV = 1;
1974     int interleaved_uv = fourcc == VA_FOURCC_NV12;
1975     int packed_yuv = (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY);
1976     int full_packed_format = (fourcc == VA_FOURCC_RGBA ||
1977                               fourcc == VA_FOURCC_RGBX ||
1978                               fourcc == VA_FOURCC_BGRA ||
1979                               fourcc == VA_FOURCC_BGRX);
1980     int scale_factor_of_1st_plane_width_in_byte = 1;
1981
1982     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1983         obj_surface = (struct object_surface *)surface->base;
1984         bo = obj_surface->bo;
1985         width[0] = obj_surface->orig_width;
1986         height[0] = obj_surface->orig_height;
1987         pitch[0] = obj_surface->width;
1988         offset[0] = 0;
1989
1990         if (full_packed_format) {
1991             scale_factor_of_1st_plane_width_in_byte = 4;
1992         } else if (packed_yuv) {
1993             scale_factor_of_1st_plane_width_in_byte =  2;
1994         } else if (interleaved_uv) {
1995             width[1] = obj_surface->orig_width;
1996             height[1] = obj_surface->orig_height / 2;
1997             pitch[1] = obj_surface->width;
1998             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1999         } else {
2000             width[1] = obj_surface->orig_width / 2;
2001             height[1] = obj_surface->orig_height / 2;
2002             pitch[1] = obj_surface->width / 2;
2003             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
2004             width[2] = obj_surface->orig_width / 2;
2005             height[2] = obj_surface->orig_height / 2;
2006             pitch[2] = obj_surface->width / 2;
2007             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
2008         }
2009     } else {
2010         obj_image = (struct object_image *)surface->base;
2011         bo = obj_image->bo;
2012         width[0] = obj_image->image.width;
2013         height[0] = obj_image->image.height;
2014         pitch[0] = obj_image->image.pitches[0];
2015         offset[0] = obj_image->image.offsets[0];
2016
2017         if (full_packed_format) {
2018             scale_factor_of_1st_plane_width_in_byte = 4;
2019         } else if (packed_yuv) {
2020             scale_factor_of_1st_plane_width_in_byte = 2;
2021         } else if (interleaved_uv) {
2022             width[1] = obj_image->image.width;
2023             height[1] = obj_image->image.height / 2;
2024             pitch[1] = obj_image->image.pitches[1];
2025             offset[1] = obj_image->image.offsets[1];
2026         } else {
2027             width[1] = obj_image->image.width / 2;
2028             height[1] = obj_image->image.height / 2;
2029             pitch[1] = obj_image->image.pitches[1];
2030             offset[1] = obj_image->image.offsets[1];
2031             width[2] = obj_image->image.width / 2;
2032             height[2] = obj_image->image.height / 2;
2033             pitch[2] = obj_image->image.pitches[2];
2034             offset[2] = obj_image->image.offsets[2];
2035             if (fourcc == VA_FOURCC_YV16) {
2036                 width[1] = obj_image->image.width / 2;
2037                 height[1] = obj_image->image.height;
2038                 width[2] = obj_image->image.width / 2;
2039                 height[2] = obj_image->image.height;
2040             }
2041         }
2042     }
2043
2044     /* Y surface */
2045     i965_pp_set_surface_state(ctx, pp_context,
2046                               bo, offset[Y],
2047                               ALIGN(width[Y] *scale_factor_of_1st_plane_width_in_byte, 4) / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
2048                               base_index, is_target);
2049
2050     if (!packed_yuv && !full_packed_format) {
2051         if (interleaved_uv) {
2052             i965_pp_set_surface_state(ctx, pp_context,
2053                                       bo, offset[UV],
2054                                       ALIGN(width[UV], 4) / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
2055                                       base_index + 1, is_target);
2056         } else {
2057             /* U surface */
2058             i965_pp_set_surface_state(ctx, pp_context,
2059                                       bo, offset[U],
2060                                       ALIGN(width[U], 4) / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
2061                                       base_index + 1, is_target);
2062
2063             /* V surface */
2064             i965_pp_set_surface_state(ctx, pp_context,
2065                                       bo, offset[V],
2066                                       ALIGN(width[V], 4) / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
2067                                       base_index + 2, is_target);
2068         }
2069     }
2070
2071 }
2072
2073 static void
2074 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2075                                      const struct i965_surface *surface,
2076                                      int base_index, int is_target,
2077                                      const VARectangle *rect,
2078                                      int *width, int *height, int *pitch, int *offset)
2079 {
2080     struct object_surface *obj_surface;
2081     struct object_image *obj_image;
2082     dri_bo *bo;
2083     int fourcc = pp_get_surface_fourcc(ctx, surface);
2084     const i965_fourcc_info *fourcc_info = get_fourcc_info(fourcc);
2085
2086     if (fourcc_info == NULL)
2087         return;
2088
2089     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
2090         obj_surface = (struct object_surface *)surface->base;
2091         bo = obj_surface->bo;
2092         width[0] = MIN(rect->x + rect->width, obj_surface->orig_width);
2093         height[0] = MIN(rect->y + rect->height, obj_surface->orig_height);
2094         pitch[0] = obj_surface->width;
2095         offset[0] = 0;
2096
2097         if (fourcc_info->num_planes == 1 && is_target)
2098             width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
2099
2100         width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width);
2101         height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height);
2102         pitch[1] = obj_surface->cb_cr_pitch;
2103         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
2104
2105         width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width);
2106         height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height);
2107         pitch[2] = obj_surface->cb_cr_pitch;
2108         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
2109     } else {
2110         int U = 0, V = 0;
2111
2112         /* FIXME: add support for ARGB/ABGR image */
2113         obj_image = (struct object_image *)surface->base;
2114         bo = obj_image->bo;
2115         width[0] = MIN(rect->x + rect->width, obj_image->image.width);
2116         height[0] = MIN(rect->y + rect->height, obj_image->image.height);
2117         pitch[0] = obj_image->image.pitches[0];
2118         offset[0] = obj_image->image.offsets[0];
2119
2120         if (fourcc_info->num_planes == 1) {
2121             if (is_target)
2122                 width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
2123         } else if (fourcc_info->num_planes == 2) {
2124             U = 1, V = 1;
2125         } else {
2126             assert(fourcc_info->num_components == 3);
2127
2128             U = fourcc_info->components[1].plane;
2129             V = fourcc_info->components[2].plane;
2130             assert((U == 1 && V == 2) ||
2131                    (U == 2 && V == 1));
2132         }
2133
2134         /* Always set width/height although they aren't used for fourcc_info->num_planes == 1 */
2135         width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor);
2136         height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor);
2137         pitch[1] = obj_image->image.pitches[U];
2138         offset[1] = obj_image->image.offsets[U];
2139
2140         width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor);
2141         height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor);
2142         pitch[2] = obj_image->image.pitches[V];
2143         offset[2] = obj_image->image.offsets[V];
2144     }
2145
2146     if (is_target) {
2147         gen7_pp_set_surface_state(ctx, pp_context,
2148                                   bo, 0,
2149                                   ALIGN(width[0], 4) / 4, height[0], pitch[0],
2150                                   I965_SURFACEFORMAT_R8_UINT,
2151                                   base_index, 1);
2152
2153         if (fourcc_info->num_planes == 2) {
2154             gen7_pp_set_surface_state(ctx, pp_context,
2155                                       bo, offset[1],
2156                                       ALIGN(width[1], 2) / 2, height[1], pitch[1],
2157                                       I965_SURFACEFORMAT_R8G8_SINT,
2158                                       base_index + 1, 1);
2159         } else if (fourcc_info->num_planes == 3) {
2160             gen7_pp_set_surface_state(ctx, pp_context,
2161                                       bo, offset[1],
2162                                       ALIGN(width[1], 4) / 4, height[1], pitch[1],
2163                                       I965_SURFACEFORMAT_R8_SINT,
2164                                       base_index + 1, 1);
2165             gen7_pp_set_surface_state(ctx, pp_context,
2166                                       bo, offset[2],
2167                                       ALIGN(width[2], 4) / 4, height[2], pitch[2],
2168                                       I965_SURFACEFORMAT_R8_SINT,
2169                                       base_index + 2, 1);
2170         }
2171
2172         if (fourcc_info->format == I965_COLOR_RGB) {
2173             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2174             /* the format is MSB: X-B-G-R */
2175             pp_static_parameter->grf2.save_avs_rgb_swap = 0;
2176             if ((fourcc == VA_FOURCC_BGRA) ||
2177                 (fourcc == VA_FOURCC_BGRX)) {
2178                 /* It is stored as MSB: X-R-G-B */
2179                 pp_static_parameter->grf2.save_avs_rgb_swap = 1;
2180             }
2181         }
2182     } else {
2183         int format0 = SURFACE_FORMAT_Y8_UNORM;
2184
2185         switch (fourcc) {
2186         case VA_FOURCC_YUY2:
2187             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
2188             break;
2189
2190         case VA_FOURCC_UYVY:
2191             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
2192             break;
2193
2194         default:
2195             break;
2196         }
2197
2198         if (fourcc_info->format == I965_COLOR_RGB) {
2199             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2200             /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
2201             format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
2202             pp_static_parameter->grf2.src_avs_rgb_swap = 0;
2203             if ((fourcc == VA_FOURCC_BGRA) ||
2204                 (fourcc == VA_FOURCC_BGRX)) {
2205                 pp_static_parameter->grf2.src_avs_rgb_swap = 1;
2206             }
2207         }
2208
2209         gen7_pp_set_surface2_state(ctx, pp_context,
2210                                    bo, offset[0],
2211                                    width[0], height[0], pitch[0],
2212                                    0, 0,
2213                                    format0, 0,
2214                                    base_index);
2215
2216         if (fourcc_info->num_planes == 2) {
2217             gen7_pp_set_surface2_state(ctx, pp_context,
2218                                        bo, offset[1],
2219                                        width[1], height[1], pitch[1],
2220                                        0, 0,
2221                                        SURFACE_FORMAT_R8B8_UNORM, 0,
2222                                        base_index + 1);
2223         } else if (fourcc_info->num_planes == 3) {
2224             gen7_pp_set_surface2_state(ctx, pp_context,
2225                                        bo, offset[1],
2226                                        width[1], height[1], pitch[1],
2227                                        0, 0,
2228                                        SURFACE_FORMAT_R8_UNORM, 0,
2229                                        base_index + 1);
2230             gen7_pp_set_surface2_state(ctx, pp_context,
2231                                        bo, offset[2],
2232                                        width[2], height[2], pitch[2],
2233                                        0, 0,
2234                                        SURFACE_FORMAT_R8_UNORM, 0,
2235                                        base_index + 2);
2236         }
2237     }
2238 }
2239
/* Horizontal step count for the "null" module: always 1; the argument is ignored. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2245
/* Vertical step count for the "null" module: always 1; the argument is ignored. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2251
/* Per-block callback for the "null" module: touches nothing and returns 0. */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
2257
2258 static VAStatus
2259 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2260                    const struct i965_surface *src_surface,
2261                    const VARectangle *src_rect,
2262                    struct i965_surface *dst_surface,
2263                    const VARectangle *dst_rect,
2264                    void *filter_param)
2265 {
2266     /* private function & data */
2267     pp_context->pp_x_steps = pp_null_x_steps;
2268     pp_context->pp_y_steps = pp_null_y_steps;
2269     pp_context->private_context = NULL;
2270     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
2271
2272     dst_surface->flags = src_surface->flags;
2273
2274     return VA_STATUS_SUCCESS;
2275 }
2276
/* Horizontal step count for the load/save module: always 1; the argument is ignored. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2282
2283 static int
2284 pp_load_save_y_steps(void *private_context)
2285 {
2286     struct pp_load_save_context *pp_load_save_context = private_context;
2287
2288     return pp_load_save_context->dest_h / 8;
2289 }
2290
2291 static int
2292 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2293 {
2294     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2295     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)pp_context->private_context;
2296
2297     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
2298     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
2299
2300     return 0;
2301 }
2302
2303 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
2304 {
2305     int i;
2306     /* x offset of dest surface must be dword aligned.
2307      * so we have to extend dst surface on left edge, and mask out pixels not interested
2308      */
2309     if (dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT) {
2310         pp_context->block_horizontal_mask_left = 0;
2311         for (i = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT; i < GPU_ASM_BLOCK_WIDTH; i++) {
2312             pp_context->block_horizontal_mask_left |= 1 << i;
2313         }
2314     } else {
2315         pp_context->block_horizontal_mask_left = 0xffff;
2316     }
2317
2318     int dst_width_adjust = dst_rect->width + dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
2319     if (dst_width_adjust % GPU_ASM_BLOCK_WIDTH) {
2320         pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust % GPU_ASM_BLOCK_WIDTH)) - 1;
2321     } else {
2322         pp_context->block_horizontal_mask_right = 0xffff;
2323     }
2324
2325     if (dst_rect->height % GPU_ASM_BLOCK_HEIGHT) {
2326         pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height % GPU_ASM_BLOCK_HEIGHT)) - 1;
2327     } else {
2328         pp_context->block_vertical_mask_bottom = 0xff;
2329     }
2330
2331 }
2332 static VAStatus
2333 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2334                                 const struct i965_surface *src_surface,
2335                                 const VARectangle *src_rect,
2336                                 struct i965_surface *dst_surface,
2337                                 const VARectangle *dst_rect,
2338                                 void *filter_param)
2339 {
2340     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->pp_load_save_context;
2341     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2342     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2343     int width[3], height[3], pitch[3], offset[3];
2344
2345     /* source surface */
2346     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
2347                                     width, height, pitch, offset);
2348
2349     /* destination surface */
2350     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
2351                                     width, height, pitch, offset);
2352
2353     /* private function & data */
2354     pp_context->pp_x_steps = pp_load_save_x_steps;
2355     pp_context->pp_y_steps = pp_load_save_y_steps;
2356     pp_context->private_context = &pp_context->pp_load_save_context;
2357     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
2358
2359     int dst_left_edge_extend = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;;
2360     pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
2361     pp_load_save_context->dest_y = dst_rect->y;
2362     pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
2363     pp_load_save_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2364
2365     pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
2366     pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
2367
2368     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
2369     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
2370
2371     // update u/v offset for packed yuv
2372     i965_update_src_surface_static_parameter(ctx, pp_context, src_surface);
2373     i965_update_dst_surface_static_parameter(ctx, pp_context, dst_surface);
2374
2375     dst_surface->flags = src_surface->flags;
2376
2377     return VA_STATUS_SUCCESS;
2378 }
2379
/* Horizontal step count for the scaling module: always 1; the argument is ignored. */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2385
2386 static int
2387 pp_scaling_y_steps(void *private_context)
2388 {
2389     struct pp_scaling_context *pp_scaling_context = private_context;
2390
2391     return pp_scaling_context->dest_h / 8;
2392 }
2393
2394 static int
2395 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2396 {
2397     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)pp_context->private_context;
2398     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2399     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2400     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2401     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2402
2403     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
2404     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
2405     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
2406     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
2407
2408     return 0;
2409 }
2410
2411 static VAStatus
2412 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2413                            const struct i965_surface *src_surface,
2414                            const VARectangle *src_rect,
2415                            struct i965_surface *dst_surface,
2416                            const VARectangle *dst_rect,
2417                            void *filter_param)
2418 {
2419     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->pp_scaling_context;
2420     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2421     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2422     struct object_surface *obj_surface;
2423     struct i965_sampler_state *sampler_state;
2424     int in_w, in_h, in_wpitch, in_hpitch;
2425     int out_w, out_h, out_wpitch, out_hpitch;
2426
2427     /* source surface */
2428     obj_surface = (struct object_surface *)src_surface->base;
2429     in_w = obj_surface->orig_width;
2430     in_h = obj_surface->orig_height;
2431     in_wpitch = obj_surface->width;
2432     in_hpitch = obj_surface->height;
2433
2434     /* source Y surface index 1 */
2435     i965_pp_set_surface_state(ctx, pp_context,
2436                               obj_surface->bo, 0,
2437                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2438                               1, 0);
2439
2440     /* source UV surface index 2 */
2441     i965_pp_set_surface_state(ctx, pp_context,
2442                               obj_surface->bo, in_wpitch * in_hpitch,
2443                               ALIGN(in_w, 2) / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2444                               2, 0);
2445
2446     /* destination surface */
2447     obj_surface = (struct object_surface *)dst_surface->base;
2448     out_w = obj_surface->orig_width;
2449     out_h = obj_surface->orig_height;
2450     out_wpitch = obj_surface->width;
2451     out_hpitch = obj_surface->height;
2452
2453     /* destination Y surface index 7 */
2454     i965_pp_set_surface_state(ctx, pp_context,
2455                               obj_surface->bo, 0,
2456                               ALIGN(out_w, 4) / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2457                               7, 1);
2458
2459     /* destination UV surface index 8 */
2460     i965_pp_set_surface_state(ctx, pp_context,
2461                               obj_surface->bo, out_wpitch * out_hpitch,
2462                               ALIGN(out_w, 4) / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2463                               8, 1);
2464
2465     /* sampler state */
2466     dri_bo_map(pp_context->sampler_state_table.bo, True);
2467     assert(pp_context->sampler_state_table.bo->virtual);
2468     sampler_state = pp_context->sampler_state_table.bo->virtual;
2469
2470     /* SIMD16 Y index 1 */
2471     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
2472     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2473     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2474     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2475     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2476
2477     /* SIMD16 UV index 2 */
2478     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
2479     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2480     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2481     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2482     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2483
2484     dri_bo_unmap(pp_context->sampler_state_table.bo);
2485
2486     /* private function & data */
2487     pp_context->pp_x_steps = pp_scaling_x_steps;
2488     pp_context->pp_y_steps = pp_scaling_y_steps;
2489     pp_context->private_context = &pp_context->pp_scaling_context;
2490     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
2491
2492     int dst_left_edge_extend = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
2493     float src_left_edge_extend = (float)dst_left_edge_extend * src_rect->width / dst_rect->width;
2494     pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
2495     pp_scaling_context->dest_y = dst_rect->y;
2496     pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2497     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
2498     pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend) / in_w;
2499     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
2500
2501     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2502
2503     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float)(src_rect->width + src_left_edge_extend) / in_w / (dst_rect->width + dst_left_edge_extend);
2504     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
2505     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
2506
2507     dst_surface->flags = src_surface->flags;
2508
2509     return VA_STATUS_SUCCESS;
2510 }
2511
2512 static int
2513 pp_avs_x_steps(void *private_context)
2514 {
2515     struct pp_avs_context *pp_avs_context = private_context;
2516
2517     return pp_avs_context->dest_w / 16;
2518 }
2519
/* Vertical step count for the AVS module: always 1; the argument is ignored. */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2525
/*
 * Fill the inline parameters for the block at step (x, y): the normalized
 * source origin (horizontal and vertical) and the destination block origin.
 *
 * When non-linear anamorphic scaling (the static parameter "nlas") is off the
 * horizontal origin is computed directly from x. When it is on, the
 * horizontal origin and scaling step for x > 0 are derived incrementally
 * from the values left in pp_inline_parameter by the previous call.
 * NOTE(review): that makes the nlas paths depend on being called with x
 * starting at 0 and increasing by one -- confirm against the caller.
 *
 * Always returns 0.
 */
static int
pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
    struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
    struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
    float src_x_steping, src_y_steping, video_step_delta;
    /* dest_h scaled by the source aspect ratio (src_w / src_h), passed through ALIGN(.., 16) */
    int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);

    if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
        /* plain scaling: constant step, origin is a direct function of x */
        src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
        pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
    } else if (tmp_w >= pp_avs_context->dest_w) {
        /* aspect-preserving width does not fit in dest_w: constant step of 1/tmp_w, no step delta */
        pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
        pp_inline_parameter->grf6.video_step_delta = 0;

        if (x == 0) {
            /* first block starts half of the excess width (tmp_w - dest_w) into the source */
            pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
                                                                                               pp_avs_context->src_normalized_x;
        } else {
            /* advance the origin left by the previous block by one 16-pixel block */
            src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
            video_step_delta = pp_inline_parameter->grf6.video_step_delta;
            pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                                                                                                16 * 15 * video_step_delta / 2;
        }
    } else {
        /* dest_w is wider than the aspect-preserving width tmp_w */
        int n0, n1, n2, nls_left, nls_right;
        int factor_a = 5, factor_b = 4;
        float f;

        /* extra width (dest_w - tmp_w) in 16-pixel blocks, split between left (n0) and right (n1) */
        n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
        n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
        /* n2: 1/factor_a of tmp_w, in 16-pixel blocks, added to each side region */
        n2 = tmp_w / (16 * factor_a);
        nls_left = n0 + n2;
        nls_right = n1 + n2;
        /* f: n2 blocks expressed as a fraction of tmp_w */
        f = (float) n2 * 16 / tmp_w;

        if (n0 < 5) {
            /* fewer than 5 extra blocks per side: constant step of 1/dest_w, no step delta */
            pp_inline_parameter->grf6.video_step_delta = 0.0;

            if (x == 0) {
                pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
                pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
            } else {
                src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
                video_step_delta = pp_inline_parameter->grf6.video_step_delta;
                pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                                                                                                    16 * 15 * video_step_delta / 2;
            }
        } else {
            if (x < nls_left) {
                /* left region: step starts at a and grows by b per pixel (16 * b per block) */
                /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
                float a = f / (nls_left * 16 * factor_b);
                float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));

                pp_inline_parameter->grf6.video_step_delta = b;

                if (x == 0) {
                    pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
                    pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
                } else {
                    src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
                    video_step_delta = pp_inline_parameter->grf6.video_step_delta;
                    pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                                                                                                        16 * 15 * video_step_delta / 2;
                    pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
                }
            } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
                /* scale the center linearly */
                /* origin is advanced with the previous block's step/delta before they are reset */
                src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
                video_step_delta = pp_inline_parameter->grf6.video_step_delta;
                pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                                                                                                    16 * 15 * video_step_delta / 2;
                pp_inline_parameter->grf6.video_step_delta = 0.0;
                pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
            } else {
                /* right region: same a/b derivation as the left one, applied with a negative delta */
                float a = f / (nls_right * 16 * factor_b);
                float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));

                src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
                video_step_delta = pp_inline_parameter->grf6.video_step_delta;
                pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                                                                                                    16 * 15 * video_step_delta / 2;
                pp_inline_parameter->grf6.video_step_delta = -b;

                /* first right-region block seeds the step; later ones shrink it by 16 * b */
                if (x == (pp_avs_context->dest_w / 16 - nls_right))
                    pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
                else
                    pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
            }
        }
    }

    /* vertical source origin advances 8 rows per y step; destination blocks are 16 wide by 8 high */
    src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
    pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
    pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
    pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;

    return 0;
}
2626
/*
 * AVS filter configuration named for Gen5: 16 phases, 8 luma and 4 chroma
 * coefficients per phase, 6 fractional bits per coefficient.
 *
 * coeff_range holds a per-coefficient lower and upper bound for each of the
 * four coefficient sets (y_k_h, y_k_v, uv_k_h, uv_k_v). Compared with
 * gen6_avs_config, the only difference is the lower bound of the two centre
 * luma coefficients: 0 here, -2 there.
 */
static const AVSConfig gen5_avs_config = {
    .coeff_frac_bits = 6,
    /* 1 / 2^6: the smallest step representable with 6 fractional bits */
    .coeff_epsilon = 1.0f / (1U << 6),
    .num_phases = 16,
    .num_luma_coeffs = 8,
    .num_chroma_coeffs = 4,

    .coeff_range = {
        .lower_bound = {
            .y_k_h = { -0.25f, -0.5f, -1, 0, 0, -1, -0.5f, -0.25f },
            .y_k_v = { -0.25f, -0.5f, -1, 0, 0, -1, -0.5f, -0.25f },
            .uv_k_h = { -1, 0, 0, -1 },
            .uv_k_v = { -1, 0, 0, -1 },
        },
        .upper_bound = {
            .y_k_h = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
            .y_k_v = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
            .uv_k_h = { 1, 2, 2, 1 },
            .uv_k_v = { 1, 2, 2, 1 },
        },
    },
};
2649
/*
 * AVS filter configuration named for Gen6: 16 phases, 8 luma and 4 chroma
 * coefficients per phase, 6 fractional bits per coefficient.
 *
 * coeff_range holds a per-coefficient lower and upper bound for each of the
 * four coefficient sets (y_k_h, y_k_v, uv_k_h, uv_k_v). Identical to
 * gen5_avs_config except that the two centre luma coefficients may go down
 * to -2 instead of 0.
 */
static const AVSConfig gen6_avs_config = {
    .coeff_frac_bits = 6,
    /* 1 / 2^6: the smallest step representable with 6 fractional bits */
    .coeff_epsilon = 1.0f / (1U << 6),
    .num_phases = 16,
    .num_luma_coeffs = 8,
    .num_chroma_coeffs = 4,

    .coeff_range = {
        .lower_bound = {
            .y_k_h = { -0.25f, -0.5f, -1, -2, -2, -1, -0.5f, -0.25f },
            .y_k_v = { -0.25f, -0.5f, -1, -2, -2, -1, -0.5f, -0.25f },
            .uv_k_h = { -1, 0, 0, -1 },
            .uv_k_v = { -1, 0, 0, -1 },
        },
        .upper_bound = {
            .y_k_h = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
            .y_k_v = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
            .uv_k_h = { 1, 2, 2, 1 },
            .uv_k_v = { 1, 2, 2, 1 },
        },
    },
};
2672
2673 static VAStatus
2674 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2675                        const struct i965_surface *src_surface,
2676                        const VARectangle *src_rect,
2677                        struct i965_surface *dst_surface,
2678                        const VARectangle *dst_rect,
2679                        void *filter_param)
2680 {
2681     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
2682     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2683     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2684     struct object_surface *obj_surface;
2685     struct i965_sampler_8x8 *sampler_8x8;
2686     struct i965_sampler_8x8_state *sampler_8x8_state;
2687     int index;
2688     int in_w, in_h, in_wpitch, in_hpitch;
2689     int out_w, out_h, out_wpitch, out_hpitch;
2690     int i;
2691     AVSState * const avs = &pp_avs_context->state;
2692     float sx, sy;
2693
2694     const int nlas = (pp_context->filter_flags & VA_FILTER_SCALING_MASK) ==
2695                      VA_FILTER_SCALING_NL_ANAMORPHIC;
2696
2697     /* surface */
2698     obj_surface = (struct object_surface *)src_surface->base;
2699     in_w = obj_surface->orig_width;
2700     in_h = obj_surface->orig_height;
2701     in_wpitch = obj_surface->width;
2702     in_hpitch = obj_surface->height;
2703
2704     /* source Y surface index 1 */
2705     i965_pp_set_surface2_state(ctx, pp_context,
2706                                obj_surface->bo, 0,
2707                                in_w, in_h, in_wpitch,
2708                                0, 0,
2709                                SURFACE_FORMAT_Y8_UNORM, 0,
2710                                1);
2711
2712     /* source UV surface index 2 */
2713     i965_pp_set_surface2_state(ctx, pp_context,
2714                                obj_surface->bo, in_wpitch * in_hpitch,
2715                                in_w / 2, in_h / 2, in_wpitch,
2716                                0, 0,
2717                                SURFACE_FORMAT_R8B8_UNORM, 0,
2718                                2);
2719
2720     /* destination surface */
2721     obj_surface = (struct object_surface *)dst_surface->base;
2722     out_w = obj_surface->orig_width;
2723     out_h = obj_surface->orig_height;
2724     out_wpitch = obj_surface->width;
2725     out_hpitch = obj_surface->height;
2726     assert(out_w <= out_wpitch && out_h <= out_hpitch);
2727
2728     /* destination Y surface index 7 */
2729     i965_pp_set_surface_state(ctx, pp_context,
2730                               obj_surface->bo, 0,
2731                               ALIGN(out_w, 4) / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2732                               7, 1);
2733
2734     /* destination UV surface index 8 */
2735     i965_pp_set_surface_state(ctx, pp_context,
2736                               obj_surface->bo, out_wpitch * out_hpitch,
2737                               ALIGN(out_w, 4) / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2738                               8, 1);
2739
2740     /* sampler 8x8 state */
2741     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2742     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2743     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2744     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2745     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2746
2747     sx = (float)dst_rect->width / src_rect->width;
2748     sy = (float)dst_rect->height / src_rect->height;
2749     avs_update_coefficients(avs, sx, sy, pp_context->filter_flags);
2750
2751     assert(avs->config->num_phases == 16);
2752     for (i = 0; i <= 16; i++) {
2753         const AVSCoeffs * const coeffs = &avs->coeffs[i];
2754
2755         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 =
2756             intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
2757         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 =
2758             intel_format_convert(coeffs->y_k_h[1], 1, 6, 1);
2759         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 =
2760             intel_format_convert(coeffs->y_k_h[2], 1, 6, 1);
2761         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 =
2762             intel_format_convert(coeffs->y_k_h[3], 1, 6, 1);
2763         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 =
2764             intel_format_convert(coeffs->y_k_h[4], 1, 6, 1);
2765         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 =
2766             intel_format_convert(coeffs->y_k_h[5], 1, 6, 1);
2767         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 =
2768             intel_format_convert(coeffs->y_k_h[6], 1, 6, 1);
2769         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 =
2770             intel_format_convert(coeffs->y_k_h[7], 1, 6, 1);
2771
2772         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 =
2773             intel_format_convert(coeffs->uv_k_h[0], 1, 6, 1);
2774         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 =
2775             intel_format_convert(coeffs->uv_k_h[1], 1, 6, 1);
2776         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 =
2777             intel_format_convert(coeffs->uv_k_h[2], 1, 6, 1);
2778         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 =
2779             intel_format_convert(coeffs->uv_k_h[3], 1, 6, 1);
2780
2781         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 =
2782             intel_format_convert(coeffs->y_k_v[0], 1, 6, 1);
2783         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 =
2784             intel_format_convert(coeffs->y_k_v[1], 1, 6, 1);
2785         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 =
2786             intel_format_convert(coeffs->y_k_v[2], 1, 6, 1);
2787         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 =
2788             intel_format_convert(coeffs->y_k_v[3], 1, 6, 1);
2789         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 =
2790             intel_format_convert(coeffs->y_k_v[4], 1, 6, 1);
2791         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 =
2792             intel_format_convert(coeffs->y_k_v[5], 1, 6, 1);
2793         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 =
2794             intel_format_convert(coeffs->y_k_v[6], 1, 6, 1);
2795         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 =
2796             intel_format_convert(coeffs->y_k_v[7], 1, 6, 1);
2797
2798         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 =
2799             intel_format_convert(coeffs->uv_k_v[0], 1, 6, 1);
2800         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 =
2801             intel_format_convert(coeffs->uv_k_v[1], 1, 6, 1);
2802         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 =
2803             intel_format_convert(coeffs->uv_k_v[2], 1, 6, 1);
2804         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 =
2805             intel_format_convert(coeffs->uv_k_v[3], 1, 6, 1);
2806     }
2807
2808     /* Adaptive filter for all channels (DW4.15) */
2809     sampler_8x8_state->coefficients[0].dw4.table_1x_filter_c1 = 1U << 7;
2810
2811     sampler_8x8_state->dw136.default_sharpness_level =
2812         -avs_is_needed(pp_context->filter_flags);
2813     sampler_8x8_state->dw137.ilk.bypass_y_adaptive_filtering = 1;
2814     sampler_8x8_state->dw137.ilk.bypass_x_adaptive_filtering = 1;
2815     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2816
2817     /* sampler 8x8 */
2818     dri_bo_map(pp_context->sampler_state_table.bo, True);
2819     assert(pp_context->sampler_state_table.bo->virtual);
2820     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
2821     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2822
2823     /* sample_8x8 Y index 1 */
2824     index = 1;
2825     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2826     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2827     sampler_8x8[index].dw0.ief_bypass = 1;
2828     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2829     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2830     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2831     sampler_8x8[index].dw2.global_noise_estimation = 22;
2832     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2833     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2834     sampler_8x8[index].dw3.strong_edge_weight = 7;
2835     sampler_8x8[index].dw3.regular_weight = 2;
2836     sampler_8x8[index].dw3.non_edge_weight = 0;
2837     sampler_8x8[index].dw3.gain_factor = 40;
2838     sampler_8x8[index].dw4.steepness_boost = 0;
2839     sampler_8x8[index].dw4.steepness_threshold = 0;
2840     sampler_8x8[index].dw4.mr_boost = 0;
2841     sampler_8x8[index].dw4.mr_threshold = 5;
2842     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2843     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2844     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2845     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2846     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2847     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2848     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2849     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2850     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2851     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2852     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2853     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2854     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2855     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2856     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2857     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2858     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2859     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2860     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2861     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2862     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2863     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2864     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2865     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2866     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2867     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2868     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2869     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2870     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2871     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2872     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2873     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2874     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2875     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2876     sampler_8x8[index].dw13.limiter_boost = 0;
2877     sampler_8x8[index].dw13.minimum_limiter = 10;
2878     sampler_8x8[index].dw13.maximum_limiter = 11;
2879     sampler_8x8[index].dw14.clip_limiter = 130;
2880     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2881                       I915_GEM_DOMAIN_RENDER,
2882                       0,
2883                       0,
2884                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2885                       pp_context->sampler_state_table.bo_8x8);
2886
2887     /* sample_8x8 UV index 2 */
2888     index = 2;
2889     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2890     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2891     sampler_8x8[index].dw0.ief_bypass = 1;
2892     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2893     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2894     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2895     sampler_8x8[index].dw2.global_noise_estimation = 22;
2896     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2897     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2898     sampler_8x8[index].dw3.strong_edge_weight = 7;
2899     sampler_8x8[index].dw3.regular_weight = 2;
2900     sampler_8x8[index].dw3.non_edge_weight = 0;
2901     sampler_8x8[index].dw3.gain_factor = 40;
2902     sampler_8x8[index].dw4.steepness_boost = 0;
2903     sampler_8x8[index].dw4.steepness_threshold = 0;
2904     sampler_8x8[index].dw4.mr_boost = 0;
2905     sampler_8x8[index].dw4.mr_threshold = 5;
2906     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2907     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2908     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2909     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2910     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2911     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2912     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2913     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2914     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2915     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2916     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2917     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2918     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2919     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2920     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2921     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2922     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2923     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2924     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2925     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2926     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2927     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2928     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2929     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2930     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2931     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2932     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2933     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2934     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2935     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2936     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2937     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2938     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2939     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2940     sampler_8x8[index].dw13.limiter_boost = 0;
2941     sampler_8x8[index].dw13.minimum_limiter = 10;
2942     sampler_8x8[index].dw13.maximum_limiter = 11;
2943     sampler_8x8[index].dw14.clip_limiter = 130;
2944     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2945                       I915_GEM_DOMAIN_RENDER,
2946                       0,
2947                       0,
2948                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2949                       pp_context->sampler_state_table.bo_8x8);
2950
2951     dri_bo_unmap(pp_context->sampler_state_table.bo);
2952
2953     /* private function & data */
2954     pp_context->pp_x_steps = pp_avs_x_steps;
2955     pp_context->pp_y_steps = pp_avs_y_steps;
2956     pp_context->private_context = &pp_context->pp_avs_context;
2957     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2958
2959     int dst_left_edge_extend = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
2960     float src_left_edge_extend = (float)dst_left_edge_extend * src_rect->width / dst_rect->width;
2961     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
2962     pp_avs_context->dest_y = dst_rect->y;
2963     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2964     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
2965     pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend) / in_w;
2966     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2967     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
2968     pp_avs_context->src_h = src_rect->height;
2969
2970     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2971     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2972
2973     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float)(src_rect->width + src_left_edge_extend) / in_w / (dst_rect->width + dst_left_edge_extend);
2974     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2975     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2976     pp_inline_parameter->grf6.video_step_delta = 0.0;
2977
2978     dst_surface->flags = src_surface->flags;
2979
2980     return VA_STATUS_SUCCESS;
2981 }
2982
2983 static VAStatus
2984 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2985                              const struct i965_surface *src_surface,
2986                              const VARectangle *src_rect,
2987                              struct i965_surface *dst_surface,
2988                              const VARectangle *dst_rect,
2989                              void *filter_param)
2990 {
2991     return pp_nv12_avs_initialize(ctx, pp_context,
2992                                   src_surface,
2993                                   src_rect,
2994                                   dst_surface,
2995                                   dst_rect,
2996                                   filter_param);
2997 }
2998
2999 static int
3000 gen7_pp_avs_x_steps(void *private_context)
3001 {
3002     struct pp_avs_context *pp_avs_context = private_context;
3003
3004     return pp_avs_context->dest_w / 16;
3005 }
3006
3007 static int
3008 gen7_pp_avs_y_steps(void *private_context)
3009 {
3010     struct pp_avs_context *pp_avs_context = private_context;
3011
3012     return pp_avs_context->dest_h / 16;
3013 }
3014
3015 static int
3016 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3017 {
3018     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
3019     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3020
3021     pp_inline_parameter->grf9.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
3022     pp_inline_parameter->grf9.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
3023     pp_inline_parameter->grf9.constant_0 = 0xffffffff;
3024     pp_inline_parameter->grf9.sampler_load_main_video_x_scaling_step = pp_avs_context->horiz_range / pp_avs_context->src_w;
3025
3026     return 0;
3027 }
3028
3029 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx,
3030                                               struct i965_post_processing_context *pp_context,
3031                                               const struct i965_surface *surface)
3032 {
3033     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3034     int fourcc = pp_get_surface_fourcc(ctx, surface);
3035
3036     if (fourcc == VA_FOURCC_YUY2) {
3037         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3038         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3039         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3040     } else if (fourcc == VA_FOURCC_UYVY) {
3041         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
3042         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
3043         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
3044     }
3045 }
3046
3047 static VAStatus
3048 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3049                            const struct i965_surface *src_surface,
3050                            const VARectangle *src_rect,
3051                            struct i965_surface *dst_surface,
3052                            const VARectangle *dst_rect,
3053                            void *filter_param)
3054 {
3055     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
3056     struct i965_driver_data *i965 = i965_driver_data(ctx);
3057     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3058     struct gen7_sampler_8x8 *sampler_8x8;
3059     struct i965_sampler_8x8_state *sampler_8x8_state;
3060     int index, i;
3061     int width[3], height[3], pitch[3], offset[3];
3062     int src_width, src_height;
3063     AVSState * const avs = &pp_avs_context->state;
3064     float sx, sy;
3065     const float * yuv_to_rgb_coefs;
3066     size_t yuv_to_rgb_coefs_size;
3067
3068     /* source surface */
3069     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
3070                                          src_rect,
3071                                          width, height, pitch, offset);
3072     src_width = width[0];
3073     src_height = height[0];
3074
3075     /* destination surface */
3076     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
3077                                          dst_rect,
3078                                          width, height, pitch, offset);
3079
3080     /* sampler 8x8 state */
3081     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
3082     assert(pp_context->sampler_state_table.bo_8x8->virtual);
3083     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
3084     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
3085     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
3086
3087     sx = (float)dst_rect->width / src_rect->width;
3088     sy = (float)dst_rect->height / src_rect->height;
3089     avs_update_coefficients(avs, sx, sy, pp_context->filter_flags);
3090
3091     assert(avs->config->num_phases == 16);
3092     for (i = 0; i <= 16; i++) {
3093         const AVSCoeffs * const coeffs = &avs->coeffs[i];
3094
3095         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 =
3096             intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
3097         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 =
3098             intel_format_convert(coeffs->y_k_h[1], 1, 6, 1);
3099         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 =
3100             intel_format_convert(coeffs->y_k_h[2], 1, 6, 1);
3101         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 =
3102             intel_format_convert(coeffs->y_k_h[3], 1, 6, 1);
3103         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 =
3104             intel_format_convert(coeffs->y_k_h[4], 1, 6, 1);
3105         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 =
3106             intel_format_convert(coeffs->y_k_h[5], 1, 6, 1);
3107         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 =
3108             intel_format_convert(coeffs->y_k_h[6], 1, 6, 1);
3109         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 =
3110             intel_format_convert(coeffs->y_k_h[7], 1, 6, 1);
3111
3112         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 =
3113             intel_format_convert(coeffs->uv_k_h[0], 1, 6, 1);
3114         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 =
3115             intel_format_convert(coeffs->uv_k_h[1], 1, 6, 1);
3116         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 =
3117             intel_format_convert(coeffs->uv_k_h[2], 1, 6, 1);
3118         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 =
3119             intel_format_convert(coeffs->uv_k_h[3], 1, 6, 1);
3120
3121         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 =
3122             intel_format_convert(coeffs->y_k_v[0], 1, 6, 1);
3123         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 =
3124             intel_format_convert(coeffs->y_k_v[1], 1, 6, 1);
3125         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 =
3126             intel_format_convert(coeffs->y_k_v[2], 1, 6, 1);
3127         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 =
3128             intel_format_convert(coeffs->y_k_v[3], 1, 6, 1);
3129         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 =
3130             intel_format_convert(coeffs->y_k_v[4], 1, 6, 1);
3131         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 =
3132             intel_format_convert(coeffs->y_k_v[5], 1, 6, 1);
3133         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 =
3134             intel_format_convert(coeffs->y_k_v[6], 1, 6, 1);
3135         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 =
3136             intel_format_convert(coeffs->y_k_v[7], 1, 6, 1);
3137
3138         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 =
3139             intel_format_convert(coeffs->uv_k_v[0], 1, 6, 1);
3140         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 =
3141             intel_format_convert(coeffs->uv_k_v[1], 1, 6, 1);
3142         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 =
3143             intel_format_convert(coeffs->uv_k_v[2], 1, 6, 1);
3144         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 =
3145             intel_format_convert(coeffs->uv_k_v[3], 1, 6, 1);
3146     }
3147
3148     sampler_8x8_state->dw136.default_sharpness_level =
3149         -avs_is_needed(pp_context->filter_flags);
3150     if (IS_HASWELL(i965->intel.device_info)) {
3151         sampler_8x8_state->dw137.hsw.adaptive_filter_for_all_channel = 1;
3152         sampler_8x8_state->dw137.hsw.bypass_y_adaptive_filtering = 1;
3153         sampler_8x8_state->dw137.hsw.bypass_x_adaptive_filtering = 1;
3154     } else {
3155         sampler_8x8_state->coefficients[0].dw4.table_1x_filter_c1 = 1U << 7;
3156         sampler_8x8_state->dw137.ilk.bypass_y_adaptive_filtering = 1;
3157         sampler_8x8_state->dw137.ilk.bypass_x_adaptive_filtering = 1;
3158     }
3159     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
3160
3161     /* sampler 8x8 */
3162     dri_bo_map(pp_context->sampler_state_table.bo, True);
3163     assert(pp_context->sampler_state_table.bo->virtual);
3164     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
3165     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
3166
3167     /* sample_8x8 Y index 4 */
3168     index = 4;
3169     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3170     sampler_8x8[index].dw0.global_noise_estimation = 255;
3171     sampler_8x8[index].dw0.ief_bypass = 1;
3172
3173     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3174
3175     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3176     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3177     sampler_8x8[index].dw2.r5x_coefficient = 9;
3178     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3179     sampler_8x8[index].dw2.r5c_coefficient = 3;
3180
3181     sampler_8x8[index].dw3.r3x_coefficient = 27;
3182     sampler_8x8[index].dw3.r3c_coefficient = 5;
3183     sampler_8x8[index].dw3.gain_factor = 40;
3184     sampler_8x8[index].dw3.non_edge_weight = 1;
3185     sampler_8x8[index].dw3.regular_weight = 2;
3186     sampler_8x8[index].dw3.strong_edge_weight = 7;
3187     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3188
3189     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3190                       I915_GEM_DOMAIN_RENDER,
3191                       0,
3192                       0,
3193                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3194                       pp_context->sampler_state_table.bo_8x8);
3195
3196     /* sample_8x8 UV index 8 */
3197     index = 8;
3198     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3199     sampler_8x8[index].dw0.disable_8x8_filter = 0;
3200     sampler_8x8[index].dw0.global_noise_estimation = 255;
3201     sampler_8x8[index].dw0.ief_bypass = 1;
3202     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3203     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3204     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3205     sampler_8x8[index].dw2.r5x_coefficient = 9;
3206     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3207     sampler_8x8[index].dw2.r5c_coefficient = 3;
3208     sampler_8x8[index].dw3.r3x_coefficient = 27;
3209     sampler_8x8[index].dw3.r3c_coefficient = 5;
3210     sampler_8x8[index].dw3.gain_factor = 40;
3211     sampler_8x8[index].dw3.non_edge_weight = 1;
3212     sampler_8x8[index].dw3.regular_weight = 2;
3213     sampler_8x8[index].dw3.strong_edge_weight = 7;
3214     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3215
3216     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3217                       I915_GEM_DOMAIN_RENDER,
3218                       0,
3219                       0,
3220                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3221                       pp_context->sampler_state_table.bo_8x8);
3222
3223     /* sampler_8x8 V, index 12 */
3224     index = 12;
3225     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3226     sampler_8x8[index].dw0.disable_8x8_filter = 0;
3227     sampler_8x8[index].dw0.global_noise_estimation = 255;
3228     sampler_8x8[index].dw0.ief_bypass = 1;
3229     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3230     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3231     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3232     sampler_8x8[index].dw2.r5x_coefficient = 9;
3233     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3234     sampler_8x8[index].dw2.r5c_coefficient = 3;
3235     sampler_8x8[index].dw3.r3x_coefficient = 27;
3236     sampler_8x8[index].dw3.r3c_coefficient = 5;
3237     sampler_8x8[index].dw3.gain_factor = 40;
3238     sampler_8x8[index].dw3.non_edge_weight = 1;
3239     sampler_8x8[index].dw3.regular_weight = 2;
3240     sampler_8x8[index].dw3.strong_edge_weight = 7;
3241     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3242
3243     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3244                       I915_GEM_DOMAIN_RENDER,
3245                       0,
3246                       0,
3247                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3248                       pp_context->sampler_state_table.bo_8x8);
3249
3250     dri_bo_unmap(pp_context->sampler_state_table.bo);
3251
3252     /* private function & data */
3253     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
3254     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
3255     pp_context->private_context = &pp_context->pp_avs_context;
3256     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
3257
3258     int dst_left_edge_extend = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
3259     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
3260     pp_avs_context->dest_y = dst_rect->y;
3261     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
3262     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
3263     pp_avs_context->src_w = src_rect->width;
3264     pp_avs_context->src_h = src_rect->height;
3265     pp_avs_context->horiz_range = (float)src_rect->width / src_width;
3266
3267     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
3268     dw = MAX(dw, dst_rect->width + dst_left_edge_extend);
3269
3270     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3271     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
3272     if (IS_HASWELL(i965->intel.device_info))
3273         pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */
3274
3275     if (pp_static_parameter->grf2.avs_wa_enable) {
3276         int src_fourcc = pp_get_surface_fourcc(ctx, src_surface);
3277         if ((src_fourcc == VA_FOURCC_RGBA) ||
3278             (src_fourcc == VA_FOURCC_RGBX) ||
3279             (src_fourcc == VA_FOURCC_BGRA) ||
3280             (src_fourcc == VA_FOURCC_BGRX)) {
3281             pp_static_parameter->grf2.avs_wa_enable = 0;
3282         }
3283     }
3284
3285     pp_static_parameter->grf2.avs_wa_width = src_width;
3286     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * src_width);
3287     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * src_width);
3288     pp_static_parameter->grf2.alpha = 255;
3289
3290     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
3291     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height;
3292     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
3293                                                                    (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
3294     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
3295                                                                      (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
3296
3297     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
3298
3299     yuv_to_rgb_coefs = i915_color_standard_to_coefs(i915_filter_to_color_standard(src_surface->flags &
3300                                                                                   VA_SRC_COLOR_MASK),
3301                                                     &yuv_to_rgb_coefs_size);
3302     memcpy(&pp_static_parameter->grf7, yuv_to_rgb_coefs, yuv_to_rgb_coefs_size);
3303
3304     dst_surface->flags = src_surface->flags;
3305
3306     return VA_STATUS_SUCCESS;
3307 }
3308
/*
 * Horizontal step count used for the DNDI path.
 *
 * The private context is not consulted; the count is always 1.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    const int x_step_count = 1;

    return x_step_count;
}
3314
3315 static int
3316 pp_dndi_y_steps(void *private_context)
3317 {
3318     struct pp_dndi_context *pp_dndi_context = private_context;
3319
3320     return pp_dndi_context->dest_h / 4;
3321 }
3322
3323 static int
3324 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3325 {
3326     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3327
3328     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3329     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3330
3331     return 0;
3332 }
3333
3334 static VAStatus
3335 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3336                         const struct i965_surface *src_surface,
3337                         const VARectangle *src_rect,
3338                         struct i965_surface *dst_surface,
3339                         const VARectangle *dst_rect,
3340                         void *filter_param)
3341 {
3342     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
3343     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3344     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3345     const VAProcPipelineParameterBuffer * const pipe_params =
3346         pp_context->pipeline_param;
3347     const VAProcFilterParameterBufferDeinterlacing * const deint_params =
3348         filter_param;
3349     struct object_surface * const src_obj_surface = (struct object_surface *)
3350                                                     src_surface->base;
3351     struct object_surface * const dst_obj_surface = (struct object_surface *)
3352                                                     dst_surface->base;
3353     struct object_surface *obj_surface;
3354     struct i965_sampler_dndi *sampler_dndi;
3355     int index, dndi_top_first;
3356     int w, h, orig_w, orig_h;
3357     VAStatus status;
3358
3359     status = pp_dndi_context_init_surface_params(dndi_ctx, src_obj_surface,
3360                                                  pipe_params, deint_params);
3361     if (status != VA_STATUS_SUCCESS)
3362         return status;
3363
3364     status = pp_dndi_context_ensure_surfaces(ctx, pp_context,
3365                                              src_obj_surface, dst_obj_surface);
3366     if (status != VA_STATUS_SUCCESS)
3367         return status;
3368
3369     status = pp_dndi_context_ensure_surfaces_storage(ctx, pp_context,
3370                                                      src_obj_surface, dst_obj_surface);
3371     if (status != VA_STATUS_SUCCESS)
3372         return status;
3373
3374     /* Current input surface (index = 4) */
3375     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].obj_surface;
3376     i965_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3377                                obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3378                                0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 4);
3379
3380     /* Previous input surface (index = 5) */
3381     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS].obj_surface;
3382     i965_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3383                                obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3384                                0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 5);
3385
3386     /* STMM input surface (index = 6) */
3387     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_STMM].obj_surface;
3388     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3389                               obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3390                               I965_SURFACEFORMAT_R8_UNORM, 6, 1);
3391
3392     /* Previous output surfaces (index = { 7, 8 }) */
3393     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS].obj_surface;
3394     w = obj_surface->width;
3395     h = obj_surface->height;
3396     orig_w = obj_surface->orig_width;
3397     orig_h = obj_surface->orig_height;
3398
3399     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3400                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 7, 1);
3401     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3402                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 8, 1);
3403
3404     /* Current output surfaces (index = { 10, 11 }) */
3405     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT].obj_surface;
3406     w = obj_surface->width;
3407     h = obj_surface->height;
3408     orig_w = obj_surface->orig_width;
3409     orig_h = obj_surface->orig_height;
3410
3411     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3412                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 10, 1);
3413     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3414                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 11, 1);
3415
3416     /* STMM output surface (index = 20) */
3417     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM].obj_surface;
3418     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3419                               obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3420                               I965_SURFACEFORMAT_R8_UNORM, 20, 1);
3421
3422     dndi_top_first = !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
3423
3424     /* sampler dndi */
3425     dri_bo_map(pp_context->sampler_state_table.bo, True);
3426     assert(pp_context->sampler_state_table.bo->virtual);
3427     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3428     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3429
3430     /* sample dndi index 1 */
3431     index = 0;
3432     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3433     sampler_dndi[index].dw0.denoise_history_delta = 7;          // 0-15, default is 8
3434     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
3435     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3436
3437     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3438     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3439     sampler_dndi[index].dw1.stmm_c2 = 1;
3440     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3441     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3442
3443     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
3444     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 1;    // 0-15
3445     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3446     sampler_dndi[index].dw2.good_neighbor_threshold = 12;                // 0-63
3447
3448     sampler_dndi[index].dw3.maximum_stmm = 150;
3449     sampler_dndi[index].dw3.multipler_for_vecm = 30;
3450     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3451     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3452     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3453
3454     sampler_dndi[index].dw4.sdi_delta = 5;
3455     sampler_dndi[index].dw4.sdi_threshold = 100;
3456     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3457     sampler_dndi[index].dw4.stmm_shift_up = 1;
3458     sampler_dndi[index].dw4.stmm_shift_down = 3;
3459     sampler_dndi[index].dw4.minimum_stmm = 118;
3460
3461     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3462     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3463     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3464     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
3465
3466     sampler_dndi[index].dw6.dn_enable = 1;
3467     sampler_dndi[index].dw6.di_enable = 1;
3468     sampler_dndi[index].dw6.di_partial = 0;
3469     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3470     sampler_dndi[index].dw6.dndi_stream_id = 0;
3471     sampler_dndi[index].dw6.dndi_first_frame = dndi_ctx->is_first_frame;
3472     sampler_dndi[index].dw6.progressive_dn = 0;
3473     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3474     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3475     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3476
3477     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3478     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3479     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3480     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3481
3482     dri_bo_unmap(pp_context->sampler_state_table.bo);
3483
3484     /* private function & data */
3485     pp_context->pp_x_steps = pp_dndi_x_steps;
3486     pp_context->pp_y_steps = pp_dndi_y_steps;
3487     pp_context->private_context = dndi_ctx;
3488     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
3489
3490     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3491     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
3492     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
3493     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
3494
3495     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3496     pp_inline_parameter->grf5.number_blocks = w / 16;
3497     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3498     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3499
3500     dndi_ctx->dest_w = w;
3501     dndi_ctx->dest_h = h;
3502
3503     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3504     return VA_STATUS_SUCCESS;
3505 }
3506
/*
 * Horizontal step count used for the denoise (DN) path.
 *
 * The private context is not consulted; the count is always 1.
 */
static int
pp_dn_x_steps(void *private_context)
{
    const int x_step_count = 1;

    return x_step_count;
}
3512
3513 static int
3514 pp_dn_y_steps(void *private_context)
3515 {
3516     struct pp_dn_context *pp_dn_context = private_context;
3517
3518     return pp_dn_context->dest_h / 8;
3519 }
3520
3521 static int
3522 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3523 {
3524     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3525
3526     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3527     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
3528
3529     return 0;
3530 }
3531
3532 static VAStatus
3533 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3534                       const struct i965_surface *src_surface,
3535                       const VARectangle *src_rect,
3536                       struct i965_surface *dst_surface,
3537                       const VARectangle *dst_rect,
3538                       void *filter_param)
3539 {
3540     struct i965_driver_data *i965 = i965_driver_data(ctx);
3541     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
3542     struct object_surface *obj_surface;
3543     struct i965_sampler_dndi *sampler_dndi;
3544     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3545     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3546     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3547     int index;
3548     int w, h;
3549     int orig_w, orig_h;
3550     int dn_strength = 15;
3551     int dndi_top_first = 1;
3552     int dn_progressive = 0;
3553
3554     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3555         dndi_top_first = 1;
3556         dn_progressive = 1;
3557     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3558         dndi_top_first = 1;
3559         dn_progressive = 0;
3560     } else {
3561         dndi_top_first = 0;
3562         dn_progressive = 0;
3563     }
3564
3565     if (dn_filter_param) {
3566         float value = dn_filter_param->value;
3567
3568         if (value > 1.0)
3569             value = 1.0;
3570
3571         if (value < 0.0)
3572             value = 0.0;
3573
3574         dn_strength = (int)(value * 31.0F);
3575     }
3576
3577     /* surface */
3578     obj_surface = (struct object_surface *)src_surface->base;
3579     orig_w = obj_surface->orig_width;
3580     orig_h = obj_surface->orig_height;
3581     w = obj_surface->width;
3582     h = obj_surface->height;
3583
3584     if (pp_dn_context->stmm_bo == NULL) {
3585         pp_dn_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
3586                                               "STMM surface",
3587                                               w * h,
3588                                               4096);
3589         assert(pp_dn_context->stmm_bo);
3590     }
3591
3592     /* source UV surface index 2 */
3593     i965_pp_set_surface_state(ctx, pp_context,
3594                               obj_surface->bo, w * h,
3595                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3596                               2, 0);
3597
3598     /* source YUV surface index 4 */
3599     i965_pp_set_surface2_state(ctx, pp_context,
3600                                obj_surface->bo, 0,
3601                                orig_w, orig_h, w,
3602                                0, h,
3603                                SURFACE_FORMAT_PLANAR_420_8, 1,
3604                                4);
3605
3606     /* source STMM surface index 20 */
3607     i965_pp_set_surface_state(ctx, pp_context,
3608                               pp_dn_context->stmm_bo, 0,
3609                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3610                               20, 1);
3611
3612     /* destination surface */
3613     obj_surface = (struct object_surface *)dst_surface->base;
3614     orig_w = obj_surface->orig_width;
3615     orig_h = obj_surface->orig_height;
3616     w = obj_surface->width;
3617     h = obj_surface->height;
3618
3619     /* destination Y surface index 7 */
3620     i965_pp_set_surface_state(ctx, pp_context,
3621                               obj_surface->bo, 0,
3622                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3623                               7, 1);
3624
3625     /* destination UV surface index 8 */
3626     i965_pp_set_surface_state(ctx, pp_context,
3627                               obj_surface->bo, w * h,
3628                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3629                               8, 1);
3630     /* sampler dn */
3631     dri_bo_map(pp_context->sampler_state_table.bo, True);
3632     assert(pp_context->sampler_state_table.bo->virtual);
3633     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3634     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3635
3636     /* sample dndi index 1 */
3637     index = 0;
3638     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3639     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
3640     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3641     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3642
3643     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3644     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3645     sampler_dndi[index].dw1.stmm_c2 = 0;
3646     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3647     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3648
3649     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3650     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
3651     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3652     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
3653
3654     sampler_dndi[index].dw3.maximum_stmm = 128;
3655     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3656     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3657     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3658     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3659
3660     sampler_dndi[index].dw4.sdi_delta = 8;
3661     sampler_dndi[index].dw4.sdi_threshold = 128;
3662     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3663     sampler_dndi[index].dw4.stmm_shift_up = 0;
3664     sampler_dndi[index].dw4.stmm_shift_down = 0;
3665     sampler_dndi[index].dw4.minimum_stmm = 0;
3666
3667     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3668     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3669     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3670     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3671
3672     sampler_dndi[index].dw6.dn_enable = 1;
3673     sampler_dndi[index].dw6.di_enable = 0;
3674     sampler_dndi[index].dw6.di_partial = 0;
3675     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3676     sampler_dndi[index].dw6.dndi_stream_id = 1;
3677     sampler_dndi[index].dw6.dndi_first_frame = 1;
3678     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
3679     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3680     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3681     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3682
3683     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3684     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3685     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3686     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3687
3688     dri_bo_unmap(pp_context->sampler_state_table.bo);
3689
3690     /* private function & data */
3691     pp_context->pp_x_steps = pp_dn_x_steps;
3692     pp_context->pp_y_steps = pp_dn_y_steps;
3693     pp_context->private_context = &pp_context->pp_dn_context;
3694     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
3695
3696     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3697     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
3698     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
3699     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
3700
3701     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3702     pp_inline_parameter->grf5.number_blocks = w / 16;
3703     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3704     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3705
3706     pp_dn_context->dest_w = w;
3707     pp_dn_context->dest_h = h;
3708
3709     dst_surface->flags = src_surface->flags;
3710
3711     return VA_STATUS_SUCCESS;
3712 }
3713
3714 static int
3715 gen7_pp_dndi_x_steps(void *private_context)
3716 {
3717     struct pp_dndi_context *pp_dndi_context = private_context;
3718
3719     return pp_dndi_context->dest_w / 16;
3720 }
3721
3722 static int
3723 gen7_pp_dndi_y_steps(void *private_context)
3724 {
3725     struct pp_dndi_context *pp_dndi_context = private_context;
3726
3727     return pp_dndi_context->dest_h / 4;
3728 }
3729
3730 static int
3731 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3732 {
3733     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3734
3735     pp_inline_parameter->grf9.destination_block_horizontal_origin = x * 16;
3736     pp_inline_parameter->grf9.destination_block_vertical_origin = y * 4;
3737
3738     return 0;
3739 }
3740
3741 static VAStatus
3742 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3743                              const struct i965_surface *src_surface,
3744                              const VARectangle *src_rect,
3745                              struct i965_surface *dst_surface,
3746                              const VARectangle *dst_rect,
3747                              void *filter_param)
3748 {
3749     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
3750     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3751     const VAProcPipelineParameterBuffer * const pipe_params =
3752         pp_context->pipeline_param;
3753     const VAProcFilterParameterBufferDeinterlacing * const deint_params =
3754         filter_param;
3755     struct object_surface * const src_obj_surface = (struct object_surface *)
3756                                                     src_surface->base;
3757     struct object_surface * const dst_obj_surface = (struct object_surface *)
3758                                                     dst_surface->base;
3759     struct object_surface *obj_surface;
3760     struct gen7_sampler_dndi *sampler_dndi;
3761     int index, dndi_top_first;
3762     int w, h, orig_w, orig_h;
3763     VAStatus status;
3764
3765     status = pp_dndi_context_init_surface_params(dndi_ctx, src_obj_surface,
3766                                                  pipe_params, deint_params);
3767     if (status != VA_STATUS_SUCCESS)
3768         return status;
3769
3770     status = pp_dndi_context_ensure_surfaces(ctx, pp_context,
3771                                              src_obj_surface, dst_obj_surface);
3772     if (status != VA_STATUS_SUCCESS)
3773         return status;
3774
3775     status = pp_dndi_context_ensure_surfaces_storage(ctx, pp_context,
3776                                                      src_obj_surface, dst_obj_surface);
3777     if (status != VA_STATUS_SUCCESS)
3778         return status;
3779
3780     /* Current input surface (index = 3) */
3781     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].obj_surface;
3782     gen7_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3783                                obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3784                                0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 3);
3785
3786     /* Previous input surface (index = 4) */
3787     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS].obj_surface;
3788     gen7_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3789                                obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3790                                0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 4);
3791
3792     /* STMM input surface (index = 5) */
3793     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_STMM].obj_surface;
3794     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3795                               obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3796                               I965_SURFACEFORMAT_R8_UNORM, 5, 1);
3797
3798     /* Previous output surfaces (index = { 27, 28 }) */
3799     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS].obj_surface;
3800     w = obj_surface->width;
3801     h = obj_surface->height;
3802     orig_w = obj_surface->orig_width;
3803     orig_h = obj_surface->orig_height;
3804
3805     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3806                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 27, 1);
3807     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3808                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 28, 1);
3809
3810     /* Current output surfaces (index = { 30, 31 }) */
3811     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT].obj_surface;
3812     w = obj_surface->width;
3813     h = obj_surface->height;
3814     orig_w = obj_surface->orig_width;
3815     orig_h = obj_surface->orig_height;
3816
3817     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3818                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 30, 1);
3819     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3820                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 31, 1);
3821
3822     /* STMM output surface (index = 33) */
3823     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM].obj_surface;
3824     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3825                               obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3826                               I965_SURFACEFORMAT_R8_UNORM, 33, 1);
3827
3828     dndi_top_first = !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
3829
3830     /* sampler dndi */
3831     dri_bo_map(pp_context->sampler_state_table.bo, True);
3832     assert(pp_context->sampler_state_table.bo->virtual);
3833     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3834     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3835
3836     /* sample dndi index 0 */
3837     index = 0;
3838     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3839     sampler_dndi[index].dw0.dnmh_delt = 7;
3840     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
3841     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
3842     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
3843     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3844
3845     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3846     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3847     sampler_dndi[index].dw1.stmm_c2 = 2;
3848     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3849     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3850
3851     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
3852     sampler_dndi[index].dw2.bne_edge_th = 1;
3853     sampler_dndi[index].dw2.smooth_mv_th = 0;
3854     sampler_dndi[index].dw2.sad_tight_th = 5;
3855     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
3856     sampler_dndi[index].dw2.good_neighbor_th = 12;
3857
3858     sampler_dndi[index].dw3.maximum_stmm = 150;
3859     sampler_dndi[index].dw3.multipler_for_vecm = 30;
3860     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3861     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3862     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3863
3864     sampler_dndi[index].dw4.sdi_delta = 5;
3865     sampler_dndi[index].dw4.sdi_threshold = 100;
3866     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3867     sampler_dndi[index].dw4.stmm_shift_up = 1;
3868     sampler_dndi[index].dw4.stmm_shift_down = 3;
3869     sampler_dndi[index].dw4.minimum_stmm = 118;
3870
3871     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3872     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3873     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3874     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
3875     sampler_dndi[index].dw6.dn_enable = 0;
3876     sampler_dndi[index].dw6.di_enable = 1;
3877     sampler_dndi[index].dw6.di_partial = 0;
3878     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3879     sampler_dndi[index].dw6.dndi_stream_id = 1;
3880     sampler_dndi[index].dw6.dndi_first_frame = dndi_ctx->is_first_frame;
3881     sampler_dndi[index].dw6.progressive_dn = 0;
3882     sampler_dndi[index].dw6.mcdi_enable =
3883         (deint_params->algorithm == VAProcDeinterlacingMotionCompensated);
3884     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3885     sampler_dndi[index].dw6.cat_th1 = 0;
3886     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3887     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3888
3889     sampler_dndi[index].dw7.sad_tha = 5;
3890     sampler_dndi[index].dw7.sad_thb = 10;
3891     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3892     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
3893     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3894     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3895     sampler_dndi[index].dw7.neighborpixel_th = 10;
3896     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3897
3898     dri_bo_unmap(pp_context->sampler_state_table.bo);
3899
3900     /* private function & data */
3901     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3902     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3903     pp_context->private_context = dndi_ctx;
3904     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3905
3906     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3907     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3908     pp_static_parameter->grf1.di_top_field_first = 0;
3909     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3910
3911     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3912     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3913     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3914
3915     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3916     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3917
3918     dndi_ctx->dest_w = w;
3919     dndi_ctx->dest_h = h;
3920
3921     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3922     return VA_STATUS_SUCCESS;
3923 }
3924
3925 static int
3926 gen7_pp_dn_x_steps(void *private_context)
3927 {
3928     struct pp_dn_context *pp_dn_context = private_context;
3929
3930     return pp_dn_context->dest_w / 16;
3931 }
3932
3933 static int
3934 gen7_pp_dn_y_steps(void *private_context)
3935 {
3936     struct pp_dn_context *pp_dn_context = private_context;
3937
3938     return pp_dn_context->dest_h / 4;
3939 }
3940
3941 static int
3942 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3943 {
3944     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3945
3946     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3947     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3948
3949     return 0;
3950 }
3951
3952 static VAStatus
3953 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3954                            const struct i965_surface *src_surface,
3955                            const VARectangle *src_rect,
3956                            struct i965_surface *dst_surface,
3957                            const VARectangle *dst_rect,
3958                            void *filter_param)
3959 {
3960     struct i965_driver_data *i965 = i965_driver_data(ctx);
3961     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
3962     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3963     struct object_surface *obj_surface;
3964     struct gen7_sampler_dndi *sampler_dn;
3965     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3966     int index;
3967     int w, h;
3968     int orig_w, orig_h;
3969     int dn_strength = 15;
3970     int dndi_top_first = 1;
3971     int dn_progressive = 0;
3972
3973     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3974         dndi_top_first = 1;
3975         dn_progressive = 1;
3976     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3977         dndi_top_first = 1;
3978         dn_progressive = 0;
3979     } else {
3980         dndi_top_first = 0;
3981         dn_progressive = 0;
3982     }
3983
3984     if (dn_filter_param) {
3985         float value = dn_filter_param->value;
3986
3987         if (value > 1.0)
3988             value = 1.0;
3989
3990         if (value < 0.0)
3991             value = 0.0;
3992
3993         dn_strength = (int)(value * 31.0F);
3994     }
3995
3996     /* surface */
3997     obj_surface = (struct object_surface *)src_surface->base;
3998     orig_w = obj_surface->orig_width;
3999     orig_h = obj_surface->orig_height;
4000     w = obj_surface->width;
4001     h = obj_surface->height;
4002
4003     if (pp_dn_context->stmm_bo == NULL) {
4004         pp_dn_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
4005                                               "STMM surface",
4006                                               w * h,
4007                                               4096);
4008         assert(pp_dn_context->stmm_bo);
4009     }
4010
4011     /* source UV surface index 1 */
4012     gen7_pp_set_surface_state(ctx, pp_context,
4013                               obj_surface->bo, w * h,
4014                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4015                               1, 0);
4016
4017     /* source YUV surface index 3 */
4018     gen7_pp_set_surface2_state(ctx, pp_context,
4019                                obj_surface->bo, 0,
4020                                orig_w, orig_h, w,
4021                                0, h,
4022                                SURFACE_FORMAT_PLANAR_420_8, 1,
4023                                3);
4024
4025     /* source (temporal reference) YUV surface index 4 */
4026     gen7_pp_set_surface2_state(ctx, pp_context,
4027                                obj_surface->bo, 0,
4028                                orig_w, orig_h, w,
4029                                0, h,
4030                                SURFACE_FORMAT_PLANAR_420_8, 1,
4031                                4);
4032
4033     /* STMM / History Statistics input surface, index 5 */
4034     gen7_pp_set_surface_state(ctx, pp_context,
4035                               pp_dn_context->stmm_bo, 0,
4036                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4037                               33, 1);
4038
4039     /* destination surface */
4040     obj_surface = (struct object_surface *)dst_surface->base;
4041     orig_w = obj_surface->orig_width;
4042     orig_h = obj_surface->orig_height;
4043     w = obj_surface->width;
4044     h = obj_surface->height;
4045
4046     /* destination Y surface index 24 */
4047     gen7_pp_set_surface_state(ctx, pp_context,
4048                               obj_surface->bo, 0,
4049                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4050                               24, 1);
4051
4052     /* destination UV surface index 25 */
4053     gen7_pp_set_surface_state(ctx, pp_context,
4054                               obj_surface->bo, w * h,
4055                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4056                               25, 1);
4057
4058     /* sampler dn */
4059     dri_bo_map(pp_context->sampler_state_table.bo, True);
4060     assert(pp_context->sampler_state_table.bo->virtual);
4061     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
4062     sampler_dn = pp_context->sampler_state_table.bo->virtual;
4063
4064     /* sample dn index 1 */
4065     index = 0;
4066     sampler_dn[index].dw0.denoise_asd_threshold = 0;
4067     sampler_dn[index].dw0.dnmh_delt = 8;
4068     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
4069     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
4070     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
4071     sampler_dn[index].dw0.denoise_stad_threshold = 0;
4072
4073     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
4074     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
4075     sampler_dn[index].dw1.stmm_c2 = 0;
4076     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
4077     sampler_dn[index].dw1.temporal_difference_threshold = 16;
4078
4079     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
4080     sampler_dn[index].dw2.bne_edge_th = 1;
4081     sampler_dn[index].dw2.smooth_mv_th = 0;
4082     sampler_dn[index].dw2.sad_tight_th = 5;
4083     sampler_dn[index].dw2.cat_slope_minus1 = 9;
4084     sampler_dn[index].dw2.good_neighbor_th = 4;
4085
4086     sampler_dn[index].dw3.maximum_stmm = 128;
4087     sampler_dn[index].dw3.multipler_for_vecm = 2;
4088     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
4089     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
4090     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
4091
4092     sampler_dn[index].dw4.sdi_delta = 8;
4093     sampler_dn[index].dw4.sdi_threshold = 128;
4094     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
4095     sampler_dn[index].dw4.stmm_shift_up = 0;
4096     sampler_dn[index].dw4.stmm_shift_down = 0;
4097     sampler_dn[index].dw4.minimum_stmm = 0;
4098
4099     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
4100     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
4101     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
4102     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
4103
4104     sampler_dn[index].dw6.dn_enable = 1;
4105     sampler_dn[index].dw6.di_enable = 0;
4106     sampler_dn[index].dw6.di_partial = 0;
4107     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
4108     sampler_dn[index].dw6.dndi_stream_id = 1;
4109     sampler_dn[index].dw6.dndi_first_frame = 1;
4110     sampler_dn[index].dw6.progressive_dn = dn_progressive;
4111     sampler_dn[index].dw6.mcdi_enable = 0;
4112     sampler_dn[index].dw6.fmd_tear_threshold = 32;
4113     sampler_dn[index].dw6.cat_th1 = 0;
4114     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
4115     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
4116
4117     sampler_dn[index].dw7.sad_tha = 5;
4118     sampler_dn[index].dw7.sad_thb = 10;
4119     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
4120     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
4121     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
4122     sampler_dn[index].dw7.vdi_walker_enable = 0;
4123     sampler_dn[index].dw7.neighborpixel_th = 10;
4124     sampler_dn[index].dw7.column_width_minus1 = w / 16;
4125
4126     dri_bo_unmap(pp_context->sampler_state_table.bo);
4127
4128     /* private function & data */
4129     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
4130     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
4131     pp_context->private_context = &pp_context->pp_dn_context;
4132     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
4133
4134     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
4135     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
4136     pp_static_parameter->grf1.di_top_field_first = 0;
4137     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
4138
4139     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
4140     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
4141     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
4142
4143     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
4144     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
4145
4146     pp_dn_context->dest_w = w;
4147     pp_dn_context->dest_h = h;
4148
4149     dst_surface->flags = src_surface->flags;
4150
4151     return VA_STATUS_SUCCESS;
4152 }
4153
4154 static VAStatus
4155 ironlake_pp_initialize(
4156     VADriverContextP ctx,
4157     struct i965_post_processing_context *pp_context,
4158     const struct i965_surface *src_surface,
4159     const VARectangle *src_rect,
4160     struct i965_surface *dst_surface,
4161     const VARectangle *dst_rect,
4162     int pp_index,
4163     void *filter_param
4164 )
4165 {
4166     VAStatus va_status;
4167     struct i965_driver_data *i965 = i965_driver_data(ctx);
4168     struct pp_module *pp_module;
4169     dri_bo *bo;
4170     int static_param_size, inline_param_size;
4171
4172     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4173     bo = dri_bo_alloc(i965->intel.bufmgr,
4174                       "surface state & binding table",
4175                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4176                       4096);
4177     assert(bo);
4178     pp_context->surface_state_binding_table.bo = bo;
4179
4180     dri_bo_unreference(pp_context->curbe.bo);
4181     bo = dri_bo_alloc(i965->intel.bufmgr,
4182                       "constant buffer",
4183                       4096,
4184                       4096);
4185     assert(bo);
4186     pp_context->curbe.bo = bo;
4187
4188     dri_bo_unreference(pp_context->idrt.bo);
4189     bo = dri_bo_alloc(i965->intel.bufmgr,
4190                       "interface discriptor",
4191                       sizeof(struct i965_interface_descriptor),
4192                       4096);
4193     assert(bo);
4194     pp_context->idrt.bo = bo;
4195     pp_context->idrt.num_interface_descriptors = 0;
4196
4197     dri_bo_unreference(pp_context->sampler_state_table.bo);
4198     bo = dri_bo_alloc(i965->intel.bufmgr,
4199                       "sampler state table",
4200                       4096,
4201                       4096);
4202     assert(bo);
4203     dri_bo_map(bo, True);
4204     memset(bo->virtual, 0, bo->size);
4205     dri_bo_unmap(bo);
4206     pp_context->sampler_state_table.bo = bo;
4207
4208     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4209     bo = dri_bo_alloc(i965->intel.bufmgr,
4210                       "sampler 8x8 state ",
4211                       4096,
4212                       4096);
4213     assert(bo);
4214     pp_context->sampler_state_table.bo_8x8 = bo;
4215
4216     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4217     bo = dri_bo_alloc(i965->intel.bufmgr,
4218                       "sampler 8x8 state ",
4219                       4096,
4220                       4096);
4221     assert(bo);
4222     pp_context->sampler_state_table.bo_8x8_uv = bo;
4223
4224     dri_bo_unreference(pp_context->vfe_state.bo);
4225     bo = dri_bo_alloc(i965->intel.bufmgr,
4226                       "vfe state",
4227                       sizeof(struct i965_vfe_state),
4228                       4096);
4229     assert(bo);
4230     pp_context->vfe_state.bo = bo;
4231
4232     static_param_size = sizeof(struct pp_static_parameter);
4233     inline_param_size = sizeof(struct pp_inline_parameter);
4234
4235     memset(pp_context->pp_static_parameter, 0, static_param_size);
4236     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4237
4238     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4239     pp_context->current_pp = pp_index;
4240     pp_module = &pp_context->pp_modules[pp_index];
4241
4242     if (pp_module->initialize)
4243         va_status = pp_module->initialize(ctx, pp_context,
4244                                           src_surface,
4245                                           src_rect,
4246                                           dst_surface,
4247                                           dst_rect,
4248                                           filter_param);
4249     else
4250         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4251
4252     return va_status;
4253 }
4254
4255 static VAStatus
4256 ironlake_post_processing(
4257     VADriverContextP   ctx,
4258     struct i965_post_processing_context *pp_context,
4259     const struct i965_surface *src_surface,
4260     const VARectangle *src_rect,
4261     struct i965_surface *dst_surface,
4262     const VARectangle *dst_rect,
4263     int                pp_index,
4264     void *filter_param
4265 )
4266 {
4267     VAStatus va_status;
4268
4269     va_status = ironlake_pp_initialize(ctx, pp_context,
4270                                        src_surface,
4271                                        src_rect,
4272                                        dst_surface,
4273                                        dst_rect,
4274                                        pp_index,
4275                                        filter_param);
4276
4277     if (va_status == VA_STATUS_SUCCESS) {
4278         ironlake_pp_states_setup(ctx, pp_context);
4279         ironlake_pp_pipeline_setup(ctx, pp_context);
4280     }
4281
4282     return va_status;
4283 }
4284
4285 static VAStatus
4286 gen6_pp_initialize(
4287     VADriverContextP ctx,
4288     struct i965_post_processing_context *pp_context,
4289     const struct i965_surface *src_surface,
4290     const VARectangle *src_rect,
4291     struct i965_surface *dst_surface,
4292     const VARectangle *dst_rect,
4293     int pp_index,
4294     void *filter_param
4295 )
4296 {
4297     VAStatus va_status;
4298     struct i965_driver_data *i965 = i965_driver_data(ctx);
4299     struct pp_module *pp_module;
4300     dri_bo *bo;
4301     int static_param_size, inline_param_size;
4302
4303     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4304     bo = dri_bo_alloc(i965->intel.bufmgr,
4305                       "surface state & binding table",
4306                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4307                       4096);
4308     assert(bo);
4309     pp_context->surface_state_binding_table.bo = bo;
4310
4311     dri_bo_unreference(pp_context->curbe.bo);
4312     bo = dri_bo_alloc(i965->intel.bufmgr,
4313                       "constant buffer",
4314                       4096,
4315                       4096);
4316     assert(bo);
4317     pp_context->curbe.bo = bo;
4318
4319     dri_bo_unreference(pp_context->idrt.bo);
4320     bo = dri_bo_alloc(i965->intel.bufmgr,
4321                       "interface discriptor",
4322                       sizeof(struct gen6_interface_descriptor_data),
4323                       4096);
4324     assert(bo);
4325     pp_context->idrt.bo = bo;
4326     pp_context->idrt.num_interface_descriptors = 0;
4327
4328     dri_bo_unreference(pp_context->sampler_state_table.bo);
4329     bo = dri_bo_alloc(i965->intel.bufmgr,
4330                       "sampler state table",
4331                       4096,
4332                       4096);
4333     assert(bo);
4334     dri_bo_map(bo, True);
4335     memset(bo->virtual, 0, bo->size);
4336     dri_bo_unmap(bo);
4337     pp_context->sampler_state_table.bo = bo;
4338
4339     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4340     bo = dri_bo_alloc(i965->intel.bufmgr,
4341                       "sampler 8x8 state ",
4342                       4096,
4343                       4096);
4344     assert(bo);
4345     pp_context->sampler_state_table.bo_8x8 = bo;
4346
4347     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4348     bo = dri_bo_alloc(i965->intel.bufmgr,
4349                       "sampler 8x8 state ",
4350                       4096,
4351                       4096);
4352     assert(bo);
4353     pp_context->sampler_state_table.bo_8x8_uv = bo;
4354
4355     dri_bo_unreference(pp_context->vfe_state.bo);
4356     bo = dri_bo_alloc(i965->intel.bufmgr,
4357                       "vfe state",
4358                       sizeof(struct i965_vfe_state),
4359                       4096);
4360     assert(bo);
4361     pp_context->vfe_state.bo = bo;
4362
4363     if (IS_GEN7(i965->intel.device_info)) {
4364         static_param_size = sizeof(struct gen7_pp_static_parameter);
4365         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
4366     } else {
4367         static_param_size = sizeof(struct pp_static_parameter);
4368         inline_param_size = sizeof(struct pp_inline_parameter);
4369     }
4370
4371     memset(pp_context->pp_static_parameter, 0, static_param_size);
4372     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4373
4374     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4375     pp_context->current_pp = pp_index;
4376     pp_module = &pp_context->pp_modules[pp_index];
4377
4378     if (pp_module->initialize)
4379         va_status = pp_module->initialize(ctx, pp_context,
4380                                           src_surface,
4381                                           src_rect,
4382                                           dst_surface,
4383                                           dst_rect,
4384                                           filter_param);
4385     else
4386         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4387
4388     calculate_boundary_block_mask(pp_context, dst_rect);
4389
4390     return va_status;
4391 }
4392
4393
4394 static void
4395 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
4396                                    struct i965_post_processing_context *pp_context)
4397 {
4398     struct i965_driver_data *i965 = i965_driver_data(ctx);
4399     struct gen6_interface_descriptor_data *desc;
4400     dri_bo *bo;
4401     int pp_index = pp_context->current_pp;
4402
4403     bo = pp_context->idrt.bo;
4404     dri_bo_map(bo, True);
4405     assert(bo->virtual);
4406     desc = bo->virtual;
4407     memset(desc, 0, sizeof(*desc));
4408     desc->desc0.kernel_start_pointer =
4409         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
4410     desc->desc1.single_program_flow = 1;
4411     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
4412     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
4413     desc->desc2.sampler_state_pointer =
4414         pp_context->sampler_state_table.bo->offset >> 5;
4415     desc->desc3.binding_table_entry_count = 0;
4416     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
4417     desc->desc4.constant_urb_entry_read_offset = 0;
4418
4419     if (IS_GEN7(i965->intel.device_info))
4420         desc->desc4.constant_urb_entry_read_length = 8; /* grf 1-8 */
4421     else
4422         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
4423
4424     dri_bo_emit_reloc(bo,
4425                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4426                       0,
4427                       offsetof(struct gen6_interface_descriptor_data, desc0),
4428                       pp_context->pp_modules[pp_index].kernel.bo);
4429
4430     dri_bo_emit_reloc(bo,
4431                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4432                       desc->desc2.sampler_count << 2,
4433                       offsetof(struct gen6_interface_descriptor_data, desc2),
4434                       pp_context->sampler_state_table.bo);
4435
4436     dri_bo_unmap(bo);
4437     pp_context->idrt.num_interface_descriptors++;
4438 }
4439
4440 static void
4441 gen6_pp_upload_constants(VADriverContextP ctx,
4442                          struct i965_post_processing_context *pp_context)
4443 {
4444     struct i965_driver_data *i965 = i965_driver_data(ctx);
4445     unsigned char *constant_buffer;
4446     int param_size;
4447
4448     assert(sizeof(struct pp_static_parameter) == 128);
4449     assert(sizeof(struct gen7_pp_static_parameter) == 256);
4450
4451     if (IS_GEN7(i965->intel.device_info))
4452         param_size = sizeof(struct gen7_pp_static_parameter);
4453     else
4454         param_size = sizeof(struct pp_static_parameter);
4455
4456     dri_bo_map(pp_context->curbe.bo, 1);
4457     assert(pp_context->curbe.bo->virtual);
4458     constant_buffer = pp_context->curbe.bo->virtual;
4459     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
4460     dri_bo_unmap(pp_context->curbe.bo);
4461 }
4462
4463 static void
4464 gen6_pp_states_setup(VADriverContextP ctx,
4465                      struct i965_post_processing_context *pp_context)
4466 {
4467     gen6_pp_interface_descriptor_table(ctx, pp_context);
4468     gen6_pp_upload_constants(ctx, pp_context);
4469 }
4470
4471 static void
4472 gen6_pp_pipeline_select(VADriverContextP ctx,
4473                         struct i965_post_processing_context *pp_context)
4474 {
4475     struct intel_batchbuffer *batch = pp_context->batch;
4476
4477     BEGIN_BATCH(batch, 1);
4478     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
4479     ADVANCE_BATCH(batch);
4480 }
4481
4482 static void
4483 gen6_pp_state_base_address(VADriverContextP ctx,
4484                            struct i965_post_processing_context *pp_context)
4485 {
4486     struct intel_batchbuffer *batch = pp_context->batch;
4487
4488     BEGIN_BATCH(batch, 10);
4489     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
4490     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4491     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
4492     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4493     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4494     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4495     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4496     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4497     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4498     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4499     ADVANCE_BATCH(batch);
4500 }
4501
4502 static void
4503 gen6_pp_vfe_state(VADriverContextP ctx,
4504                   struct i965_post_processing_context *pp_context)
4505 {
4506     struct intel_batchbuffer *batch = pp_context->batch;
4507
4508     BEGIN_BATCH(batch, 8);
4509     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
4510     OUT_BATCH(batch, 0);
4511     OUT_BATCH(batch,
4512               (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 |
4513               pp_context->vfe_gpu_state.num_urb_entries << 8);
4514     OUT_BATCH(batch, 0);
4515     OUT_BATCH(batch,
4516               (pp_context->vfe_gpu_state.urb_entry_size) << 16 |
4517               /* URB Entry Allocation Size, in 256 bits unit */
4518               (pp_context->vfe_gpu_state.curbe_allocation_size));
4519     /* CURBE Allocation Size, in 256 bits unit */
4520     OUT_BATCH(batch, 0);
4521     OUT_BATCH(batch, 0);
4522     OUT_BATCH(batch, 0);
4523     ADVANCE_BATCH(batch);
4524 }
4525
4526 static void
4527 gen6_pp_curbe_load(VADriverContextP ctx,
4528                    struct i965_post_processing_context *pp_context)
4529 {
4530     struct intel_batchbuffer *batch = pp_context->batch;
4531     struct i965_driver_data *i965 = i965_driver_data(ctx);
4532     int param_size;
4533
4534     if (IS_GEN7(i965->intel.device_info))
4535         param_size = sizeof(struct gen7_pp_static_parameter);
4536     else
4537         param_size = sizeof(struct pp_static_parameter);
4538
4539     BEGIN_BATCH(batch, 4);
4540     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
4541     OUT_BATCH(batch, 0);
4542     OUT_BATCH(batch,
4543               param_size);
4544     OUT_RELOC(batch,
4545               pp_context->curbe.bo,
4546               I915_GEM_DOMAIN_INSTRUCTION, 0,
4547               0);
4548     ADVANCE_BATCH(batch);
4549 }
4550
4551 static void
4552 gen6_interface_descriptor_load(VADriverContextP ctx,
4553                                struct i965_post_processing_context *pp_context)
4554 {
4555     struct intel_batchbuffer *batch = pp_context->batch;
4556
4557     BEGIN_BATCH(batch, 4);
4558     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
4559     OUT_BATCH(batch, 0);
4560     OUT_BATCH(batch,
4561               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
4562     OUT_RELOC(batch,
4563               pp_context->idrt.bo,
4564               I915_GEM_DOMAIN_INSTRUCTION, 0,
4565               0);
4566     ADVANCE_BATCH(batch);
4567 }
4568
4569 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps)
4570 {
4571     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
4572
4573     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
4574     pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
4575     // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle
4576     pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
4577     pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4578     pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
4579
4580     /* 1 x N */
4581     if (x_steps == 1) {
4582         if (y == y_steps - 1) {
4583             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
4584         } else {
4585             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
4586         }
4587     }
4588
4589     /* M x 1 */
4590     if (y_steps == 1) {
4591         if (x == 0) { // all blocks in this group are on the left edge
4592             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
4593             pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left;
4594         } else if (x == x_steps - 1) {
4595             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
4596             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
4597         } else {
4598             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
4599             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4600             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
4601         }
4602     }
4603
4604 }
4605
4606 static void
4607 gen6_pp_object_walker(VADriverContextP ctx,
4608                       struct i965_post_processing_context *pp_context)
4609 {
4610     struct i965_driver_data *i965 = i965_driver_data(ctx);
4611     struct intel_batchbuffer *batch = pp_context->batch;
4612     int x, x_steps, y, y_steps;
4613     int param_size, command_length_in_dws;
4614     dri_bo *command_buffer;
4615     unsigned int *command_ptr;
4616
4617     if (IS_GEN7(i965->intel.device_info))
4618         param_size = sizeof(struct gen7_pp_inline_parameter);
4619     else
4620         param_size = sizeof(struct pp_inline_parameter);
4621
4622     x_steps = pp_context->pp_x_steps(pp_context->private_context);
4623     y_steps = pp_context->pp_y_steps(pp_context->private_context);
4624     command_length_in_dws = 6 + (param_size >> 2);
4625     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
4626                                   "command objects buffer",
4627                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
4628                                   4096);
4629
4630     dri_bo_map(command_buffer, 1);
4631     command_ptr = command_buffer->virtual;
4632
4633     for (y = 0; y < y_steps; y++) {
4634         for (x = 0; x < x_steps; x++) {
4635             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
4636                 // some common block parameter update goes here, apply to all pp functions
4637                 if (IS_GEN6(i965->intel.device_info))
4638                     update_block_mask_parameter(pp_context, x, y, x_steps, y_steps);
4639
4640                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
4641                 *command_ptr++ = 0;
4642                 *command_ptr++ = 0;
4643                 *command_ptr++ = 0;
4644                 *command_ptr++ = 0;
4645                 *command_ptr++ = 0;
4646                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
4647                 command_ptr += (param_size >> 2);
4648             }
4649         }
4650     }
4651
4652     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
4653         *command_ptr++ = 0;
4654
4655     *command_ptr = MI_BATCH_BUFFER_END;
4656
4657     dri_bo_unmap(command_buffer);
4658
4659     BEGIN_BATCH(batch, 2);
4660     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
4661     OUT_RELOC(batch, command_buffer,
4662               I915_GEM_DOMAIN_COMMAND, 0,
4663               0);
4664     ADVANCE_BATCH(batch);
4665
4666     dri_bo_unreference(command_buffer);
4667
4668     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
4669      * will cause control to pass back to ring buffer
4670      */
4671     intel_batchbuffer_end_atomic(batch);
4672     intel_batchbuffer_flush(batch);
4673     intel_batchbuffer_start_atomic(batch, 0x1000);
4674 }
4675
4676 static void
4677 gen6_pp_pipeline_setup(VADriverContextP ctx,
4678                        struct i965_post_processing_context *pp_context)
4679 {
4680     struct intel_batchbuffer *batch = pp_context->batch;
4681
4682     intel_batchbuffer_start_atomic(batch, 0x1000);
4683     intel_batchbuffer_emit_mi_flush(batch);
4684     gen6_pp_pipeline_select(ctx, pp_context);
4685     gen6_pp_state_base_address(ctx, pp_context);
4686     gen6_pp_vfe_state(ctx, pp_context);
4687     gen6_pp_curbe_load(ctx, pp_context);
4688     gen6_interface_descriptor_load(ctx, pp_context);
4689     gen6_pp_object_walker(ctx, pp_context);
4690     intel_batchbuffer_end_atomic(batch);
4691 }
4692
4693 static VAStatus
4694 gen6_post_processing(
4695     VADriverContextP ctx,
4696     struct i965_post_processing_context *pp_context,
4697     const struct i965_surface *src_surface,
4698     const VARectangle *src_rect,
4699     struct i965_surface *dst_surface,
4700     const VARectangle *dst_rect,
4701     int pp_index,
4702     void *filter_param
4703 )
4704 {
4705     VAStatus va_status;
4706
4707     va_status = gen6_pp_initialize(ctx, pp_context,
4708                                    src_surface,
4709                                    src_rect,
4710                                    dst_surface,
4711                                    dst_rect,
4712                                    pp_index,
4713                                    filter_param);
4714
4715     if (va_status == VA_STATUS_SUCCESS) {
4716         gen6_pp_states_setup(ctx, pp_context);
4717         gen6_pp_pipeline_setup(ctx, pp_context);
4718     }
4719
4720     if (va_status == VA_STATUS_SUCCESS_1)
4721         va_status = VA_STATUS_SUCCESS;
4722
4723     return va_status;
4724 }
4725
4726 static VAStatus
4727 i965_post_processing_internal(
4728     VADriverContextP   ctx,
4729     struct i965_post_processing_context *pp_context,
4730     const struct i965_surface *src_surface,
4731     const VARectangle *src_rect,
4732     struct i965_surface *dst_surface,
4733     const VARectangle *dst_rect,
4734     int                pp_index,
4735     void *filter_param
4736 )
4737 {
4738     VAStatus va_status;
4739
4740     if (pp_context && pp_context->intel_post_processing) {
4741         va_status = (pp_context->intel_post_processing)(ctx, pp_context,
4742                                                         src_surface, src_rect,
4743                                                         dst_surface, dst_rect,
4744                                                         pp_index, filter_param);
4745     } else {
4746         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4747     }
4748
4749     return va_status;
4750 }
4751
/*
 * Split a packed 0xAARRGGBB value into 8-bit Y, U, V and alpha.
 *
 * The colour conversion uses integer arithmetic with coefficients scaled by
 * 1000 (the familiar BT.601 limited-range matrix): luma is offset by 16,
 * both chroma components by 128.  Division truncates toward zero.
 * Alpha is the top byte of the input, copied unchanged.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red   = (int)((argb >> 16) & 0xff);
    const int green = (int)((argb >> 8) & 0xff);
    const int blue  = (int)(argb & 0xff);

    const int luma      = (257 * red + 504 * green + 98 * blue) / 1000 + 16;
    const int chroma_v  = (439 * red - 368 * green - 71 * blue) / 1000 + 128;
    const int chroma_u  = (-148 * red - 291 * green + 439 * blue) / 1000 + 128;

    *y = (unsigned char)luma;
    *v = (unsigned char)chroma_v;
    *u = (unsigned char)chroma_u;
    *a = (unsigned char)((argb >> 24) & 0xff);
}
4768
4769 static void
4770 i965_vpp_clear_surface(VADriverContextP ctx,
4771                        struct i965_post_processing_context *pp_context,
4772                        struct object_surface *obj_surface,
4773                        unsigned int color)
4774 {
4775     struct i965_driver_data *i965 = i965_driver_data(ctx);
4776     struct intel_batchbuffer *batch = pp_context->batch;
4777     unsigned int blt_cmd, br13;
4778     unsigned int tiling = 0, swizzle = 0;
4779     int pitch;
4780     unsigned char y, u, v, a = 0;
4781     int region_width, region_height;
4782
4783     /* Currently only support NV12 surface */
4784     if (!obj_surface || obj_surface->fourcc != VA_FOURCC_NV12)
4785         return;
4786
4787     rgb_to_yuv(color, &y, &u, &v, &a);
4788
4789     if (a == 0)
4790         return;
4791
4792     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4793     blt_cmd = XY_COLOR_BLT_CMD;
4794     pitch = obj_surface->width;
4795
4796     if (tiling != I915_TILING_NONE) {
4797         assert(tiling == I915_TILING_Y);
4798         // blt_cmd |= XY_COLOR_BLT_DST_TILED;
4799         // pitch >>= 2;
4800     }
4801
4802     br13 = 0xf0 << 16;
4803     br13 |= BR13_8;
4804     br13 |= pitch;
4805
4806     if (IS_IRONLAKE(i965->intel.device_info)) {
4807         intel_batchbuffer_start_atomic(batch, 48);
4808         BEGIN_BATCH(batch, 12);
4809     } else {
4810         /* Will double-check the command if the new chipset is added */
4811         intel_batchbuffer_start_atomic_blt(batch, 48);
4812         BEGIN_BLT_BATCH(batch, 12);
4813     }
4814
4815     region_width = obj_surface->width;
4816     region_height = obj_surface->height;
4817
4818     OUT_BATCH(batch, blt_cmd);
4819     OUT_BATCH(batch, br13);
4820     OUT_BATCH(batch,
4821               0 << 16 |
4822               0);
4823     OUT_BATCH(batch,
4824               region_height << 16 |
4825               region_width);
4826     OUT_RELOC(batch, obj_surface->bo,
4827               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4828               0);
4829     OUT_BATCH(batch, y);
4830
4831     br13 = 0xf0 << 16;
4832     br13 |= BR13_565;
4833     br13 |= pitch;
4834
4835     region_width = obj_surface->width / 2;
4836     region_height = obj_surface->height / 2;
4837
4838     if (tiling == I915_TILING_Y) {
4839         region_height = ALIGN(obj_surface->height / 2, 32);
4840     }
4841
4842     OUT_BATCH(batch, blt_cmd);
4843     OUT_BATCH(batch, br13);
4844     OUT_BATCH(batch,
4845               0 << 16 |
4846               0);
4847     OUT_BATCH(batch,
4848               region_height << 16 |
4849               region_width);
4850     OUT_RELOC(batch, obj_surface->bo,
4851               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4852               obj_surface->width * obj_surface->y_cb_offset);
4853     OUT_BATCH(batch, v << 8 | u);
4854
4855     ADVANCE_BATCH(batch);
4856     intel_batchbuffer_end_atomic(batch);
4857 }
4858
4859 VAStatus
4860 i965_scaling_processing(
4861     VADriverContextP   ctx,
4862     struct object_surface *src_surface_obj,
4863     const VARectangle *src_rect,
4864     struct object_surface *dst_surface_obj,
4865     const VARectangle *dst_rect,
4866     unsigned int       va_flags)
4867 {
4868     VAStatus va_status = VA_STATUS_SUCCESS;
4869     struct i965_driver_data *i965 = i965_driver_data(ctx);
4870
4871     assert(src_surface_obj->fourcc == VA_FOURCC_NV12);
4872     assert(dst_surface_obj->fourcc == VA_FOURCC_NV12);
4873
4874     if (HAS_VPP(i965)) {
4875         struct i965_surface src_surface;
4876         struct i965_surface dst_surface;
4877         struct i965_post_processing_context *pp_context;
4878         unsigned int filter_flags;
4879
4880         _i965LockMutex(&i965->pp_mutex);
4881
4882         src_surface.base = (struct object_base *)src_surface_obj;
4883         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4884         src_surface.flags = I965_SURFACE_FLAG_FRAME;
4885         dst_surface.base = (struct object_base *)dst_surface_obj;
4886         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4887         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4888
4889         pp_context = i965->pp_context;
4890         filter_flags = pp_context->filter_flags;
4891         pp_context->filter_flags = va_flags;
4892
4893         va_status = i965_post_processing_internal(ctx, pp_context,
4894                                                   &src_surface, src_rect, &dst_surface, dst_rect,
4895                                                   avs_is_needed(va_flags) ? PP_NV12_AVS : PP_NV12_SCALING, NULL);
4896
4897         pp_context->filter_flags = filter_flags;
4898
4899         _i965UnlockMutex(&i965->pp_mutex);
4900     }
4901
4902     return va_status;
4903 }
4904
4905 VASurfaceID
4906 i965_post_processing(
4907     VADriverContextP   ctx,
4908     struct object_surface *obj_surface,
4909     const VARectangle *src_rect,
4910     const VARectangle *dst_rect,
4911     unsigned int       va_flags,
4912     int               *has_done_scaling,
4913     VARectangle *calibrated_rect
4914 )
4915 {
4916     struct i965_driver_data *i965 = i965_driver_data(ctx);
4917     VASurfaceID out_surface_id = VA_INVALID_ID;
4918     VASurfaceID tmp_id = VA_INVALID_ID;
4919
4920     *has_done_scaling = 0;
4921
4922     if (HAS_VPP(i965)) {
4923         VAStatus status;
4924         struct i965_surface src_surface;
4925         struct i965_surface dst_surface;
4926         struct i965_post_processing_context *pp_context;
4927
4928         /* Currently only support post processing for NV12 surface */
4929         if (obj_surface->fourcc != VA_FOURCC_NV12)
4930             return out_surface_id;
4931
4932         _i965LockMutex(&i965->pp_mutex);
4933
4934         pp_context = i965->pp_context;
4935         pp_context->filter_flags = va_flags;
4936         if (avs_is_needed(va_flags)) {
4937             VARectangle tmp_dst_rect;
4938
4939             if (out_surface_id != VA_INVALID_ID)
4940                 tmp_id = out_surface_id;
4941
4942             tmp_dst_rect.x = 0;
4943             tmp_dst_rect.y = 0;
4944             tmp_dst_rect.width = dst_rect->width;
4945             tmp_dst_rect.height = dst_rect->height;
4946             src_surface.base = (struct object_base *)obj_surface;
4947             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4948             src_surface.flags = I965_SURFACE_FLAG_FRAME;
4949
4950             status = i965_CreateSurfaces(ctx,
4951                                          dst_rect->width,
4952                                          dst_rect->height,
4953                                          VA_RT_FORMAT_YUV420,
4954                                          1,
4955                                          &out_surface_id);
4956             assert(status == VA_STATUS_SUCCESS);
4957             obj_surface = SURFACE(out_surface_id);
4958             assert(obj_surface);
4959             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4960             i965_vpp_clear_surface(ctx, pp_context, obj_surface, 0);
4961
4962             dst_surface.base = (struct object_base *)obj_surface;
4963             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4964             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4965
4966             i965_post_processing_internal(ctx, pp_context,
4967                                           &src_surface,
4968                                           src_rect,
4969                                           &dst_surface,
4970                                           &tmp_dst_rect,
4971                                           PP_NV12_AVS,
4972                                           NULL);
4973
4974             if (tmp_id != VA_INVALID_ID)
4975                 i965_DestroySurfaces(ctx, &tmp_id, 1);
4976
4977             *has_done_scaling = 1;
4978             calibrated_rect->x = 0;
4979             calibrated_rect->y = 0;
4980             calibrated_rect->width = dst_rect->width;
4981             calibrated_rect->height = dst_rect->height;
4982         }
4983
4984         _i965UnlockMutex(&i965->pp_mutex);
4985     }
4986
4987     return out_surface_id;
4988 }
4989
4990 static VAStatus
4991 i965_image_pl2_processing(VADriverContextP ctx,
4992                           const struct i965_surface *src_surface,
4993                           const VARectangle *src_rect,
4994                           struct i965_surface *dst_surface,
4995                           const VARectangle *dst_rect);
4996
4997 static VAStatus
4998 i965_image_plx_nv12_plx_processing(VADriverContextP ctx,
4999                                    VAStatus(*i965_image_plx_nv12_processing)(
5000                                        VADriverContextP,
5001                                        const struct i965_surface *,
5002                                        const VARectangle *,
5003                                        struct i965_surface *,
5004                                        const VARectangle *),
5005                                    const struct i965_surface *src_surface,
5006                                    const VARectangle *src_rect,
5007                                    struct i965_surface *dst_surface,
5008                                    const VARectangle *dst_rect)
5009 {
5010     struct i965_driver_data *i965 = i965_driver_data(ctx);
5011     VAStatus status;
5012     VASurfaceID tmp_surface_id = VA_INVALID_SURFACE;
5013     struct object_surface *obj_surface = NULL;
5014     struct i965_surface tmp_surface;
5015     int width, height;
5016
5017     pp_get_surface_size(ctx, dst_surface, &width, &height);
5018     status = i965_CreateSurfaces(ctx,
5019                                  width,
5020                                  height,
5021                                  VA_RT_FORMAT_YUV420,
5022                                  1,
5023                                  &tmp_surface_id);
5024     assert(status == VA_STATUS_SUCCESS);
5025     obj_surface = SURFACE(tmp_surface_id);
5026     assert(obj_surface);
5027     i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5028
5029     tmp_surface.base = (struct object_base *)obj_surface;
5030     tmp_surface.type = I965_SURFACE_TYPE_SURFACE;
5031     tmp_surface.flags = I965_SURFACE_FLAG_FRAME;
5032
5033     status = i965_image_plx_nv12_processing(ctx,
5034                                             src_surface,
5035                                             src_rect,
5036                                             &tmp_surface,
5037                                             dst_rect);
5038
5039     if (status == VA_STATUS_SUCCESS)
5040         status = i965_image_pl2_processing(ctx,
5041                                            &tmp_surface,
5042                                            dst_rect,
5043                                            dst_surface,
5044                                            dst_rect);
5045
5046     i965_DestroySurfaces(ctx,
5047                          &tmp_surface_id,
5048                          1);
5049
5050     return status;
5051 }
5052
5053
5054 static VAStatus
5055 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
5056                                const struct i965_surface *src_surface,
5057                                const VARectangle *src_rect,
5058                                struct i965_surface *dst_surface,
5059                                const VARectangle *dst_rect)
5060 {
5061     struct i965_driver_data *i965 = i965_driver_data(ctx);
5062     struct i965_post_processing_context *pp_context = i965->pp_context;
5063     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5064     VAStatus vaStatus;
5065
5066     vaStatus = intel_common_scaling_post_processing(ctx,
5067                                                     pp_context,
5068                                                     src_surface,
5069                                                     src_rect,
5070                                                     dst_surface,
5071                                                     dst_rect);
5072
5073     if (vaStatus != VA_STATUS_ERROR_UNIMPLEMENTED)
5074         return vaStatus;
5075
5076     switch (fourcc) {
5077     case VA_FOURCC_NV12:
5078         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5079                                                  src_surface,
5080                                                  src_rect,
5081                                                  dst_surface,
5082                                                  dst_rect,
5083                                                  PP_RGBX_LOAD_SAVE_NV12,
5084                                                  NULL);
5085         intel_batchbuffer_flush(pp_context->batch);
5086         break;
5087
5088     default:
5089         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5090                                                       i965_image_pl1_rgbx_processing,
5091                                                       src_surface,
5092                                                       src_rect,
5093                                                       dst_surface,
5094                                                       dst_rect);
5095         break;
5096     }
5097
5098     return vaStatus;
5099 }
5100
5101 static VAStatus
5102 i965_image_pl3_processing(VADriverContextP ctx,
5103                           const struct i965_surface *src_surface,
5104                           const VARectangle *src_rect,
5105                           struct i965_surface *dst_surface,
5106                           const VARectangle *dst_rect)
5107 {
5108     struct i965_driver_data *i965 = i965_driver_data(ctx);
5109     struct i965_post_processing_context *pp_context = i965->pp_context;
5110     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5111     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5112
5113     vaStatus = intel_common_scaling_post_processing(ctx,
5114                                                     pp_context,
5115                                                     src_surface,
5116                                                     src_rect,
5117                                                     dst_surface,
5118                                                     dst_rect);
5119
5120     if (vaStatus != VA_STATUS_ERROR_UNIMPLEMENTED)
5121         return vaStatus;
5122
5123     switch (fourcc) {
5124     case VA_FOURCC_NV12:
5125         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5126                                                  src_surface,
5127                                                  src_rect,
5128                                                  dst_surface,
5129                                                  dst_rect,
5130                                                  PP_PL3_LOAD_SAVE_N12,
5131                                                  NULL);
5132         intel_batchbuffer_flush(pp_context->batch);
5133         break;
5134
5135     case VA_FOURCC_IMC1:
5136     case VA_FOURCC_IMC3:
5137     case VA_FOURCC_YV12:
5138     case VA_FOURCC_I420:
5139         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5140                                                  src_surface,
5141                                                  src_rect,
5142                                                  dst_surface,
5143                                                  dst_rect,
5144                                                  PP_PL3_LOAD_SAVE_PL3,
5145                                                  NULL);
5146         intel_batchbuffer_flush(pp_context->batch);
5147         break;
5148
5149     case VA_FOURCC_YUY2:
5150     case VA_FOURCC_UYVY:
5151         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5152                                                  src_surface,
5153                                                  src_rect,
5154                                                  dst_surface,
5155                                                  dst_rect,
5156                                                  PP_PL3_LOAD_SAVE_PA,
5157                                                  NULL);
5158         intel_batchbuffer_flush(pp_context->batch);
5159         break;
5160
5161     default:
5162         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5163                                                       i965_image_pl3_processing,
5164                                                       src_surface,
5165                                                       src_rect,
5166                                                       dst_surface,
5167                                                       dst_rect);
5168         break;
5169     }
5170
5171     return vaStatus;
5172 }
5173
5174 static VAStatus
5175 i965_image_pl2_processing(VADriverContextP ctx,
5176                           const struct i965_surface *src_surface,
5177                           const VARectangle *src_rect,
5178                           struct i965_surface *dst_surface,
5179                           const VARectangle *dst_rect)
5180 {
5181     struct i965_driver_data *i965 = i965_driver_data(ctx);
5182     struct i965_post_processing_context *pp_context = i965->pp_context;
5183     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5184     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5185
5186     vaStatus = intel_common_scaling_post_processing(ctx,
5187                                                     pp_context,
5188                                                     src_surface,
5189                                                     src_rect,
5190                                                     dst_surface,
5191                                                     dst_rect);
5192
5193     if (vaStatus != VA_STATUS_ERROR_UNIMPLEMENTED)
5194         return vaStatus;
5195
5196     switch (fourcc) {
5197     case VA_FOURCC_NV12:
5198         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5199                                                  src_surface,
5200                                                  src_rect,
5201                                                  dst_surface,
5202                                                  dst_rect,
5203                                                  PP_NV12_LOAD_SAVE_N12,
5204                                                  NULL);
5205         break;
5206
5207     case VA_FOURCC_IMC1:
5208     case VA_FOURCC_IMC3:
5209     case VA_FOURCC_YV12:
5210     case VA_FOURCC_I420:
5211         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5212                                                  src_surface,
5213                                                  src_rect,
5214                                                  dst_surface,
5215                                                  dst_rect,
5216                                                  PP_NV12_LOAD_SAVE_PL3,
5217                                                  NULL);
5218         break;
5219
5220     case VA_FOURCC_YUY2:
5221     case VA_FOURCC_UYVY:
5222         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5223                                                  src_surface,
5224                                                  src_rect,
5225                                                  dst_surface,
5226                                                  dst_rect,
5227                                                  PP_NV12_LOAD_SAVE_PA,
5228                                                  NULL);
5229         break;
5230
5231     case VA_FOURCC_BGRX:
5232     case VA_FOURCC_BGRA:
5233     case VA_FOURCC_RGBX:
5234     case VA_FOURCC_RGBA:
5235         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5236                                                  src_surface,
5237                                                  src_rect,
5238                                                  dst_surface,
5239                                                  dst_rect,
5240                                                  PP_NV12_LOAD_SAVE_RGBX,
5241                                                  NULL);
5242         break;
5243
5244     default:
5245         return VA_STATUS_ERROR_UNIMPLEMENTED;
5246     }
5247
5248     intel_batchbuffer_flush(pp_context->batch);
5249
5250     return vaStatus;
5251 }
5252
5253 static VAStatus
5254 i965_image_pl1_processing(VADriverContextP ctx,
5255                           const struct i965_surface *src_surface,
5256                           const VARectangle *src_rect,
5257                           struct i965_surface *dst_surface,
5258                           const VARectangle *dst_rect)
5259 {
5260     struct i965_driver_data *i965 = i965_driver_data(ctx);
5261     struct i965_post_processing_context *pp_context = i965->pp_context;
5262     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5263     VAStatus vaStatus;
5264
5265     vaStatus = intel_common_scaling_post_processing(ctx,
5266                                                     pp_context,
5267                                                     src_surface,
5268                                                     src_rect,
5269                                                     dst_surface,
5270                                                     dst_rect);
5271
5272     if (vaStatus != VA_STATUS_ERROR_UNIMPLEMENTED)
5273         return vaStatus;
5274
5275     switch (fourcc) {
5276     case VA_FOURCC_NV12:
5277         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5278                                                  src_surface,
5279                                                  src_rect,
5280                                                  dst_surface,
5281                                                  dst_rect,
5282                                                  PP_PA_LOAD_SAVE_NV12,
5283                                                  NULL);
5284         intel_batchbuffer_flush(pp_context->batch);
5285         break;
5286
5287     case VA_FOURCC_YV12:
5288         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5289                                                  src_surface,
5290                                                  src_rect,
5291                                                  dst_surface,
5292                                                  dst_rect,
5293                                                  PP_PA_LOAD_SAVE_PL3,
5294                                                  NULL);
5295         intel_batchbuffer_flush(pp_context->batch);
5296         break;
5297
5298     case VA_FOURCC_YUY2:
5299     case VA_FOURCC_UYVY:
5300         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5301                                                  src_surface,
5302                                                  src_rect,
5303                                                  dst_surface,
5304                                                  dst_rect,
5305                                                  PP_PA_LOAD_SAVE_PA,
5306                                                  NULL);
5307         intel_batchbuffer_flush(pp_context->batch);
5308         break;
5309
5310     default:
5311         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5312                                                       i965_image_pl1_processing,
5313                                                       src_surface,
5314                                                       src_rect,
5315                                                       dst_surface,
5316                                                       dst_rect);
5317         break;
5318     }
5319
5320     return vaStatus;
5321 }
5322
5323 // it only support NV12 and P010 for vebox proc ctx
5324 static struct object_surface *derive_surface(VADriverContextP ctx,
5325                                              struct object_image *obj_image,
5326                                              struct object_surface *obj_surface)
5327 {
5328     VAImage * const image = &obj_image->image;
5329
5330     memset((void *)obj_surface, 0, sizeof(*obj_surface));
5331     obj_surface->fourcc = image->format.fourcc;
5332     obj_surface->orig_width = image->width;
5333     obj_surface->orig_height = image->height;
5334     obj_surface->width = image->pitches[0];
5335     obj_surface->height = image->height;
5336     obj_surface->y_cb_offset = image->offsets[1] / obj_surface->width;
5337     obj_surface->y_cr_offset = obj_surface->y_cb_offset;
5338     obj_surface->bo = obj_image->bo;
5339     obj_surface->subsampling = SUBSAMPLE_YUV420;
5340
5341     return obj_surface;
5342 }
5343
5344 static VAStatus
5345 vebox_processing_simple(VADriverContextP ctx,
5346                         struct i965_post_processing_context *pp_context,
5347                         struct object_surface *src_obj_surface,
5348                         struct object_surface *dst_obj_surface,
5349                         const VARectangle *rect)
5350 {
5351     struct i965_driver_data *i965 = i965_driver_data(ctx);
5352     VAProcPipelineParameterBuffer pipeline_param;
5353     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
5354
5355     if (pp_context->vebox_proc_ctx == NULL) {
5356         pp_context->vebox_proc_ctx = gen75_vebox_context_init(ctx);
5357     }
5358
5359     memset((void *)&pipeline_param, 0, sizeof(pipeline_param));
5360     pipeline_param.surface_region = rect;
5361     pipeline_param.output_region = rect;
5362     pipeline_param.filter_flags = 0;
5363     pipeline_param.num_filters  = 0;
5364
5365     pp_context->vebox_proc_ctx->pipeline_param = &pipeline_param;
5366     pp_context->vebox_proc_ctx->surface_input_object = src_obj_surface;
5367     pp_context->vebox_proc_ctx->surface_output_object = dst_obj_surface;
5368
5369     if (IS_GEN9(i965->intel.device_info))
5370         status = gen9_vebox_process_picture(ctx, pp_context->vebox_proc_ctx);
5371     else if (IS_GEN10(i965->intel.device_info))
5372         status = gen10_vebox_process_picture(ctx, pp_context->vebox_proc_ctx);
5373
5374     return status;
5375 }
5376
5377 static VAStatus
5378 i965_image_p010_processing(VADriverContextP ctx,
5379                            const struct i965_surface *src_surface,
5380                            const VARectangle *src_rect,
5381                            struct i965_surface *dst_surface,
5382                            const VARectangle *dst_rect)
5383 {
5384 #define HAS_VPP_P010(ctx)        ((ctx)->codec_info->has_vpp_p010 && \
5385                                      (ctx)->intel.has_bsd)
5386
5387     struct i965_driver_data *i965 = i965_driver_data(ctx);
5388     struct i965_post_processing_context *pp_context = i965->pp_context;
5389     struct object_surface *src_obj_surface = NULL, *dst_obj_surface = NULL;
5390     struct object_surface tmp_src_obj_surface, tmp_dst_obj_surface;
5391     struct object_surface *tmp_surface = NULL;
5392     VASurfaceID tmp_surface_id[3], out_surface_id = VA_INVALID_ID;
5393     int num_tmp_surfaces = 0;
5394     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5395     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5396     int vpp_post = 0;
5397
5398     vaStatus = intel_common_scaling_post_processing(ctx,
5399                                                     pp_context,
5400                                                     src_surface,
5401                                                     src_rect,
5402                                                     dst_surface,
5403                                                     dst_rect);
5404
5405     if (vaStatus != VA_STATUS_ERROR_UNIMPLEMENTED)
5406         return vaStatus;
5407
5408     if (HAS_VPP_P010(i965)) {
5409         vpp_post = 0;
5410         switch (fourcc) {
5411         case VA_FOURCC_NV12:
5412             if (src_rect->x != dst_rect->x ||
5413                 src_rect->y != dst_rect->y ||
5414                 src_rect->width != dst_rect->width ||
5415                 src_rect->height != dst_rect->height) {
5416                 vpp_post = 1;
5417             }
5418             break;
5419         case VA_FOURCC_P010:
5420             // don't support scaling while the fourcc of dst_surface is P010
5421             if (src_rect->x != dst_rect->x ||
5422                 src_rect->y != dst_rect->y ||
5423                 src_rect->width != dst_rect->width ||
5424                 src_rect->height != dst_rect->height) {
5425                 vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5426                 goto EXIT;
5427             }
5428             break;
5429         default:
5430             vpp_post = 1;
5431             break;
5432         }
5433
5434         if (src_surface->type == I965_SURFACE_TYPE_IMAGE) {
5435             src_obj_surface = derive_surface(ctx, (struct object_image *)src_surface->base,
5436                                              &tmp_src_obj_surface);
5437         } else
5438             src_obj_surface = (struct object_surface *)src_surface->base;
5439
5440         if (src_obj_surface == NULL) {
5441             vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED;
5442             goto EXIT;
5443         }
5444
5445         if (vpp_post == 1) {
5446             vaStatus = i965_CreateSurfaces(ctx,
5447                                            src_obj_surface->orig_width,
5448                                            src_obj_surface->orig_height,
5449                                            VA_RT_FORMAT_YUV420,
5450                                            1,
5451                                            &out_surface_id);
5452             assert(vaStatus == VA_STATUS_SUCCESS);
5453             tmp_surface_id[num_tmp_surfaces++] = out_surface_id;
5454             tmp_surface = SURFACE(out_surface_id);
5455             assert(tmp_surface);
5456             i965_check_alloc_surface_bo(ctx, tmp_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5457         }
5458
5459         if (tmp_surface != NULL)
5460             dst_obj_surface = tmp_surface;
5461         else {
5462             if (dst_surface->type == I965_SURFACE_TYPE_IMAGE) {
5463                 dst_obj_surface = derive_surface(ctx, (struct object_image *)dst_surface->base,
5464                                                  &tmp_dst_obj_surface);
5465             } else
5466                 dst_obj_surface = (struct object_surface *)dst_surface->base;
5467         }
5468
5469         if (dst_obj_surface == NULL) {
5470             vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED;
5471             goto EXIT;
5472         }
5473
5474         vaStatus = vebox_processing_simple(ctx,
5475                                            pp_context,
5476                                            src_obj_surface,
5477                                            dst_obj_surface,
5478                                            src_rect);
5479         if (vaStatus != VA_STATUS_SUCCESS)
5480             goto EXIT;
5481
5482         if (vpp_post == 1) {
5483             struct i965_surface src_surface_new;
5484
5485             if (tmp_surface != NULL) {
5486                 src_surface_new.base = (struct object_base *)tmp_surface;
5487                 src_surface_new.type = I965_SURFACE_TYPE_SURFACE;
5488                 src_surface_new.flags = I965_SURFACE_FLAG_FRAME;
5489             } else
5490                 memcpy((void *)&src_surface_new, (void *)src_surface, sizeof(src_surface_new));
5491
5492             vaStatus = i965_image_pl2_processing(ctx,
5493                                                  &src_surface_new,
5494                                                  src_rect,
5495                                                  dst_surface,
5496                                                  dst_rect);
5497         }
5498     }
5499
5500 EXIT:
5501     if (num_tmp_surfaces)
5502         i965_DestroySurfaces(ctx,
5503                              tmp_surface_id,
5504                              num_tmp_surfaces);
5505
5506     return vaStatus;
5507 }
5508
5509 VAStatus
5510 i965_image_processing(VADriverContextP ctx,
5511                       const struct i965_surface *src_surface,
5512                       const VARectangle *src_rect,
5513                       struct i965_surface *dst_surface,
5514                       const VARectangle *dst_rect)
5515 {
5516     struct i965_driver_data *i965 = i965_driver_data(ctx);
5517     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
5518
5519     if (HAS_VPP(i965)) {
5520         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
5521
5522         _i965LockMutex(&i965->pp_mutex);
5523
5524         switch (fourcc) {
5525         case VA_FOURCC_YV12:
5526         case VA_FOURCC_I420:
5527         case VA_FOURCC_IMC1:
5528         case VA_FOURCC_IMC3:
5529         case VA_FOURCC_422H:
5530         case VA_FOURCC_422V:
5531         case VA_FOURCC_411P:
5532         case VA_FOURCC_444P:
5533         case VA_FOURCC_YV16:
5534             status = i965_image_pl3_processing(ctx,
5535                                                src_surface,
5536                                                src_rect,
5537                                                dst_surface,
5538                                                dst_rect);
5539             break;
5540
5541         case  VA_FOURCC_NV12:
5542             status = i965_image_pl2_processing(ctx,
5543                                                src_surface,
5544                                                src_rect,
5545                                                dst_surface,
5546                                                dst_rect);
5547             break;
5548         case VA_FOURCC_YUY2:
5549         case VA_FOURCC_UYVY:
5550             status = i965_image_pl1_processing(ctx,
5551                                                src_surface,
5552                                                src_rect,
5553                                                dst_surface,
5554                                                dst_rect);
5555             break;
5556         case VA_FOURCC_BGRA:
5557         case VA_FOURCC_BGRX:
5558         case VA_FOURCC_RGBA:
5559         case VA_FOURCC_RGBX:
5560             status = i965_image_pl1_rgbx_processing(ctx,
5561                                                     src_surface,
5562                                                     src_rect,
5563                                                     dst_surface,
5564                                                     dst_rect);
5565             break;
5566         case VA_FOURCC_P010:
5567             status = i965_image_p010_processing(ctx,
5568                                                 src_surface,
5569                                                 src_rect,
5570                                                 dst_surface,
5571                                                 dst_rect);
5572             break;
5573         default:
5574             status = VA_STATUS_ERROR_UNIMPLEMENTED;
5575             break;
5576         }
5577
5578         _i965UnlockMutex(&i965->pp_mutex);
5579     }
5580
5581     return status;
5582 }
5583
5584 static void
5585 i965_post_processing_context_finalize(VADriverContextP ctx,
5586                                       struct i965_post_processing_context *pp_context)
5587 {
5588     int i;
5589
5590     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
5591     pp_context->surface_state_binding_table.bo = NULL;
5592
5593     dri_bo_unreference(pp_context->curbe.bo);
5594     pp_context->curbe.bo = NULL;
5595
5596     dri_bo_unreference(pp_context->sampler_state_table.bo);
5597     pp_context->sampler_state_table.bo = NULL;
5598
5599     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
5600     pp_context->sampler_state_table.bo_8x8 = NULL;
5601
5602     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
5603     pp_context->sampler_state_table.bo_8x8_uv = NULL;
5604
5605     dri_bo_unreference(pp_context->idrt.bo);
5606     pp_context->idrt.bo = NULL;
5607     pp_context->idrt.num_interface_descriptors = 0;
5608
5609     dri_bo_unreference(pp_context->vfe_state.bo);
5610     pp_context->vfe_state.bo = NULL;
5611
5612     for (i = 0; i < ARRAY_ELEMS(pp_context->pp_dndi_context.frame_store); i++)
5613         pp_dndi_frame_store_clear(&pp_context->pp_dndi_context.frame_store[i],
5614                                   ctx);
5615
5616     dri_bo_unreference(pp_context->pp_dn_context.stmm_bo);
5617     pp_context->pp_dn_context.stmm_bo = NULL;
5618
5619     for (i = 0; i < NUM_PP_MODULES; i++) {
5620         struct pp_module *pp_module = &pp_context->pp_modules[i];
5621
5622         dri_bo_unreference(pp_module->kernel.bo);
5623         pp_module->kernel.bo = NULL;
5624     }
5625
5626     free(pp_context->pp_static_parameter);
5627     free(pp_context->pp_inline_parameter);
5628     pp_context->pp_static_parameter = NULL;
5629     pp_context->pp_inline_parameter = NULL;
5630 }
5631
5632 void
5633 i965_post_processing_terminate(VADriverContextP ctx)
5634 {
5635     struct i965_driver_data *i965 = i965_driver_data(ctx);
5636     struct i965_post_processing_context *pp_context = i965->pp_context;
5637
5638     if (pp_context) {
5639         pp_context->finalize(ctx, pp_context);
5640         free(pp_context);
5641     }
5642
5643     i965->pp_context = NULL;
5644 }
5645
5646 #define VPP_CURBE_ALLOCATION_SIZE   32
5647
5648 void
5649 i965_post_processing_context_init(VADriverContextP ctx,
5650                                   void *data,
5651                                   struct intel_batchbuffer *batch)
5652 {
5653     struct i965_driver_data *i965 = i965_driver_data(ctx);
5654     int i;
5655     struct i965_post_processing_context *pp_context = data;
5656     const AVSConfig *avs_config;
5657
5658     if (IS_IRONLAKE(i965->intel.device_info)) {
5659         pp_context->urb.size = i965->intel.device_info->urb_size;
5660         pp_context->urb.num_vfe_entries = 32;
5661         pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
5662         pp_context->urb.num_cs_entries = 1;
5663         pp_context->urb.size_cs_entry = 2;
5664         pp_context->urb.vfe_start = 0;
5665         pp_context->urb.cs_start = pp_context->urb.vfe_start +
5666                                    pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
5667         assert(pp_context->urb.cs_start +
5668                pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);
5669         pp_context->intel_post_processing = ironlake_post_processing;
5670     } else {
5671         pp_context->vfe_gpu_state.max_num_threads = 60;
5672         pp_context->vfe_gpu_state.num_urb_entries = 59;
5673         pp_context->vfe_gpu_state.gpgpu_mode = 0;
5674         pp_context->vfe_gpu_state.urb_entry_size = 16 - 1;
5675         pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE;
5676         pp_context->intel_post_processing = gen6_post_processing;
5677     }
5678
5679     pp_context->finalize = i965_post_processing_context_finalize;
5680
5681     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
5682     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
5683     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
5684     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
5685
5686     if (IS_HASWELL(i965->intel.device_info))
5687         memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
5688     else if (IS_GEN7(i965->intel.device_info))
5689         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
5690     else if (IS_GEN6(i965->intel.device_info))
5691         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
5692     else if (IS_IRONLAKE(i965->intel.device_info))
5693         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
5694
5695     for (i = 0; i < NUM_PP_MODULES; i++) {
5696         struct pp_module *pp_module = &pp_context->pp_modules[i];
5697         dri_bo_unreference(pp_module->kernel.bo);
5698         if (pp_module->kernel.bin && pp_module->kernel.size) {
5699             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
5700                                                 pp_module->kernel.name,
5701                                                 pp_module->kernel.size,
5702                                                 4096);
5703             assert(pp_module->kernel.bo);
5704             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
5705         } else {
5706             pp_module->kernel.bo = NULL;
5707         }
5708     }
5709
5710     /* static & inline parameters */
5711     if (IS_GEN7(i965->intel.device_info)) {
5712         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
5713         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
5714     } else {
5715         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
5716         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
5717     }
5718
5719     pp_context->batch = batch;
5720     pp_dndi_context_init(&pp_context->pp_dndi_context);
5721
5722     avs_config = IS_IRONLAKE(i965->intel.device_info) ? &gen5_avs_config :
5723                  &gen6_avs_config;
5724     avs_init_state(&pp_context->pp_avs_context.state, avs_config);
5725 }
5726
5727 bool
5728 i965_post_processing_init(VADriverContextP ctx)
5729 {
5730     struct i965_driver_data *i965 = i965_driver_data(ctx);
5731     struct i965_post_processing_context *pp_context = i965->pp_context;
5732
5733     if (HAS_VPP(i965)) {
5734         if (pp_context == NULL) {
5735             pp_context = calloc(1, sizeof(*pp_context));
5736             assert(pp_context);
5737             i965->codec_info->post_processing_context_init(ctx, pp_context, i965->pp_batch);
5738             i965->pp_context = pp_context;
5739         }
5740     }
5741
5742     return true;
5743 }
5744
5745 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
5746     PP_NULL,    /* VAProcFilterNone */
5747     PP_NV12_DN, /* VAProcFilterNoiseReduction */
5748     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
5749     PP_NULL,    /* VAProcFilterSharpening */
5750     PP_NULL,    /* VAProcFilterColorBalance */
5751 };
5752
5753 static const int proc_frame_to_pp_frame[3] = {
5754     I965_SURFACE_FLAG_FRAME,
5755     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
5756     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
5757 };
5758
/* Bit flags describing what a processing request needs from the pipeline. */
enum {
    PP_OP_CHANGE_FORMAT = 0x1,  /* source and target fourcc differ */
    PP_OP_CHANGE_SIZE   = 0x2,  /* source and target rectangle sizes differ */
    PP_OP_DEINTERLACE   = 0x4,  /* deinterlacing / field selection requested */
    PP_OP_COMPLEX       = 0x8,  /* needs the full (non fast-path) pipeline */
};
5765
5766 static int
5767 pp_get_kernel_index(uint32_t src_fourcc, uint32_t dst_fourcc, uint32_t pp_ops,
5768                     uint32_t filter_flags)
5769 {
5770     int pp_index = -1;
5771
5772     if (!dst_fourcc)
5773         dst_fourcc = src_fourcc;
5774
5775     switch (src_fourcc) {
5776     case VA_FOURCC_RGBX:
5777     case VA_FOURCC_RGBA:
5778     case VA_FOURCC_BGRX:
5779     case VA_FOURCC_BGRA:
5780         switch (dst_fourcc) {
5781         case VA_FOURCC_NV12:
5782             pp_index = PP_RGBX_LOAD_SAVE_NV12;
5783             break;
5784         }
5785         break;
5786     case VA_FOURCC_YUY2:
5787     case VA_FOURCC_UYVY:
5788         switch (dst_fourcc) {
5789         case VA_FOURCC_NV12:
5790             pp_index = PP_PA_LOAD_SAVE_NV12;
5791             break;
5792         case VA_FOURCC_I420:
5793         case VA_FOURCC_YV12:
5794             pp_index = PP_PA_LOAD_SAVE_PL3;
5795             break;
5796         case VA_FOURCC_YUY2:
5797         case VA_FOURCC_UYVY:
5798             pp_index = PP_PA_LOAD_SAVE_PA;
5799             break;
5800         }
5801         break;
5802     case VA_FOURCC_NV12:
5803         switch (dst_fourcc) {
5804         case VA_FOURCC_NV12:
5805             if (pp_ops & PP_OP_CHANGE_SIZE)
5806                 pp_index = avs_is_needed(filter_flags) ?
5807                            PP_NV12_AVS : PP_NV12_SCALING;
5808             else
5809                 pp_index = PP_NV12_LOAD_SAVE_N12;
5810             break;
5811         case VA_FOURCC_I420:
5812         case VA_FOURCC_YV12:
5813         case VA_FOURCC_IMC1:
5814         case VA_FOURCC_IMC3:
5815             pp_index = PP_NV12_LOAD_SAVE_PL3;
5816             break;
5817         case VA_FOURCC_YUY2:
5818         case VA_FOURCC_UYVY:
5819             pp_index = PP_NV12_LOAD_SAVE_PA;
5820             break;
5821         case VA_FOURCC_RGBX:
5822         case VA_FOURCC_RGBA:
5823         case VA_FOURCC_BGRX:
5824         case VA_FOURCC_BGRA:
5825             pp_index = PP_NV12_LOAD_SAVE_RGBX;
5826             break;
5827         }
5828         break;
5829     case VA_FOURCC_I420:
5830     case VA_FOURCC_YV12:
5831     case VA_FOURCC_IMC1:
5832     case VA_FOURCC_IMC3:
5833     case VA_FOURCC_YV16:
5834     case VA_FOURCC_411P:
5835     case VA_FOURCC_422H:
5836     case VA_FOURCC_422V:
5837     case VA_FOURCC_444P:
5838         switch (dst_fourcc) {
5839         case VA_FOURCC_NV12:
5840             pp_index = PP_PL3_LOAD_SAVE_N12;
5841             break;
5842         case VA_FOURCC_I420:
5843         case VA_FOURCC_YV12:
5844         case VA_FOURCC_IMC1:
5845         case VA_FOURCC_IMC3:
5846             pp_index = PP_PL3_LOAD_SAVE_PL3;
5847             break;
5848         case VA_FOURCC_YUY2:
5849         case VA_FOURCC_UYVY:
5850             pp_index = PP_PL3_LOAD_SAVE_PA;
5851             break;
5852         }
5853         break;
5854     }
5855     return pp_index;
5856 }
5857
5858 static VAStatus
5859 i965_proc_picture_fast(VADriverContextP ctx,
5860                        struct i965_proc_context *proc_context, struct proc_state *proc_state)
5861 {
5862     struct i965_driver_data * const i965 = i965_driver_data(ctx);
5863     const VAProcPipelineParameterBuffer * const pipeline_param =
5864         (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
5865     struct object_surface *src_obj_surface, *dst_obj_surface;
5866     struct i965_surface src_surface, dst_surface;
5867     const VAProcFilterParameterBufferDeinterlacing *deint_params = NULL;
5868     VARectangle src_rect, dst_rect;
5869     VAStatus status;
5870     uint32_t i, filter_flags = 0, pp_ops = 0;
5871     int pp_index;
5872
5873     /* Validate pipeline parameters */
5874     if (pipeline_param->num_filters > 0 && !pipeline_param->filters)
5875         return VA_STATUS_ERROR_INVALID_PARAMETER;
5876
5877     for (i = 0; i < pipeline_param->num_filters; i++) {
5878         const VAProcFilterParameterBuffer *filter;
5879         struct object_buffer * const obj_buffer =
5880             BUFFER(pipeline_param->filters[i]);
5881
5882         assert(obj_buffer && obj_buffer->buffer_store);
5883         if (!obj_buffer || !obj_buffer->buffer_store)
5884             return VA_STATUS_ERROR_INVALID_PARAMETER;
5885
5886         filter = (VAProcFilterParameterBuffer *)
5887                  obj_buffer->buffer_store->buffer;
5888         switch (filter->type) {
5889         case VAProcFilterDeinterlacing:
5890             pp_ops |= PP_OP_DEINTERLACE;
5891             deint_params = (VAProcFilterParameterBufferDeinterlacing *)filter;
5892             break;
5893         default:
5894             pp_ops |= PP_OP_COMPLEX;
5895             break;
5896         }
5897     }
5898     filter_flags |= pipeline_param->filter_flags & VA_FILTER_SCALING_MASK;
5899
5900     /* Validate source surface */
5901     src_obj_surface = SURFACE(pipeline_param->surface);
5902     if (!src_obj_surface)
5903         return VA_STATUS_ERROR_INVALID_SURFACE;
5904
5905     if (!src_obj_surface->fourcc)
5906         return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
5907
5908     if (!src_obj_surface->bo)
5909         return VA_STATUS_ERROR_INVALID_SURFACE;
5910
5911     if (pipeline_param->surface_region) {
5912         src_rect.x = pipeline_param->surface_region->x;
5913         src_rect.y = pipeline_param->surface_region->y;
5914         src_rect.width = pipeline_param->surface_region->width;
5915         src_rect.height = pipeline_param->surface_region->height;
5916     } else {
5917         src_rect.x = 0;
5918         src_rect.y = 0;
5919         src_rect.width = src_obj_surface->orig_width;
5920         src_rect.height = src_obj_surface->orig_height;
5921     }
5922
5923     src_surface.base  = &src_obj_surface->base;
5924     src_surface.type  = I965_SURFACE_TYPE_SURFACE;
5925     src_surface.flags = I965_SURFACE_FLAG_FRAME;
5926
5927     if (pp_ops & PP_OP_DEINTERLACE) {
5928         filter_flags |= !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD) ?
5929                         VA_TOP_FIELD : VA_BOTTOM_FIELD;
5930         if (deint_params->algorithm != VAProcDeinterlacingBob)
5931             pp_ops |= PP_OP_COMPLEX;
5932     } else if (pipeline_param->filter_flags & (VA_TOP_FIELD | VA_BOTTOM_FIELD)) {
5933         filter_flags |= (pipeline_param->filter_flags & VA_TOP_FIELD) ?
5934                         VA_TOP_FIELD : VA_BOTTOM_FIELD;
5935         pp_ops |= PP_OP_DEINTERLACE;
5936     }
5937     if (pp_ops & PP_OP_DEINTERLACE) // XXX: no bob-deinterlacing optimization yet
5938         pp_ops |= PP_OP_COMPLEX;
5939
5940     /* Validate target surface */
5941     dst_obj_surface = SURFACE(proc_state->current_render_target);
5942     if (!dst_obj_surface)
5943         return VA_STATUS_ERROR_INVALID_SURFACE;
5944
5945     if (!dst_obj_surface->bo)
5946         return VA_STATUS_ERROR_INVALID_SURFACE;
5947
5948     if (dst_obj_surface->fourcc &&
5949         dst_obj_surface->fourcc != src_obj_surface->fourcc)
5950         pp_ops |= PP_OP_CHANGE_FORMAT;
5951
5952     if (pipeline_param->output_region) {
5953         dst_rect.x = pipeline_param->output_region->x;
5954         dst_rect.y = pipeline_param->output_region->y;
5955         dst_rect.width = pipeline_param->output_region->width;
5956         dst_rect.height = pipeline_param->output_region->height;
5957     } else {
5958         dst_rect.x = 0;
5959         dst_rect.y = 0;
5960         dst_rect.width = dst_obj_surface->orig_width;
5961         dst_rect.height = dst_obj_surface->orig_height;
5962     }
5963
5964     if (dst_rect.width != src_rect.width || dst_rect.height != src_rect.height)
5965         pp_ops |= PP_OP_CHANGE_SIZE;
5966
5967     dst_surface.base  = &dst_obj_surface->base;
5968     dst_surface.type  = I965_SURFACE_TYPE_SURFACE;
5969     dst_surface.flags = I965_SURFACE_FLAG_FRAME;
5970
5971     /* Validate "fast-path" processing capabilities */
5972     if (!IS_GEN7(i965->intel.device_info)) {
5973         if ((pp_ops & PP_OP_CHANGE_FORMAT) && (pp_ops & PP_OP_CHANGE_SIZE))
5974             return VA_STATUS_ERROR_UNIMPLEMENTED; // temporary surface is needed
5975     }
5976     if (pipeline_param->pipeline_flags & VA_PROC_PIPELINE_FAST) {
5977         filter_flags &= ~VA_FILTER_SCALING_MASK;
5978         filter_flags |= VA_FILTER_SCALING_FAST;
5979     } else {
5980         if (pp_ops & PP_OP_COMPLEX)
5981             return VA_STATUS_ERROR_UNIMPLEMENTED; // full pipeline is needed
5982         if ((filter_flags & VA_FILTER_SCALING_MASK) > VA_FILTER_SCALING_HQ)
5983             return VA_STATUS_ERROR_UNIMPLEMENTED;
5984     }
5985
5986     pp_index = pp_get_kernel_index(src_obj_surface->fourcc,
5987                                    dst_obj_surface->fourcc, pp_ops, filter_flags);
5988     if (pp_index < 0)
5989         return VA_STATUS_ERROR_UNIMPLEMENTED;
5990
5991     proc_context->pp_context.filter_flags = filter_flags;
5992     status = i965_post_processing_internal(ctx, &proc_context->pp_context,
5993                                            &src_surface, &src_rect, &dst_surface, &dst_rect, pp_index, NULL);
5994     intel_batchbuffer_flush(proc_context->pp_context.batch);
5995     return status;
5996 }
5997
5998 VAStatus
5999 i965_proc_picture(VADriverContextP ctx,
6000                   VAProfile profile,
6001                   union codec_state *codec_state,
6002                   struct hw_context *hw_context)
6003 {
6004     struct i965_driver_data *i965 = i965_driver_data(ctx);
6005     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
6006     struct proc_state *proc_state = &codec_state->proc;
6007     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
6008     struct object_surface *obj_surface;
6009     struct i965_surface src_surface, dst_surface;
6010     VARectangle src_rect, dst_rect;
6011     VAStatus status;
6012     int i;
6013     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
6014     int num_tmp_surfaces = 0;
6015     unsigned int tiling = 0, swizzle = 0;
6016     int in_width, in_height;
6017
6018     if (pipeline_param->surface == VA_INVALID_ID ||
6019         proc_state->current_render_target == VA_INVALID_ID) {
6020         status = VA_STATUS_ERROR_INVALID_SURFACE;
6021         goto error;
6022     }
6023
6024     obj_surface = SURFACE(proc_state->current_render_target);
6025     if (!obj_surface)
6026         return VA_STATUS_ERROR_INVALID_SURFACE;
6027
6028     if (!obj_surface->bo) {
6029         unsigned int expected_format = obj_surface->expected_format;
6030         int fourcc = 0;
6031         int subsample = 0;
6032         int tiling = HAS_TILED_SURFACE(i965);
6033         switch (expected_format) {
6034         case VA_RT_FORMAT_YUV420:
6035             fourcc = VA_FOURCC_NV12;
6036             subsample = SUBSAMPLE_YUV420;
6037             break;
6038         case VA_RT_FORMAT_YUV420_10BPP:
6039             fourcc = VA_FOURCC_P010;
6040             subsample = SUBSAMPLE_YUV420;
6041             break;
6042         case VA_RT_FORMAT_RGB32:
6043             fourcc = VA_FOURCC_RGBA;
6044             subsample = SUBSAMPLE_RGBX;
6045             break;
6046         default:
6047             return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
6048         }
6049         i965_check_alloc_surface_bo(ctx, obj_surface, tiling, fourcc, subsample);
6050     }
6051
6052     obj_surface = SURFACE(pipeline_param->surface);
6053
6054     if (!obj_surface) {
6055         status = VA_STATUS_ERROR_INVALID_SURFACE;
6056         goto error;
6057     }
6058
6059     if (!obj_surface->bo) {
6060         status = VA_STATUS_ERROR_INVALID_VALUE; /* The input surface is created without valid content */
6061         goto error;
6062     }
6063
6064     if (pipeline_param->num_filters && !pipeline_param->filters) {
6065         status = VA_STATUS_ERROR_INVALID_PARAMETER;
6066         goto error;
6067     }
6068
6069     status = i965_proc_picture_fast(ctx, proc_context, proc_state);
6070     if (status != VA_STATUS_ERROR_UNIMPLEMENTED)
6071         return status;
6072
6073     in_width = obj_surface->orig_width;
6074     in_height = obj_surface->orig_height;
6075     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
6076
6077     src_surface.base = (struct object_base *)obj_surface;
6078     src_surface.type = I965_SURFACE_TYPE_SURFACE;
6079     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
6080
6081     VASurfaceID out_surface_id = VA_INVALID_ID;
6082     if (obj_surface->fourcc != VA_FOURCC_NV12) {
6083         src_surface.base = (struct object_base *)obj_surface;
6084         src_surface.type = I965_SURFACE_TYPE_SURFACE;
6085         src_surface.flags = I965_SURFACE_FLAG_FRAME;
6086         src_rect.x = 0;
6087         src_rect.y = 0;
6088         src_rect.width = in_width;
6089         src_rect.height = in_height;
6090
6091         status = i965_CreateSurfaces(ctx,
6092                                      in_width,
6093                                      in_height,
6094                                      VA_RT_FORMAT_YUV420,
6095                                      1,
6096                                      &out_surface_id);
6097         if (status != VA_STATUS_SUCCESS)
6098             goto error;
6099         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6100         obj_surface = SURFACE(out_surface_id);
6101         assert(obj_surface);
6102         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6103
6104         dst_surface.base = (struct object_base *)obj_surface;
6105         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6106         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
6107         dst_rect.x = 0;
6108         dst_rect.y = 0;
6109         dst_rect.width = in_width;
6110         dst_rect.height = in_height;
6111
6112         status = i965_image_processing(ctx,
6113                                        &src_surface,
6114                                        &src_rect,
6115                                        &dst_surface,
6116                                        &dst_rect);
6117         if (status != VA_STATUS_SUCCESS)
6118             goto error;
6119
6120         src_surface.base = (struct object_base *)obj_surface;
6121         src_surface.type = I965_SURFACE_TYPE_SURFACE;
6122         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
6123     }
6124
6125     if (pipeline_param->surface_region) {
6126         src_rect.x = pipeline_param->surface_region->x;
6127         src_rect.y = pipeline_param->surface_region->y;
6128         src_rect.width = pipeline_param->surface_region->width;
6129         src_rect.height = pipeline_param->surface_region->height;
6130     } else {
6131         src_rect.x = 0;
6132         src_rect.y = 0;
6133         src_rect.width = in_width;
6134         src_rect.height = in_height;
6135     }
6136
6137     proc_context->pp_context.pipeline_param = pipeline_param;
6138
6139     for (i = 0; i < pipeline_param->num_filters; i++) {
6140         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
6141         VAProcFilterParameterBufferBase *filter_param = NULL;
6142         VAProcFilterType filter_type;
6143         int kernel_index;
6144
6145         if (!obj_buffer ||
6146             !obj_buffer->buffer_store ||
6147             !obj_buffer->buffer_store->buffer) {
6148             status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN;
6149             goto error;
6150         }
6151
6152         out_surface_id = VA_INVALID_ID;
6153         filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
6154         filter_type = filter_param->type;
6155         kernel_index = procfilter_to_pp_flag[filter_type];
6156
6157         if (kernel_index != PP_NULL &&
6158             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
6159             status = i965_CreateSurfaces(ctx,
6160                                          in_width,
6161                                          in_height,
6162                                          VA_RT_FORMAT_YUV420,
6163                                          1,
6164                                          &out_surface_id);
6165             assert(status == VA_STATUS_SUCCESS);
6166             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6167             obj_surface = SURFACE(out_surface_id);
6168             assert(obj_surface);
6169             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6170             dst_surface.base = (struct object_base *)obj_surface;
6171             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6172             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
6173                                                    &src_surface,
6174                                                    &src_rect,
6175                                                    &dst_surface,
6176                                                    &src_rect,
6177                                                    kernel_index,
6178                                                    filter_param);
6179
6180             if (status == VA_STATUS_SUCCESS) {
6181                 src_surface.base = dst_surface.base;
6182                 src_surface.type = dst_surface.type;
6183                 src_surface.flags = dst_surface.flags;
6184             }
6185         }
6186     }
6187
6188     proc_context->pp_context.pipeline_param = NULL;
6189     obj_surface = SURFACE(proc_state->current_render_target);
6190
6191     if (!obj_surface) {
6192         status = VA_STATUS_ERROR_INVALID_SURFACE;
6193         goto error;
6194     }
6195
6196     if (pipeline_param->output_region) {
6197         dst_rect.x = pipeline_param->output_region->x;
6198         dst_rect.y = pipeline_param->output_region->y;
6199         dst_rect.width = pipeline_param->output_region->width;
6200         dst_rect.height = pipeline_param->output_region->height;
6201     } else {
6202         dst_rect.x = 0;
6203         dst_rect.y = 0;
6204         dst_rect.width = obj_surface->orig_width;
6205         dst_rect.height = obj_surface->orig_height;
6206     }
6207
6208     if (IS_GEN7(i965->intel.device_info) ||
6209         IS_GEN8(i965->intel.device_info) ||
6210         IS_GEN9(i965->intel.device_info) ||
6211         IS_GEN10(i965->intel.device_info)) {
6212         unsigned int saved_filter_flag;
6213         struct i965_post_processing_context *i965pp_context = i965->pp_context;
6214
6215         if (obj_surface->fourcc == 0) {
6216             i965_check_alloc_surface_bo(ctx, obj_surface, 1,
6217                                         VA_FOURCC_NV12,
6218                                         SUBSAMPLE_YUV420);
6219         }
6220
6221         i965_vpp_clear_surface(ctx, &proc_context->pp_context,
6222                                obj_surface,
6223                                pipeline_param->output_background_color);
6224
6225         intel_batchbuffer_flush(hw_context->batch);
6226
6227         saved_filter_flag = i965pp_context->filter_flags;
6228         i965pp_context->filter_flags = (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK);
6229
6230         dst_surface.base = (struct object_base *)obj_surface;
6231         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6232         i965_image_processing(ctx, &src_surface, &src_rect, &dst_surface, &dst_rect);
6233
6234         i965pp_context->filter_flags = saved_filter_flag;
6235
6236         if (num_tmp_surfaces)
6237             i965_DestroySurfaces(ctx,
6238                                  tmp_surfaces,
6239                                  num_tmp_surfaces);
6240
6241         return VA_STATUS_SUCCESS;
6242     }
6243
6244     int csc_needed = 0;
6245     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC_NV12) {
6246         csc_needed = 1;
6247         out_surface_id = VA_INVALID_ID;
6248         status = i965_CreateSurfaces(ctx,
6249                                      obj_surface->orig_width,
6250                                      obj_surface->orig_height,
6251                                      VA_RT_FORMAT_YUV420,
6252                                      1,
6253                                      &out_surface_id);
6254         assert(status == VA_STATUS_SUCCESS);
6255         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6256         struct object_surface *csc_surface = SURFACE(out_surface_id);
6257         assert(csc_surface);
6258         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6259         dst_surface.base = (struct object_base *)csc_surface;
6260     } else {
6261         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6262         dst_surface.base = (struct object_base *)obj_surface;
6263     }
6264
6265     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6266     i965_vpp_clear_surface(ctx, &proc_context->pp_context, obj_surface, pipeline_param->output_background_color);
6267
6268     // load/save doesn't support different origin offset for src and dst surface
6269     if (src_rect.width == dst_rect.width &&
6270         src_rect.height == dst_rect.height &&
6271         src_rect.x == dst_rect.x &&
6272         src_rect.y == dst_rect.y) {
6273         i965_post_processing_internal(ctx, &proc_context->pp_context,
6274                                       &src_surface,
6275                                       &src_rect,
6276                                       &dst_surface,
6277                                       &dst_rect,
6278                                       PP_NV12_LOAD_SAVE_N12,
6279                                       NULL);
6280     } else {
6281
6282         proc_context->pp_context.filter_flags = pipeline_param->filter_flags;
6283         i965_post_processing_internal(ctx, &proc_context->pp_context,
6284                                       &src_surface,
6285                                       &src_rect,
6286                                       &dst_surface,
6287                                       &dst_rect,
6288                                       avs_is_needed(pipeline_param->filter_flags) ? PP_NV12_AVS : PP_NV12_SCALING,
6289                                       NULL);
6290     }
6291
6292     if (csc_needed) {
6293         src_surface.base = dst_surface.base;
6294         src_surface.type = dst_surface.type;
6295         src_surface.flags = dst_surface.flags;
6296         dst_surface.base = (struct object_base *)obj_surface;
6297         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6298         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
6299     }
6300
6301     if (num_tmp_surfaces)
6302         i965_DestroySurfaces(ctx,
6303                              tmp_surfaces,
6304                              num_tmp_surfaces);
6305
6306     intel_batchbuffer_flush(hw_context->batch);
6307
6308     return VA_STATUS_SUCCESS;
6309
6310 error:
6311     if (num_tmp_surfaces)
6312         i965_DestroySurfaces(ctx,
6313                              tmp_surfaces,
6314                              num_tmp_surfaces);
6315
6316     return status;
6317 }
6318
6319 static void
6320 i965_proc_context_destroy(void *hw_context)
6321 {
6322     struct i965_proc_context * const proc_context = hw_context;
6323     VADriverContextP const ctx = proc_context->driver_context;
6324
6325     proc_context->pp_context.finalize(ctx, &proc_context->pp_context);
6326     intel_batchbuffer_free(proc_context->base.batch);
6327     free(proc_context);
6328 }
6329
6330 struct hw_context *
6331 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
6332 {
6333     struct i965_driver_data *i965 = i965_driver_data(ctx);
6334     struct intel_driver_data *intel = intel_driver_data(ctx);
6335     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
6336
6337     if (!proc_context)
6338         return NULL;
6339
6340     proc_context->base.destroy = i965_proc_context_destroy;
6341     proc_context->base.run = i965_proc_picture;
6342     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
6343     proc_context->driver_context = ctx;
6344     i965->codec_info->post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
6345
6346     return (struct hw_context *)proc_context;
6347 }
6348
6349