2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41 #include "i965_yuv_coefs.h"
42 #include "intel_media.h"
44 #include "gen75_picture_process.h"
/* NOTE(review): this file is a line-numbered extract with lines missing
   throughout (braces, some declarations); code below is kept byte-identical. */
/* Forward declaration: convert one VA surface's content into another
   (return type line is missing from this extract). */
47 vpp_surface_convert(VADriverContextP ctx,
48 struct object_surface *src_obj_surf,
49 struct object_surface *dst_obj_surf);
/* True when the codec-info table reports video post-processing support. */
51 #define HAS_VPP(ctx) ((ctx)->codec_info->has_vpp)
/* Padded surface-state size: the max across Gen6/Gen7/Gen8 so one stride
   works for every generation handled by this file. */
53 #define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN8,\
54 MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))
/* Byte offset of surface-state slot `index`, and of the binding table
   that follows the MAX_PP_SURFACES slots. */
56 #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
57 #define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
/* Per-thread block geometry used by the post-processing GPU kernels. */
59 #define GPU_ASM_BLOCK_WIDTH 16
60 #define GPU_ASM_BLOCK_HEIGHT 8
61 #define GPU_ASM_X_OFFSET_ALIGNMENT 4
/* Internal sentinel distinct from VA_STATUS_SUCCESS -- presumably marks a
   "handled, but skip further processing" case; confirm against callers. */
63 #define VA_STATUS_SUCCESS_1 0xFFFFFFFE
/* Gen5 (Ironlake) post-processing kernel binaries. Each array is filled by
   including a generated .g4b shader dump (closing "};" lines are missing
   from this extract). */
65 static const uint32_t pp_null_gen5[][4] = {
66 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
69 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
70 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
73 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
74 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
77 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
78 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
81 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
82 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
85 static const uint32_t pp_nv12_scaling_gen5[][4] = {
86 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
89 static const uint32_t pp_nv12_avs_gen5[][4] = {
90 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
93 static const uint32_t pp_nv12_dndi_gen5[][4] = {
94 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
97 static const uint32_t pp_nv12_dn_gen5[][4] = {
98 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
101 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
102 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
105 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
106 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
109 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
110 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
113 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
114 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
117 static const uint32_t pp_pa_load_save_pa_gen5[][4] = {
118 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5"
121 static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
122 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
125 static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
126 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
/* Forward declarations of the per-module initialize callbacks referenced by
   the pp_modules_gen5/gen6 tables below. Each takes the source/destination
   surface plus its rectangle (trailing parameter lines are missing here). */
129 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
130 const struct i965_surface *src_surface,
131 const VARectangle *src_rect,
132 struct i965_surface *dst_surface,
133 const VARectangle *dst_rect,
136 pp_nv12_avs_initialize(VADriverContextP ctx,
137 struct i965_post_processing_context *pp_context,
138 const struct i965_surface *src_surface, const VARectangle *src_rect,
139 struct i965_surface *dst_surface, const VARectangle *dst_rect,
141 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
142 const struct i965_surface *src_surface,
143 const VARectangle *src_rect,
144 struct i965_surface *dst_surface,
145 const VARectangle *dst_rect,
147 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
148 const struct i965_surface *src_surface,
149 const VARectangle *src_rect,
150 struct i965_surface *dst_surface,
151 const VARectangle *dst_rect,
153 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
154 const struct i965_surface *src_surface,
155 const VARectangle *src_rect,
156 struct i965_surface *dst_surface,
157 const VARectangle *dst_rect,
159 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
160 const struct i965_surface *src_surface,
161 const VARectangle *src_rect,
162 struct i965_surface *dst_surface,
163 const VARectangle *dst_rect,
165 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
166 const struct i965_surface *src_surface,
167 const VARectangle *src_rect,
168 struct i965_surface *dst_surface,
169 const VARectangle *dst_rect,
/* Gen5 (Ironlake) post-processing module table: maps each PP_* kernel id to
   its shader binary, binary size, and initialize callback. Many struct-field
   and brace lines are missing from this numbered extract; entries kept
   byte-identical. */
172 static struct pp_module pp_modules_gen5[] = {
175 "NULL module (for testing)",
178 sizeof(pp_null_gen5),
188 PP_NV12_LOAD_SAVE_N12,
189 pp_nv12_load_save_nv12_gen5,
190 sizeof(pp_nv12_load_save_nv12_gen5),
194 pp_plx_load_save_plx_initialize,
200 PP_NV12_LOAD_SAVE_PL3,
201 pp_nv12_load_save_pl3_gen5,
202 sizeof(pp_nv12_load_save_pl3_gen5),
206 pp_plx_load_save_plx_initialize,
212 PP_PL3_LOAD_SAVE_N12,
213 pp_pl3_load_save_nv12_gen5,
214 sizeof(pp_pl3_load_save_nv12_gen5),
218 pp_plx_load_save_plx_initialize,
224 PP_PL3_LOAD_SAVE_PL3,
225 pp_pl3_load_save_pl3_gen5,
226 sizeof(pp_pl3_load_save_pl3_gen5),
230 pp_plx_load_save_plx_initialize
235 "NV12 Scaling module",
237 pp_nv12_scaling_gen5,
238 sizeof(pp_nv12_scaling_gen5),
242 pp_nv12_scaling_initialize,
250 sizeof(pp_nv12_avs_gen5),
254 pp_nv12_avs_initialize,
262 sizeof(pp_nv12_dndi_gen5),
266 pp_nv12_dndi_initialize,
274 sizeof(pp_nv12_dn_gen5),
278 pp_nv12_dn_initialize,
284 PP_NV12_LOAD_SAVE_PA,
285 pp_nv12_load_save_pa_gen5,
286 sizeof(pp_nv12_load_save_pa_gen5),
290 pp_plx_load_save_plx_initialize,
297 pp_pl3_load_save_pa_gen5,
298 sizeof(pp_pl3_load_save_pa_gen5),
302 pp_plx_load_save_plx_initialize,
308 PP_PA_LOAD_SAVE_NV12,
309 pp_pa_load_save_nv12_gen5,
310 sizeof(pp_pa_load_save_nv12_gen5),
314 pp_plx_load_save_plx_initialize,
321 pp_pa_load_save_pl3_gen5,
322 sizeof(pp_pa_load_save_pl3_gen5),
326 pp_plx_load_save_plx_initialize,
333 pp_pa_load_save_pa_gen5,
334 sizeof(pp_pa_load_save_pa_gen5),
338 pp_plx_load_save_plx_initialize,
344 PP_RGBX_LOAD_SAVE_NV12,
345 pp_rgbx_load_save_nv12_gen5,
346 sizeof(pp_rgbx_load_save_nv12_gen5),
350 pp_plx_load_save_plx_initialize,
356 PP_NV12_LOAD_SAVE_RGBX,
357 pp_nv12_load_save_rgbx_gen5,
358 sizeof(pp_nv12_load_save_rgbx_gen5),
362 pp_plx_load_save_plx_initialize,
/* Gen6 (Sandy Bridge) post-processing kernel binaries (.g6b dumps).
   Closing "};" lines are missing from this numbered extract. */
366 static const uint32_t pp_null_gen6[][4] = {
367 #include "shaders/post_processing/gen5_6/null.g6b"
370 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
371 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
374 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
375 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
378 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
379 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
382 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
383 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
/* NOTE(review): on Gen6 the "scaling" kernel reuses the AVS shader binary
   (same include as pp_nv12_avs_gen6 below) -- appears intentional; confirm
   against upstream history. */
386 static const uint32_t pp_nv12_scaling_gen6[][4] = {
387 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
390 static const uint32_t pp_nv12_avs_gen6[][4] = {
391 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
394 static const uint32_t pp_nv12_dndi_gen6[][4] = {
395 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
398 static const uint32_t pp_nv12_dn_gen6[][4] = {
399 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
402 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
403 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
406 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
407 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
410 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
411 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
414 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
415 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
418 static const uint32_t pp_pa_load_save_pa_gen6[][4] = {
419 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g6b"
422 static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
423 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
426 static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
427 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
/* Gen6 (Sandy Bridge) post-processing module table; same layout as the Gen5
   table: kernel id, shader binary, size, initialize callback. Field/brace
   lines are missing from this numbered extract; entries kept byte-identical. */
430 static struct pp_module pp_modules_gen6[] = {
433 "NULL module (for testing)",
436 sizeof(pp_null_gen6),
446 PP_NV12_LOAD_SAVE_N12,
447 pp_nv12_load_save_nv12_gen6,
448 sizeof(pp_nv12_load_save_nv12_gen6),
452 pp_plx_load_save_plx_initialize,
458 PP_NV12_LOAD_SAVE_PL3,
459 pp_nv12_load_save_pl3_gen6,
460 sizeof(pp_nv12_load_save_pl3_gen6),
464 pp_plx_load_save_plx_initialize,
470 PP_PL3_LOAD_SAVE_N12,
471 pp_pl3_load_save_nv12_gen6,
472 sizeof(pp_pl3_load_save_nv12_gen6),
476 pp_plx_load_save_plx_initialize,
482 PP_PL3_LOAD_SAVE_PL3,
483 pp_pl3_load_save_pl3_gen6,
484 sizeof(pp_pl3_load_save_pl3_gen6),
488 pp_plx_load_save_plx_initialize,
493 "NV12 Scaling module",
495 pp_nv12_scaling_gen6,
496 sizeof(pp_nv12_scaling_gen6),
500 gen6_nv12_scaling_initialize,
508 sizeof(pp_nv12_avs_gen6),
512 pp_nv12_avs_initialize,
520 sizeof(pp_nv12_dndi_gen6),
524 pp_nv12_dndi_initialize,
532 sizeof(pp_nv12_dn_gen6),
536 pp_nv12_dn_initialize,
541 PP_NV12_LOAD_SAVE_PA,
542 pp_nv12_load_save_pa_gen6,
543 sizeof(pp_nv12_load_save_pa_gen6),
547 pp_plx_load_save_plx_initialize,
554 pp_pl3_load_save_pa_gen6,
555 sizeof(pp_pl3_load_save_pa_gen6),
559 pp_plx_load_save_plx_initialize,
565 PP_PA_LOAD_SAVE_NV12,
566 pp_pa_load_save_nv12_gen6,
567 sizeof(pp_pa_load_save_nv12_gen6),
571 pp_plx_load_save_plx_initialize,
578 pp_pa_load_save_pl3_gen6,
579 sizeof(pp_pa_load_save_pl3_gen6),
583 pp_plx_load_save_plx_initialize,
590 pp_pa_load_save_pa_gen6,
591 sizeof(pp_pa_load_save_pa_gen6),
595 pp_plx_load_save_plx_initialize,
601 PP_RGBX_LOAD_SAVE_NV12,
602 pp_rgbx_load_save_nv12_gen6,
603 sizeof(pp_rgbx_load_save_nv12_gen6),
607 pp_plx_load_save_plx_initialize,
613 PP_NV12_LOAD_SAVE_RGBX,
614 pp_nv12_load_save_rgbx_gen6,
615 sizeof(pp_nv12_load_save_rgbx_gen6),
619 pp_plx_load_save_plx_initialize,
/* Gen7 (Ivy Bridge) post-processing kernel binaries (.g7b dumps).
   pp_null_gen7 has no include line visible -- presumably an intentionally
   empty kernel; confirm upstream. Closing "};" lines are missing from this
   numbered extract. */
623 static const uint32_t pp_null_gen7[][4] = {
626 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
627 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
630 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
631 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
634 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
635 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
638 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
639 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
/* Scaling and AVS both use the unified Gen7 AVS kernel. */
642 static const uint32_t pp_nv12_scaling_gen7[][4] = {
643 #include "shaders/post_processing/gen7/avs.g7b"
646 static const uint32_t pp_nv12_avs_gen7[][4] = {
647 #include "shaders/post_processing/gen7/avs.g7b"
650 static const uint32_t pp_nv12_dndi_gen7[][4] = {
651 #include "shaders/post_processing/gen7/dndi.g7b"
654 static const uint32_t pp_nv12_dn_gen7[][4] = {
655 #include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
657 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
658 #include "shaders/post_processing/gen7/pl2_to_pa.g7b"
660 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
661 #include "shaders/post_processing/gen7/pl3_to_pa.g7b"
663 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
664 #include "shaders/post_processing/gen7/pa_to_pl2.g7b"
666 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
667 #include "shaders/post_processing/gen7/pa_to_pl3.g7b"
669 static const uint32_t pp_pa_load_save_pa_gen7[][4] = {
670 #include "shaders/post_processing/gen7/pa_to_pa.g7b"
672 static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
673 #include "shaders/post_processing/gen7/rgbx_to_nv12.g7b"
675 static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
676 #include "shaders/post_processing/gen7/pl2_to_rgbx.g7b"
/* Gen7 initialize-callback forward declarations (trailing parameter lines
   are missing from this extract). */
679 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
680 const struct i965_surface *src_surface,
681 const VARectangle *src_rect,
682 struct i965_surface *dst_surface,
683 const VARectangle *dst_rect,
685 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
686 const struct i965_surface *src_surface,
687 const VARectangle *src_rect,
688 struct i965_surface *dst_surface,
689 const VARectangle *dst_rect,
691 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
692 const struct i965_surface *src_surface,
693 const VARectangle *src_rect,
694 struct i965_surface *dst_surface,
695 const VARectangle *dst_rect,
/* Gen7 (Ivy Bridge) post-processing module table; load/save modules all use
   the unified AVS initializer on this generation. Field/brace lines are
   missing from this numbered extract; entries kept byte-identical. */
698 static struct pp_module pp_modules_gen7[] = {
701 "NULL module (for testing)",
704 sizeof(pp_null_gen7),
714 PP_NV12_LOAD_SAVE_N12,
715 pp_nv12_load_save_nv12_gen7,
716 sizeof(pp_nv12_load_save_nv12_gen7),
720 gen7_pp_plx_avs_initialize,
726 PP_NV12_LOAD_SAVE_PL3,
727 pp_nv12_load_save_pl3_gen7,
728 sizeof(pp_nv12_load_save_pl3_gen7),
732 gen7_pp_plx_avs_initialize,
738 PP_PL3_LOAD_SAVE_N12,
739 pp_pl3_load_save_nv12_gen7,
740 sizeof(pp_pl3_load_save_nv12_gen7),
744 gen7_pp_plx_avs_initialize,
750 PP_PL3_LOAD_SAVE_PL3,
751 pp_pl3_load_save_pl3_gen7,
752 sizeof(pp_pl3_load_save_pl3_gen7),
756 gen7_pp_plx_avs_initialize,
761 "NV12 Scaling module",
763 pp_nv12_scaling_gen7,
764 sizeof(pp_nv12_scaling_gen7),
768 gen7_pp_plx_avs_initialize,
776 sizeof(pp_nv12_avs_gen7),
780 gen7_pp_plx_avs_initialize,
788 sizeof(pp_nv12_dndi_gen7),
792 gen7_pp_nv12_dndi_initialize,
800 sizeof(pp_nv12_dn_gen7),
804 gen7_pp_nv12_dn_initialize,
809 PP_NV12_LOAD_SAVE_PA,
810 pp_nv12_load_save_pa_gen7,
811 sizeof(pp_nv12_load_save_pa_gen7),
815 gen7_pp_plx_avs_initialize,
822 pp_pl3_load_save_pa_gen7,
823 sizeof(pp_pl3_load_save_pa_gen7),
827 gen7_pp_plx_avs_initialize,
833 PP_PA_LOAD_SAVE_NV12,
834 pp_pa_load_save_nv12_gen7,
835 sizeof(pp_pa_load_save_nv12_gen7),
839 gen7_pp_plx_avs_initialize,
846 pp_pa_load_save_pl3_gen7,
847 sizeof(pp_pa_load_save_pl3_gen7),
851 gen7_pp_plx_avs_initialize,
858 pp_pa_load_save_pa_gen7,
859 sizeof(pp_pa_load_save_pa_gen7),
863 gen7_pp_plx_avs_initialize,
869 PP_RGBX_LOAD_SAVE_NV12,
870 pp_rgbx_load_save_nv12_gen7,
871 sizeof(pp_rgbx_load_save_nv12_gen7),
875 gen7_pp_plx_avs_initialize,
881 PP_NV12_LOAD_SAVE_RGBX,
882 pp_nv12_load_save_rgbx_gen7,
883 sizeof(pp_nv12_load_save_rgbx_gen7),
887 gen7_pp_plx_avs_initialize,
/* Gen7.5 (Haswell) post-processing kernel binaries (.g75b dumps).
   The dndi/dn includes are commented out below, leaving those arrays
   empty -- presumably DNDI/DN run through a different path on Haswell;
   confirm upstream. Closing "};" lines are missing from this extract. */
892 static const uint32_t pp_null_gen75[][4] = {
895 static const uint32_t pp_nv12_load_save_nv12_gen75[][4] = {
896 #include "shaders/post_processing/gen7/pl2_to_pl2.g75b"
899 static const uint32_t pp_nv12_load_save_pl3_gen75[][4] = {
900 #include "shaders/post_processing/gen7/pl2_to_pl3.g75b"
903 static const uint32_t pp_pl3_load_save_nv12_gen75[][4] = {
904 #include "shaders/post_processing/gen7/pl3_to_pl2.g75b"
907 static const uint32_t pp_pl3_load_save_pl3_gen75[][4] = {
908 #include "shaders/post_processing/gen7/pl3_to_pl3.g75b"
911 static const uint32_t pp_nv12_scaling_gen75[][4] = {
912 #include "shaders/post_processing/gen7/avs.g75b"
915 static const uint32_t pp_nv12_avs_gen75[][4] = {
916 #include "shaders/post_processing/gen7/avs.g75b"
919 static const uint32_t pp_nv12_dndi_gen75[][4] = {
920 // #include "shaders/post_processing/gen7/dndi.g75b"
923 static const uint32_t pp_nv12_dn_gen75[][4] = {
924 // #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
926 static const uint32_t pp_nv12_load_save_pa_gen75[][4] = {
927 #include "shaders/post_processing/gen7/pl2_to_pa.g75b"
929 static const uint32_t pp_pl3_load_save_pa_gen75[][4] = {
930 #include "shaders/post_processing/gen7/pl3_to_pa.g75b"
932 static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
933 #include "shaders/post_processing/gen7/pa_to_pl2.g75b"
935 static const uint32_t pp_pa_load_save_pl3_gen75[][4] = {
936 #include "shaders/post_processing/gen7/pa_to_pl3.g75b"
938 static const uint32_t pp_pa_load_save_pa_gen75[][4] = {
939 #include "shaders/post_processing/gen7/pa_to_pa.g75b"
941 static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = {
942 #include "shaders/post_processing/gen7/rgbx_to_nv12.g75b"
944 static const uint32_t pp_nv12_load_save_rgbx_gen75[][4] = {
945 #include "shaders/post_processing/gen7/pl2_to_rgbx.g75b"
/* Gen7.5 (Haswell) post-processing module table. Field/brace lines are
   missing from this numbered extract; entries kept byte-identical. */
948 static struct pp_module pp_modules_gen75[] = {
951 "NULL module (for testing)",
954 sizeof(pp_null_gen75),
964 PP_NV12_LOAD_SAVE_N12,
965 pp_nv12_load_save_nv12_gen75,
966 sizeof(pp_nv12_load_save_nv12_gen75),
970 gen7_pp_plx_avs_initialize,
976 PP_NV12_LOAD_SAVE_PL3,
977 pp_nv12_load_save_pl3_gen75,
978 sizeof(pp_nv12_load_save_pl3_gen75),
982 gen7_pp_plx_avs_initialize,
988 PP_PL3_LOAD_SAVE_N12,
989 pp_pl3_load_save_nv12_gen75,
990 sizeof(pp_pl3_load_save_nv12_gen75),
994 gen7_pp_plx_avs_initialize,
1000 PP_PL3_LOAD_SAVE_PL3,
1001 pp_pl3_load_save_pl3_gen75,
1002 sizeof(pp_pl3_load_save_pl3_gen75),
1006 gen7_pp_plx_avs_initialize,
1011 "NV12 Scaling module",
1013 pp_nv12_scaling_gen75,
1014 sizeof(pp_nv12_scaling_gen75),
1018 gen7_pp_plx_avs_initialize,
1026 sizeof(pp_nv12_avs_gen75),
1030 gen7_pp_plx_avs_initialize,
1038 sizeof(pp_nv12_dndi_gen75),
/* NOTE(review): this DNDI entry uses gen7_pp_nv12_dn_initialize rather than
   gen7_pp_nv12_dndi_initialize, and its kernel array is empty (include
   commented out above) -- verify against upstream before assuming a bug. */
1042 gen7_pp_nv12_dn_initialize,
1050 sizeof(pp_nv12_dn_gen75),
1054 gen7_pp_nv12_dn_initialize,
1060 PP_NV12_LOAD_SAVE_PA,
1061 pp_nv12_load_save_pa_gen75,
1062 sizeof(pp_nv12_load_save_pa_gen75),
1066 gen7_pp_plx_avs_initialize,
1072 PP_PL3_LOAD_SAVE_PA,
1073 pp_pl3_load_save_pa_gen75,
1074 sizeof(pp_pl3_load_save_pa_gen75),
1078 gen7_pp_plx_avs_initialize,
1084 PP_PA_LOAD_SAVE_NV12,
1085 pp_pa_load_save_nv12_gen75,
1086 sizeof(pp_pa_load_save_nv12_gen75),
1090 gen7_pp_plx_avs_initialize,
1096 PP_PA_LOAD_SAVE_PL3,
1097 pp_pa_load_save_pl3_gen75,
1098 sizeof(pp_pa_load_save_pl3_gen75),
1102 gen7_pp_plx_avs_initialize,
1109 pp_pa_load_save_pa_gen75,
1110 sizeof(pp_pa_load_save_pa_gen75),
1114 gen7_pp_plx_avs_initialize,
1120 PP_RGBX_LOAD_SAVE_NV12,
1121 pp_rgbx_load_save_nv12_gen75,
1122 sizeof(pp_rgbx_load_save_nv12_gen75),
1126 gen7_pp_plx_avs_initialize,
1132 PP_NV12_LOAD_SAVE_RGBX,
1133 pp_nv12_load_save_rgbx_gen75,
1134 sizeof(pp_nv12_load_save_rgbx_gen75),
1138 gen7_pp_plx_avs_initialize,
/* Reset a DNDI frame-store slot to "empty": no surface object, invalid
   surface id, not a scratch surface. (Return-type/brace lines are missing
   from this numbered extract.) */
1144 pp_dndi_frame_store_reset(DNDIFrameStore *fs)
1146 fs->obj_surface = NULL;
1147 fs->surface_id = VA_INVALID_ID;
1148 fs->is_scratch_surface = 0;
/* Swap the contents of two DNDI frame-store slots via a temporary copy.
   NOTE(review): the two assignment lines completing the swap are missing
   from this numbered extract. */
1152 pp_dndi_frame_store_swap(DNDIFrameStore *fs1, DNDIFrameStore *fs2)
1154 const DNDIFrameStore tmpfs = *fs1;
/* Release a DNDI frame-store slot: if it owns a driver-allocated scratch
   surface, destroy that VA surface, then reset the slot to empty.
   User-provided (non-scratch) surfaces are not destroyed. */
1160 pp_dndi_frame_store_clear(DNDIFrameStore *fs, VADriverContextP ctx)
1162 if (fs->obj_surface && fs->is_scratch_surface) {
1163 VASurfaceID va_surface = fs->obj_surface->base.id;
1164 i965_DestroySurfaces(ctx, &va_surface, 1);
1166 pp_dndi_frame_store_reset(fs);
/* Zero the whole DNDI context, then explicitly reset every frame-store slot
   (so surface_id becomes VA_INVALID_ID rather than 0). */
1170 pp_dndi_context_init(struct pp_dndi_context *dndi_ctx)
1174 memset(dndi_ctx, 0, sizeof(*dndi_ctx));
1175 for (i = 0; i < ARRAY_ELEMS(dndi_ctx->frame_store); i++)
1176 pp_dndi_frame_store_reset(&dndi_ctx->frame_store[i]);
/* Derive per-frame deinterlacing state from the pipeline and filter params:
   - detects whether this call is the second field of the current frame,
   - detects the first frame (no prior reference in the frame store),
   - validates the forward reference for motion adaptive/compensated modes,
   - rejects unsupported algorithms.
   Returns VA_STATUS_SUCCESS or an INVALID_PARAMETER/UNSUPPORTED_FILTER
   error. (Brace/break lines are missing from this numbered extract.) */
1180 pp_dndi_context_init_surface_params(struct pp_dndi_context *dndi_ctx,
1181 struct object_surface *obj_surface,
1182 const VAProcPipelineParameterBuffer *pipe_params,
1183 const VAProcFilterParameterBufferDeinterlacing *deint_params)
1187 dndi_ctx->is_di_enabled = 1;
1188 dndi_ctx->is_di_adv_enabled = 0;
1189 dndi_ctx->is_first_frame = 0;
1190 dndi_ctx->is_second_field = 0;
1192 /* Check whether we are deinterlacing the second field */
1193 if (dndi_ctx->is_di_enabled) {
/* tff = "top field first": true unless BOTTOM_FIELD_FIRST is flagged. */
1194 const unsigned int tff =
1195 !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD_FIRST);
1196 const unsigned int is_top_field =
1197 !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
/* Second field iff the field being output is not the first field in
   display order; the input surface must then match the one stored for
   the first-field pass. */
1199 if ((tff ^ is_top_field) != 0) {
1200 fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1201 if (fs->surface_id != obj_surface->base.id) {
1202 WARN_ONCE("invalid surface provided for second field\n");
1203 return VA_STATUS_ERROR_INVALID_PARAMETER;
1205 dndi_ctx->is_second_field = 1;
1209 /* Check whether we are deinterlacing the first frame */
1210 if (dndi_ctx->is_di_enabled) {
1211 switch (deint_params->algorithm) {
1212 case VAProcDeinterlacingBob:
/* Bob needs no history: every frame is treated as "first". */
1213 dndi_ctx->is_first_frame = 1;
1215 case VAProcDeinterlacingMotionAdaptive:
1216 case VAProcDeinterlacingMotionCompensated:
1217 fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1218 if (fs->surface_id == VA_INVALID_ID)
1219 dndi_ctx->is_first_frame = 1;
1220 else if (dndi_ctx->is_second_field) {
1221 /* At this stage, we have already deinterlaced the
1222 first field successfully. So, the first frame flag
1223 is trigerred if the previous field was deinterlaced
1224 without reference frame */
1225 fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS];
1226 if (fs->surface_id == VA_INVALID_ID)
1227 dndi_ctx->is_first_frame = 1;
/* Advanced modes require a valid forward temporal reference. */
1230 if (pipe_params->num_forward_references < 1 ||
1231 pipe_params->forward_references[0] == VA_INVALID_ID) {
1232 WARN_ONCE("A forward temporal reference is needed for Motion adaptive/compensated deinterlacing !!!\n");
1233 return VA_STATUS_ERROR_INVALID_PARAMETER;
1236 dndi_ctx->is_di_adv_enabled = 1;
1239 WARN_ONCE("unsupported deinterlacing algorithm (%d)\n",
1240 deint_params->algorithm);
1241 return VA_STATUS_ERROR_UNSUPPORTED_FILTER;
1244 return VA_STATUS_SUCCESS;
/* Ensure backing storage (BOs) exists for the DNDI pipeline:
   - allocates the src/dst surfaces' BOs if missing (NV12, YUV420),
   - creates scratch surfaces for any empty frame-store slot, sized from the
     source (input-side slots) or destination (output-side slots),
   - STMM slots use a Y800 (single-plane) layout.
   Returns VA_STATUS_SUCCESS or the first allocation error.
   (Brace/"goto"/label lines are missing from this numbered extract.) */
1248 pp_dndi_context_ensure_surfaces_storage(VADriverContextP ctx,
1249 struct i965_post_processing_context *pp_context,
1250 struct object_surface *src_surface, struct object_surface *dst_surface)
1252 struct i965_driver_data * const i965 = i965_driver_data(ctx);
1253 struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
1254 unsigned int src_fourcc, dst_fourcc;
1255 unsigned int src_sampling, dst_sampling;
1256 unsigned int src_tiling, dst_tiling;
1257 unsigned int i, swizzle;
1260 /* Determine input surface info. Always use NV12 Y-tiled */
1261 if (src_surface->bo) {
1262 src_fourcc = src_surface->fourcc;
1263 src_sampling = src_surface->subsampling;
1264 dri_bo_get_tiling(src_surface->bo, &src_tiling, &swizzle);
/* Collapse the I915_TILING_* enum to a 0/1 "is tiled" flag. */
1265 src_tiling = !!src_tiling;
1268 src_fourcc = VA_FOURCC_NV12;
1269 src_sampling = SUBSAMPLE_YUV420;
1271 status = i965_check_alloc_surface_bo(ctx, src_surface,
1272 src_tiling, src_fourcc, src_sampling);
1273 if (status != VA_STATUS_SUCCESS)
1277 /* Determine output surface info. Always use NV12 Y-tiled */
1278 if (dst_surface->bo) {
1279 dst_fourcc = dst_surface->fourcc;
1280 dst_sampling = dst_surface->subsampling;
1281 dri_bo_get_tiling(dst_surface->bo, &dst_tiling, &swizzle);
1282 dst_tiling = !!dst_tiling;
1285 dst_fourcc = VA_FOURCC_NV12;
1286 dst_sampling = SUBSAMPLE_YUV420;
1288 status = i965_check_alloc_surface_bo(ctx, dst_surface,
1289 dst_tiling, dst_fourcc, dst_sampling);
1290 if (status != VA_STATUS_SUCCESS)
1294 /* Create pipeline surfaces */
1295 for (i = 0; i < ARRAY_ELEMS(dndi_ctx->frame_store); i ++) {
1296 struct object_surface *obj_surface;
1297 VASurfaceID new_surface;
1298 unsigned int width, height;
1300 if (dndi_ctx->frame_store[i].obj_surface &&
1301 dndi_ctx->frame_store[i].obj_surface->bo)
1302 continue; // user allocated surface, not VPP internal
1304 if (dndi_ctx->frame_store[i].obj_surface) {
1305 obj_surface = dndi_ctx->frame_store[i].obj_surface;
1306 dndi_ctx->frame_store[i].is_scratch_surface = 0;
/* Input-side slots are sized like the source; output-side like the
   destination. */
1308 if (i <= DNDI_FRAME_IN_STMM) {
1309 width = src_surface->orig_width;
1310 height = src_surface->orig_height;
1313 width = dst_surface->orig_width;
1314 height = dst_surface->orig_height;
1317 status = i965_CreateSurfaces(ctx, width, height, VA_RT_FORMAT_YUV420,
1319 if (status != VA_STATUS_SUCCESS)
1322 obj_surface = SURFACE(new_surface);
1323 assert(obj_surface != NULL);
1324 dndi_ctx->frame_store[i].is_scratch_surface = 1;
1327 if (i <= DNDI_FRAME_IN_PREVIOUS) {
1328 status = i965_check_alloc_surface_bo(ctx, obj_surface,
1329 src_tiling, src_fourcc, src_sampling);
/* STMM (motion-measure) surfaces are single-plane 8-bit. */
1331 else if (i == DNDI_FRAME_IN_STMM || i == DNDI_FRAME_OUT_STMM) {
1332 status = i965_check_alloc_surface_bo(ctx, obj_surface,
1333 1, VA_FOURCC_Y800, SUBSAMPLE_YUV400);
1335 else if (i >= DNDI_FRAME_OUT_CURRENT) {
1336 status = i965_check_alloc_surface_bo(ctx, obj_surface,
1337 dst_tiling, dst_fourcc, dst_sampling);
1339 if (status != VA_STATUS_SUCCESS)
1342 dndi_ctx->frame_store[i].obj_surface = obj_surface;
1344 return VA_STATUS_SUCCESS;
/* Rotate the DNDI frame store for a new call:
   - binds the forward reference (if any) as the previous input,
   - binds src_surface as the current input,
   - swaps the in/out STMM surfaces,
   - rotates/binds the output slots, handling advanced-DI double-buffering.
   Returns VA_STATUS_SUCCESS. (Brace/"goto"/early-exit lines are missing
   from this numbered extract; is_new_frame appears set but its use is not
   visible here.) */
1348 pp_dndi_context_ensure_surfaces(VADriverContextP ctx,
1349 struct i965_post_processing_context *pp_context,
1350 struct object_surface *src_surface, struct object_surface *dst_surface)
1352 struct i965_driver_data * const i965 = i965_driver_data(ctx);
1353 struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
1354 DNDIFrameStore *ifs, *ofs;
1355 bool is_new_frame = false;
1357 /* Update the previous input surface */
1358 is_new_frame = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].surface_id !=
1359 src_surface->base.id;
1361 ifs = &dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS];
1362 ofs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1364 const VAProcPipelineParameterBuffer * const pipe_params =
1365 pp_context->pipeline_param;
1366 struct object_surface *obj_surface;
1368 if (pipe_params->num_forward_references < 1)
1370 if (pipe_params->forward_references[0] == VA_INVALID_ID)
1373 obj_surface = SURFACE(pipe_params->forward_references[0]);
1374 if (!obj_surface || obj_surface->base.id == ifs->surface_id)
1377 pp_dndi_frame_store_clear(ifs, ctx);
/* If the reference is what we currently hold as "current input", just
   hand the slot over instead of re-binding the surface. */
1378 if (obj_surface->base.id == ofs->surface_id) {
1380 pp_dndi_frame_store_reset(ofs);
1383 ifs->obj_surface = obj_surface;
1384 ifs->surface_id = obj_surface->base.id;
1389 /* Update the input surface */
1390 ifs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1391 pp_dndi_frame_store_clear(ifs, ctx);
1392 ifs->obj_surface = src_surface;
1393 ifs->surface_id = src_surface->base.id;
1395 /* Update the Spatial Temporal Motion Measure (STMM) surfaces */
1397 pp_dndi_frame_store_swap(&dndi_ctx->frame_store[DNDI_FRAME_IN_STMM],
1398 &dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM]);
1400 /* Update the output surfaces */
1401 ofs = &dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT];
1402 if (dndi_ctx->is_di_adv_enabled && !dndi_ctx->is_first_frame) {
1403 pp_dndi_frame_store_swap(ofs,
1404 &dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS]);
/* For the first field of a frame the new destination becomes the
   "previous" output so the second field can reuse "current". */
1405 if (!dndi_ctx->is_second_field)
1406 ofs = &dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS];
1408 pp_dndi_frame_store_clear(ofs, ctx);
1409 ofs->obj_surface = dst_surface;
1410 ofs->surface_id = dst_surface->base.id;
1412 return VA_STATUS_SUCCESS;
/* Return the fourcc of a generic i965 surface wrapper: reads it from the
   VAImage for image-typed surfaces, from the object_surface otherwise.
   (Return-type/brace lines are missing from this numbered extract.) */
1416 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
1420 if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1421 struct object_image *obj_image = (struct object_image *)surface->base;
1422 fourcc = obj_image->image.format.fourcc;
1424 struct object_surface *obj_surface = (struct object_surface *)surface->base;
1425 fourcc = obj_surface->fourcc;
/* Write the pixel dimensions of a generic i965 surface wrapper into
   *width/*height: image dimensions for image-typed surfaces, original
   (unaligned) surface dimensions otherwise. */
1432 pp_get_surface_size(VADriverContextP ctx, const struct i965_surface *surface, int *width, int *height)
1434 if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1435 struct object_image *obj_image = (struct object_image *)surface->base;
1437 *width = obj_image->image.width;
1438 *height = obj_image->image.height;
1440 struct object_surface *obj_surface = (struct object_surface *)surface->base;
1442 *width = obj_surface->orig_width;
1443 *height = obj_surface->orig_height;
/* Program the tiled_surface/tile_walk bits of a (pre-Gen7) surface state
   from an I915_TILING_* value: NONE -> untiled; the other two visible arms
   presumably correspond to I915_TILING_X and I915_TILING_Y (case labels are
   missing from this numbered extract). */
1448 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
1451 case I915_TILING_NONE:
1452 ss->ss3.tiled_surface = 0;
1453 ss->ss3.tile_walk = 0;
1456 ss->ss3.tiled_surface = 1;
1457 ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
1460 ss->ss3.tiled_surface = 1;
1461 ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
/* Same as pp_set_surface_tiling but for the "surface state 2" layout, where
   the tiling bits live in dword ss2. */
1467 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
1470 case I915_TILING_NONE:
1471 ss->ss2.tiled_surface = 0;
1472 ss->ss2.tile_walk = 0;
1475 ss->ss2.tiled_surface = 1;
1476 ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1479 ss->ss2.tiled_surface = 1;
1480 ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
/* Gen7 variant of pp_set_surface_tiling: the tiling bits moved to dword ss0
   in the Gen7 surface-state layout. */
1486 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
1489 case I915_TILING_NONE:
1490 ss->ss0.tiled_surface = 0;
1491 ss->ss0.tile_walk = 0;
1494 ss->ss0.tiled_surface = 1;
1495 ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1498 ss->ss0.tiled_surface = 1;
1499 ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
/* Gen7 variant of pp_set_surface2_tiling (tiling bits in dword ss2 of the
   Gen7 "surface state 2" layout). */
1505 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
1508 case I915_TILING_NONE:
1509 ss->ss2.tiled_surface = 0;
1510 ss->ss2.tile_walk = 0;
1513 ss->ss2.tiled_surface = 1;
1514 ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1517 ss->ss2.tiled_surface = 1;
1518 ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
/* Fill the Ironlake interface-descriptor entry for the currently selected
   PP kernel: kernel start pointer, CURBE read length, sampler-state and
   binding-table pointers, plus the two relocations the kernel and sampler
   BOs need. (dri_bo_map/unmap lines are missing from this numbered
   extract; `bo->virtual` is asserted below, so a map presumably precedes.) */
1524 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1526 struct i965_interface_descriptor *desc;
1528 int pp_index = pp_context->current_pp;
1530 bo = pp_context->idrt.bo;
1532 assert(bo->virtual);
1534 memset(desc, 0, sizeof(*desc));
1535 desc->desc0.grf_reg_blocks = 10;
/* Kernel BO GPU address, in 64-byte units; patched by the reloc below. */
1536 desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1537 desc->desc1.const_urb_entry_read_offset = 0;
1538 desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
1539 desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
1540 desc->desc2.sampler_count = 0;
1541 desc->desc3.binding_table_entry_count = 0;
1542 desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
1544 dri_bo_emit_reloc(bo,
1545 I915_GEM_DOMAIN_INSTRUCTION, 0,
1546 desc->desc0.grf_reg_blocks,
1547 offsetof(struct i965_interface_descriptor, desc0),
1548 pp_context->pp_modules[pp_index].kernel.bo);
1550 dri_bo_emit_reloc(bo,
1551 I915_GEM_DOMAIN_INSTRUCTION, 0,
1552 desc->desc2.sampler_count << 2,
1553 offsetof(struct i965_interface_descriptor, desc2),
1554 pp_context->sampler_state_table.bo);
1557 pp_context->idrt.num_interface_descriptors++;
/* Fill the Ironlake VFE (video front end) state: thread count and URB entry
   allocation from pp_context->urb, generic mode, and the interface
   descriptor base pointer with its relocation. (Map/unmap lines are
   missing from this numbered extract.) */
1561 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
1563 struct i965_vfe_state *vfe_state;
1566 bo = pp_context->vfe_state.bo;
1568 assert(bo->virtual);
1569 vfe_state = bo->virtual;
1570 memset(vfe_state, 0, sizeof(*vfe_state));
/* Hardware fields are "value minus one" encodings. */
1571 vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
1572 vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
1573 vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
1574 vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
1575 vfe_state->vfe1.children_present = 0;
/* IDRT GPU address in 16-byte units; patched by the reloc below. */
1576 vfe_state->vfe2.interface_descriptor_base =
1577 pp_context->idrt.bo->offset >> 4; /* reloc */
1578 dri_bo_emit_reloc(bo,
1579 I915_GEM_DOMAIN_INSTRUCTION, 0,
1581 offsetof(struct i965_vfe_state, vfe2),
1582 pp_context->idrt.bo);
/*
 * Copy the 128-byte static parameter block (grf 1-4 worth of constants)
 * into the CURBE bo so the kernel can read it via the constant URB.
 */
1587 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
1589 unsigned char *constant_buffer;
1590 struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
/* 128 bytes == 4 GRF registers, matching const_urb_entry_read_len = 4. */
1592 assert(sizeof(*pp_static_parameter) == 128);
1593 dri_bo_map(pp_context->curbe.bo, 1);
1594 assert(pp_context->curbe.bo->virtual);
1595 constant_buffer = pp_context->curbe.bo->virtual;
1596 memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1597 dri_bo_unmap(pp_context->curbe.bo);
/*
 * Prepare all indirect state for an Ironlake PP run: interface descriptor
 * table, VFE state, and the CURBE constant buffer.
 */
1601 ironlake_pp_states_setup(VADriverContextP ctx,
1602 struct i965_post_processing_context *pp_context)
1604 ironlake_pp_interface_descriptor_table(pp_context);
1605 ironlake_pp_vfe_state(pp_context);
1606 ironlake_pp_upload_constants(pp_context);
/* Emit PIPELINE_SELECT switching the hardware to the media pipeline. */
1610 ironlake_pp_pipeline_select(VADriverContextP ctx,
1611 struct i965_post_processing_context *pp_context)
1613 struct intel_batchbuffer *batch = pp_context->batch;
1615 BEGIN_BATCH(batch, 1);
1616 OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1617 ADVANCE_BATCH(batch);
/*
 * Emit URB_FENCE partitioning the URB: the VFE region ends at
 * urb.cs_start and the CS region ends at urb.size.
 */
1621 ironlake_pp_urb_layout(VADriverContextP ctx,
1622 struct i965_post_processing_context *pp_context)
1624 struct intel_batchbuffer *batch = pp_context->batch;
1625 unsigned int vfe_fence, cs_fence;
1627 vfe_fence = pp_context->urb.cs_start;
1628 cs_fence = pp_context->urb.size;
1630 BEGIN_BATCH(batch, 3);
1631 OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1632 OUT_BATCH(batch, 0);
1634 (vfe_fence << UF2_VFE_FENCE_SHIFT) | /* VFE_SIZE */
1635 (cs_fence << UF2_CS_FENCE_SHIFT)); /* CS_SIZE */
1636 ADVANCE_BATCH(batch);
/*
 * Emit STATE_BASE_ADDRESS.  Only the surface state base is non-zero: it
 * points at the combined surface-state + binding-table bo; every other
 * base is left at 0 with the MODIFY bit set.
 */
1640 ironlake_pp_state_base_address(VADriverContextP ctx,
1641 struct i965_post_processing_context *pp_context)
1643 struct intel_batchbuffer *batch = pp_context->batch;
1645 BEGIN_BATCH(batch, 8);
1646 OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1647 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
/* Surface state base -> surface_state_binding_table bo (relocated). */
1648 OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1649 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1650 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1651 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1652 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1653 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1654 ADVANCE_BATCH(batch);
/* Emit MEDIA_STATE_POINTERS pointing the media pipeline at the VFE state bo. */
1658 ironlake_pp_state_pointers(VADriverContextP ctx,
1659 struct i965_post_processing_context *pp_context)
1661 struct intel_batchbuffer *batch = pp_context->batch;
1663 BEGIN_BATCH(batch, 3);
1664 OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1665 OUT_BATCH(batch, 0);
1666 OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1667 ADVANCE_BATCH(batch);
/* Emit CS_URB_STATE describing the constant-buffer URB entries. */
1671 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1672 struct i965_post_processing_context *pp_context)
1674 struct intel_batchbuffer *batch = pp_context->batch;
1676 BEGIN_BATCH(batch, 2);
1677 OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1679 ((pp_context->urb.size_cs_entry - 1) << 4) | /* URB Entry Allocation Size */
1680 (pp_context->urb.num_cs_entries << 0)); /* Number of URB Entries */
1681 ADVANCE_BATCH(batch);
/*
 * Emit CONSTANT_BUFFER pointing at the CURBE bo.  The reloc delta
 * (size_cs_entry - 1) encodes the buffer length in the low bits of the
 * relocated address dword.
 */
1685 ironlake_pp_constant_buffer(VADriverContextP ctx,
1686 struct i965_post_processing_context *pp_context)
1688 struct intel_batchbuffer *batch = pp_context->batch;
1690 BEGIN_BATCH(batch, 2);
1691 OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1692 OUT_RELOC(batch, pp_context->curbe.bo,
1693 I915_GEM_DOMAIN_INSTRUCTION, 0,
1694 pp_context->urb.size_cs_entry - 1);
1695 ADVANCE_BATCH(batch);
/*
 * Walk the destination in (x_steps x y_steps) blocks and emit one
 * MEDIA_OBJECT per block, carrying the 64-byte inline parameter block
 * (grf 5-6).  A non-zero return from pp_set_block_parameter skips the
 * block (used for out-of-range blocks).
 */
1699 ironlake_pp_object_walker(VADriverContextP ctx,
1700 struct i965_post_processing_context *pp_context)
1702 struct intel_batchbuffer *batch = pp_context->batch;
1703 int x, x_steps, y, y_steps;
1704 struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1706 x_steps = pp_context->pp_x_steps(pp_context->private_context);
1707 y_steps = pp_context->pp_y_steps(pp_context->private_context);
1709 for (y = 0; y < y_steps; y++) {
1710 for (x = 0; x < x_steps; x++) {
1711 if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1712 BEGIN_BATCH(batch, 20);
1713 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1714 OUT_BATCH(batch, 0);
1715 OUT_BATCH(batch, 0); /* no indirect data */
1716 OUT_BATCH(batch, 0);
1718 /* inline data grf 5-6 */
1719 assert(sizeof(*pp_inline_parameter) == 64);
1720 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1722 ADVANCE_BATCH(batch);
/*
 * Assemble the full Ironlake PP batch: flush, pipeline select, base
 * addresses, state pointers, URB layout, constants, then the MEDIA_OBJECT
 * walker.  Wrapped in start/end_atomic so the batch is not split.
 */
1729 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1730 struct i965_post_processing_context *pp_context)
1732 struct intel_batchbuffer *batch = pp_context->batch;
1734 intel_batchbuffer_start_atomic(batch, 0x1000);
1735 intel_batchbuffer_emit_mi_flush(batch);
1736 ironlake_pp_pipeline_select(ctx, pp_context);
1737 ironlake_pp_state_base_address(ctx, pp_context);
1738 ironlake_pp_state_pointers(ctx, pp_context);
1739 ironlake_pp_urb_layout(ctx, pp_context);
1740 ironlake_pp_cs_urb_layout(ctx, pp_context);
1741 ironlake_pp_constant_buffer(ctx, pp_context);
1742 ironlake_pp_object_walker(ctx, pp_context);
1743 intel_batchbuffer_end_atomic(batch);
1746 // Update the source y/u/v byte offsets (and RGB layout) in the static
1746 // parameters when the source surface format is packed YUV or RGB.
1747 static void i965_update_src_surface_static_parameter(
1748 VADriverContextP ctx,
1749 struct i965_post_processing_context *pp_context,
1750 const struct i965_surface *surface)
1752 struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1753 int fourcc = pp_get_surface_fourcc(ctx, surface);
/* switch (fourcc) — dispatch line not present in this listing */
1756 case VA_FOURCC_YUY2:
1757 pp_static_parameter->grf1.source_packed_u_offset = 1;
1758 pp_static_parameter->grf1.source_packed_v_offset = 3;
1760 case VA_FOURCC_UYVY:
1761 pp_static_parameter->grf1.source_packed_y_offset = 1;
1762 pp_static_parameter->grf1.source_packed_v_offset = 2;
1764 case VA_FOURCC_BGRX:
1765 case VA_FOURCC_BGRA:
1766 pp_static_parameter->grf1.source_rgb_layout = 0;
1768 case VA_FOURCC_RGBX:
1769 case VA_FOURCC_RGBA:
1770 pp_static_parameter->grf1.source_rgb_layout = 1;
/*
 * Mirror of i965_update_src_surface_static_parameter for the destination:
 * set packed-YUV byte offsets or RGB layout in the static parameters
 * according to the destination fourcc.
 */
1778 static void i965_update_dst_surface_static_parameter(
1779 VADriverContextP ctx,
1780 struct i965_post_processing_context *pp_context,
1781 const struct i965_surface *surface)
1783 struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1784 int fourcc = pp_get_surface_fourcc(ctx, surface);
/* switch (fourcc) — dispatch line not present in this listing */
1787 case VA_FOURCC_YUY2:
1788 pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1789 pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1791 case VA_FOURCC_UYVY:
1792 pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1793 pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1795 case VA_FOURCC_BGRX:
1796 case VA_FOURCC_BGRA:
1797 pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1799 case VA_FOURCC_RGBX:
1800 case VA_FOURCC_RGBA:
1801 pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
/*
 * Write a gen4/5 SURFACE_STATE for a 2D surface into slot `index` of the
 * shared surface-state/binding-table bo, emit the base-address reloc
 * (render-write domain when is_target), and point the binding-table entry
 * at the new state.
 */
1810 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1811 dri_bo *surf_bo, unsigned long surf_bo_offset,
1812 int width, int height, int pitch, int format,
1813 int index, int is_target)
1815 struct i965_surface_state *ss;
1817 unsigned int tiling;
1818 unsigned int swizzle;
1820 dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1821 ss_bo = pp_context->surface_state_binding_table.bo;
1824 dri_bo_map(ss_bo, True);
1825 assert(ss_bo->virtual);
1826 ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1827 memset(ss, 0, sizeof(*ss));
1828 ss->ss0.surface_type = I965_SURFACE_2D;
1829 ss->ss0.surface_format = format;
1830 ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
/* Hardware fields are "size - 1". */
1831 ss->ss2.width = width - 1;
1832 ss->ss2.height = height - 1;
1833 ss->ss3.pitch = pitch - 1;
1834 pp_set_surface_tiling(ss, tiling);
1835 dri_bo_emit_reloc(ss_bo,
1836 I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1838 SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
/* Binding-table entry `index` points at this surface state. */
1840 ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1841 dri_bo_unmap(ss_bo);
/*
 * Write a gen4/5 SURFACE_STATE2 (media/AVS sampler surface) into slot
 * `index`: base address, width/height/pitch, interleaved-chroma flag and
 * Cb/Cr x/y offsets for planar or NV12-style layouts.  Read-only from the
 * render domain (no write domain in the reloc).
 */
1845 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1846 dri_bo *surf_bo, unsigned long surf_bo_offset,
1847 int width, int height, int wpitch,
1848 int xoffset, int yoffset,
1849 int format, int interleave_chroma,
1852 struct i965_surface_state2 *ss2;
1854 unsigned int tiling;
1855 unsigned int swizzle;
1857 dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1858 ss2_bo = pp_context->surface_state_binding_table.bo;
1861 dri_bo_map(ss2_bo, True);
1862 assert(ss2_bo->virtual);
1863 ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1864 memset(ss2, 0, sizeof(*ss2));
1865 ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1866 ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1867 ss2->ss1.width = width - 1;
1868 ss2->ss1.height = height - 1;
1869 ss2->ss2.pitch = wpitch - 1;
1870 ss2->ss2.interleave_chroma = interleave_chroma;
1871 ss2->ss2.surface_format = format;
1872 ss2->ss3.x_offset_for_cb = xoffset;
1873 ss2->ss3.y_offset_for_cb = yoffset;
1874 pp_set_surface2_tiling(ss2, tiling);
1875 dri_bo_emit_reloc(ss2_bo,
1876 I915_GEM_DOMAIN_RENDER, 0,
1878 SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1880 ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1881 dri_bo_unmap(ss2_bo);
/*
 * Gen7 variant of i965_pp_set_surface_state: same layout work with the
 * gen7 SURFACE_STATE struct; Haswell additionally needs the shader
 * channel select (SCS) programmed.
 */
1885 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1886 dri_bo *surf_bo, unsigned long surf_bo_offset,
1887 int width, int height, int pitch, int format,
1888 int index, int is_target)
1890 struct i965_driver_data * const i965 = i965_driver_data(ctx);
1891 struct gen7_surface_state *ss;
1893 unsigned int tiling;
1894 unsigned int swizzle;
1896 dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1897 ss_bo = pp_context->surface_state_binding_table.bo;
1900 dri_bo_map(ss_bo, True);
1901 assert(ss_bo->virtual);
1902 ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1903 memset(ss, 0, sizeof(*ss));
1904 ss->ss0.surface_type = I965_SURFACE_2D;
1905 ss->ss0.surface_format = format;
1906 ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1907 ss->ss2.width = width - 1;
1908 ss->ss2.height = height - 1;
1909 ss->ss3.pitch = pitch - 1;
1910 gen7_pp_set_surface_tiling(ss, tiling);
/* HSW requires explicit shader channel selects in SURFACE_STATE. */
1911 if (IS_HASWELL(i965->intel.device_info))
1912 gen7_render_set_surface_scs(ss);
1913 dri_bo_emit_reloc(ss_bo,
1914 I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1916 SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1918 ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1919 dri_bo_unmap(ss_bo);
/*
 * Gen7 variant of i965_pp_set_surface2_state: write a gen7 SURFACE_STATE2
 * (AVS/media sampler surface) into slot `index` with Cb/Cr offsets for
 * planar layouts, then hook it into the binding table.
 */
1923 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1924 dri_bo *surf_bo, unsigned long surf_bo_offset,
1925 int width, int height, int wpitch,
1926 int xoffset, int yoffset,
1927 int format, int interleave_chroma,
1930 struct gen7_surface_state2 *ss2;
1932 unsigned int tiling;
1933 unsigned int swizzle;
1935 dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1936 ss2_bo = pp_context->surface_state_binding_table.bo;
1939 dri_bo_map(ss2_bo, True);
1940 assert(ss2_bo->virtual);
1941 ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1942 memset(ss2, 0, sizeof(*ss2));
1943 ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1944 ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1945 ss2->ss1.width = width - 1;
1946 ss2->ss1.height = height - 1;
1947 ss2->ss2.pitch = wpitch - 1;
1948 ss2->ss2.interleave_chroma = interleave_chroma;
1949 ss2->ss2.surface_format = format;
1950 ss2->ss3.x_offset_for_cb = xoffset;
1951 ss2->ss3.y_offset_for_cb = yoffset;
1952 gen7_pp_set_surface2_tiling(ss2, tiling);
1953 dri_bo_emit_reloc(ss2_bo,
1954 I915_GEM_DOMAIN_RENDER, 0,
1956 SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1958 ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1959 dri_bo_unmap(ss2_bo);
/*
 * Compute per-plane width/height/pitch/offset for a VA surface or image
 * and create the media read/write surface states (R8 view) starting at
 * binding index `base_index`.  YV12/YV16 swap the U and V plane order;
 * packed YUV doubles and 32-bit RGB quadruples the byte width of the
 * first plane.  Results are returned through the 3-element arrays.
 */
1963 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1964 const struct i965_surface *surface,
1965 int base_index, int is_target,
1966 int *width, int *height, int *pitch, int *offset)
1968 struct object_surface *obj_surface;
1969 struct object_image *obj_image;
1971 int fourcc = pp_get_surface_fourcc(ctx, surface);
/* For YV12/YV16 the V plane precedes U in memory, so swap plane indices. */
1973 const int U = ((fourcc == VA_FOURCC_YV12) ||
1974 (fourcc == VA_FOURCC_YV16))
1976 const int V = ((fourcc == VA_FOURCC_YV12) ||
1977 (fourcc == VA_FOURCC_YV16))
1980 int interleaved_uv = fourcc == VA_FOURCC_NV12;
1981 int packed_yuv = (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY);
1982 int full_packed_format = (fourcc == VA_FOURCC_RGBA ||
1983 fourcc == VA_FOURCC_RGBX ||
1984 fourcc == VA_FOURCC_BGRA ||
1985 fourcc == VA_FOURCC_BGRX);
1986 int scale_factor_of_1st_plane_width_in_byte = 1;
/* Case 1: a VA surface — derive plane geometry from the object_surface. */
1988 if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1989 obj_surface = (struct object_surface *)surface->base;
1990 bo = obj_surface->bo;
1991 width[0] = obj_surface->orig_width;
1992 height[0] = obj_surface->orig_height;
1993 pitch[0] = obj_surface->width;
1996 if (full_packed_format) {
1997 scale_factor_of_1st_plane_width_in_byte = 4;
1999 else if (packed_yuv ) {
2000 scale_factor_of_1st_plane_width_in_byte = 2;
2002 else if (interleaved_uv) {
2003 width[1] = obj_surface->orig_width;
2004 height[1] = obj_surface->orig_height / 2;
2005 pitch[1] = obj_surface->width;
2006 offset[1] = offset[0] + obj_surface->width * obj_surface->height;
2008 width[1] = obj_surface->orig_width / 2;
2009 height[1] = obj_surface->orig_height / 2;
2010 pitch[1] = obj_surface->width / 2;
2011 offset[1] = offset[0] + obj_surface->width * obj_surface->height;
2012 width[2] = obj_surface->orig_width / 2;
2013 height[2] = obj_surface->orig_height / 2;
2014 pitch[2] = obj_surface->width / 2;
2015 offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
/* Case 2: a VA image — plane geometry comes from the VAImage layout. */
2018 obj_image = (struct object_image *)surface->base;
2020 width[0] = obj_image->image.width;
2021 height[0] = obj_image->image.height;
2022 pitch[0] = obj_image->image.pitches[0];
2023 offset[0] = obj_image->image.offsets[0];
2025 if (full_packed_format) {
2026 scale_factor_of_1st_plane_width_in_byte = 4;
2028 else if (packed_yuv ) {
2029 scale_factor_of_1st_plane_width_in_byte = 2;
2031 else if (interleaved_uv) {
2032 width[1] = obj_image->image.width;
2033 height[1] = obj_image->image.height / 2;
2034 pitch[1] = obj_image->image.pitches[1];
2035 offset[1] = obj_image->image.offsets[1];
2037 width[1] = obj_image->image.width / 2;
2038 height[1] = obj_image->image.height / 2;
2039 pitch[1] = obj_image->image.pitches[1];
2040 offset[1] = obj_image->image.offsets[1];
2041 width[2] = obj_image->image.width / 2;
2042 height[2] = obj_image->image.height / 2;
2043 pitch[2] = obj_image->image.pitches[2];
2044 offset[2] = obj_image->image.offsets[2];
/* YV16 chroma is subsampled horizontally only (full height). */
2045 if (fourcc == VA_FOURCC_YV16) {
2046 width[1] = obj_image->image.width / 2;
2047 height[1] = obj_image->image.height;
2048 width[2] = obj_image->image.width / 2;
2049 height[2] = obj_image->image.height;
/* Y (or packed/RGB) plane at base_index; width is in dwords (R8 view). */
2055 i965_pp_set_surface_state(ctx, pp_context,
2057 ALIGN(width[Y] *scale_factor_of_1st_plane_width_in_byte, 4) / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
2058 base_index, is_target);
/* Chroma planes only exist for planar / semi-planar formats. */
2060 if (!packed_yuv && !full_packed_format) {
2061 if (interleaved_uv) {
2062 i965_pp_set_surface_state(ctx, pp_context,
2064 ALIGN(width[UV], 4) / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
2065 base_index + 1, is_target);
2068 i965_pp_set_surface_state(ctx, pp_context,
2070 ALIGN(width[U], 4) / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
2071 base_index + 1, is_target);
2074 i965_pp_set_surface_state(ctx, pp_context,
2076 ALIGN(width[V], 4) / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
2077 base_index + 2, is_target);
/*
 * Gen7 counterpart of pp_set_media_rw_message_surface.  Uses the fourcc
 * info table (num_planes, bpp, h/v subsampling factors) instead of
 * per-fourcc special cases, clips plane sizes to the given rect, and
 * creates either plain R8/R8G8 surface states (media R/W path) or AVS
 * sampler SURFACE_STATE2 entries, including the RGB channel-swap flags in
 * the static parameters.
 * NOTE(review): several branch/argument lines are missing from this
 * listing; comments are limited to the visible code.
 */
2084 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2085 const struct i965_surface *surface,
2086 int base_index, int is_target,
2087 const VARectangle *rect,
2088 int *width, int *height, int *pitch, int *offset)
2090 struct object_surface *obj_surface;
2091 struct object_image *obj_image;
2093 int fourcc = pp_get_surface_fourcc(ctx, surface);
2094 const i965_fourcc_info *fourcc_info = get_fourcc_info(fourcc);
2096 if (fourcc_info == NULL)
/* Case 1: VA surface — plane geometry from object_surface, clipped to rect. */
2099 if (surface->type == I965_SURFACE_TYPE_SURFACE) {
2100 obj_surface = (struct object_surface *)surface->base;
2101 bo = obj_surface->bo;
2102 width[0] = MIN(rect->x + rect->width, obj_surface->orig_width);
2103 height[0] = MIN(rect->y + rect->height, obj_surface->orig_height);
2104 pitch[0] = obj_surface->width;
2107 if (fourcc_info->num_planes == 1 && is_target)
2108 width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
2110 width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width);
2111 height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height);
2112 pitch[1] = obj_surface->cb_cr_pitch;
2113 offset[1] = obj_surface->y_cb_offset * obj_surface->width;
2115 width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width);
2116 height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height);
2117 pitch[2] = obj_surface->cb_cr_pitch;
2118 offset[2] = obj_surface->y_cr_offset * obj_surface->width;
/* Case 2: VA image — plane geometry from the VAImage layout. */
2122 /* FIXME: add support for ARGB/ABGR image */
2123 obj_image = (struct object_image *)surface->base;
2125 width[0] = MIN(rect->x + rect->width, obj_image->image.width);
2126 height[0] = MIN(rect->y + rect->height, obj_image->image.height);
2127 pitch[0] = obj_image->image.pitches[0];
2128 offset[0] = obj_image->image.offsets[0];
2130 if (fourcc_info->num_planes == 1) {
2132 width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
2133 } else if (fourcc_info->num_planes == 2) {
2136 assert(fourcc_info->num_components == 3);
/* Component table tells which image plane holds U and which holds V. */
2138 U = fourcc_info->components[1].plane;
2139 V = fourcc_info->components[2].plane;
2140 assert((U == 1 && V == 2) ||
2141 (U == 2 && V == 1));
2144 /* Always set width/height although they aren't used for fourcc_info->num_planes == 1 */
2145 width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor);
2146 height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor);
2147 pitch[1] = obj_image->image.pitches[U];
2148 offset[1] = obj_image->image.offsets[U];
2150 width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor);
2151 height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor);
2152 pitch[2] = obj_image->image.pitches[V];
2153 offset[2] = obj_image->image.offsets[V];
/* Media R/W path: per-plane R8 / R8G8 surface states. */
2157 gen7_pp_set_surface_state(ctx, pp_context,
2159 ALIGN(width[0], 4) / 4, height[0], pitch[0],
2160 I965_SURFACEFORMAT_R8_UINT,
2163 if (fourcc_info->num_planes == 2) {
2164 gen7_pp_set_surface_state(ctx, pp_context,
2166 ALIGN(width[1], 2) / 2, height[1], pitch[1],
2167 I965_SURFACEFORMAT_R8G8_SINT,
2169 } else if (fourcc_info->num_planes == 3) {
2170 gen7_pp_set_surface_state(ctx, pp_context,
2172 ALIGN(width[1], 4) / 4, height[1], pitch[1],
2173 I965_SURFACEFORMAT_R8_SINT,
2175 gen7_pp_set_surface_state(ctx, pp_context,
2177 ALIGN(width[2], 4) / 4, height[2], pitch[2],
2178 I965_SURFACEFORMAT_R8_SINT,
/* Target RGB: record whether the kernel must swap R/B on save. */
2182 if (fourcc_info->format == I965_COLOR_RGB) {
2183 struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2184 /* the format is MSB: X-B-G-R */
2185 pp_static_parameter->grf2.save_avs_rgb_swap = 0;
2186 if ((fourcc == VA_FOURCC_BGRA) ||
2187 (fourcc == VA_FOURCC_BGRX)) {
2188 /* It is stored as MSB: X-R-G-B */
2189 pp_static_parameter->grf2.save_avs_rgb_swap = 1;
/* AVS sampler path: pick the SURFACE_STATE2 format for plane 0. */
2193 int format0 = SURFACE_FORMAT_Y8_UNORM;
2196 case VA_FOURCC_YUY2:
2197 format0 = SURFACE_FORMAT_YCRCB_NORMAL;
2200 case VA_FOURCC_UYVY:
2201 format0 = SURFACE_FORMAT_YCRCB_SWAPY;
2208 if (fourcc_info->format == I965_COLOR_RGB) {
2209 struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2210 /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
2211 format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
2212 pp_static_parameter->grf2.src_avs_rgb_swap = 0;
2213 if ((fourcc == VA_FOURCC_BGRA) ||
2214 (fourcc == VA_FOURCC_BGRX)) {
2215 pp_static_parameter->grf2.src_avs_rgb_swap = 1;
2219 gen7_pp_set_surface2_state(ctx, pp_context,
2221 width[0], height[0], pitch[0],
2226 if (fourcc_info->num_planes == 2) {
2227 gen7_pp_set_surface2_state(ctx, pp_context,
2229 width[1], height[1], pitch[1],
2231 SURFACE_FORMAT_R8B8_UNORM, 0,
2233 } else if (fourcc_info->num_planes == 3) {
2234 gen7_pp_set_surface2_state(ctx, pp_context,
2236 width[1], height[1], pitch[1],
2238 SURFACE_FORMAT_R8_UNORM, 0,
2240 gen7_pp_set_surface2_state(ctx, pp_context,
2242 width[2], height[2], pitch[2],
2244 SURFACE_FORMAT_R8_UNORM, 0,
/* Null pass: horizontal walker step count (body not shown in this listing;
 * presumably a constant — confirm against the full source). */
2251 pp_null_x_steps(void *private_context)
/* Null pass: vertical walker step count (body not shown in this listing). */
2257 pp_null_y_steps(void *private_context)
/* Null pass per-block parameter hook; return value controls whether the
 * walker emits a MEDIA_OBJECT for (x, y) (body not shown in this listing). */
2263 pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
/*
 * Initialize the "null" post-processing pass: install the no-op step /
 * block-parameter callbacks, propagate the surface flags and succeed.
 */
2269 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2270 const struct i965_surface *src_surface,
2271 const VARectangle *src_rect,
2272 struct i965_surface *dst_surface,
2273 const VARectangle *dst_rect,
2276 /* private function & data */
2277 pp_context->pp_x_steps = pp_null_x_steps;
2278 pp_context->pp_y_steps = pp_null_y_steps;
2279 pp_context->private_context = NULL;
2280 pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
2282 dst_surface->flags = src_surface->flags;
2284 return VA_STATUS_SUCCESS;
/* Load/save pass: horizontal walker step count (body not shown in this
 * listing — the 1xN layout below suggests it returns 1; confirm). */
2288 pp_load_save_x_steps(void *private_context)
/* Load/save pass: one vertical step per 8-pixel-high block row. */
2294 pp_load_save_y_steps(void *private_context)
2296 struct pp_load_save_context *pp_load_save_context = private_context;
2298 return pp_load_save_context->dest_h / 8;
/*
 * Per-block inline parameters for the load/save kernel: destination block
 * origin in pixels for the 16x8 block at walker position (x, y).
 */
2302 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2304 struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2305 struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)pp_context->private_context;
2307 pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
2308 pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
/*
 * Compute edge pixel masks for the 16x8 block walker.  Because the dest x
 * offset must be dword aligned, the dest rect is extended on the left and
 * the extra pixels masked off; right/bottom masks handle rects that are
 * not multiples of the block width/height.
 */
2313 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
2316 /* x offset of dest surface must be dword aligned.
2317 * so we have to extend dst surface on left edge, and mask out pixels not interested
2319 if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) {
2320 pp_context->block_horizontal_mask_left = 0;
2321 for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++)
2323 pp_context->block_horizontal_mask_left |= 1<<i;
/* Aligned x: keep all 16 columns of the leftmost block. */
2327 pp_context->block_horizontal_mask_left = 0xffff;
/* Right edge: mask off columns beyond the (left-extended) width. */
2330 int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2331 if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){
2332 pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1;
2335 pp_context->block_horizontal_mask_right = 0xffff;
/* Bottom edge: mask off rows beyond the rect height (8-row blocks). */
2338 if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){
2339 pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1;
2342 pp_context->block_vertical_mask_bottom = 0xff;
/*
 * Initialize the planar-to-planar load/save (copy) pass: bind the source
 * planes at index 1 and destination planes at index 7, install the
 * load/save walker callbacks, and fill the static/inline parameters.
 * The destination x is pulled back to dword alignment and the extent
 * rounded up to whole 16x8 blocks; the source origin lands in grf3.
 */
2347 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2348 const struct i965_surface *src_surface,
2349 const VARectangle *src_rect,
2350 struct i965_surface *dst_surface,
2351 const VARectangle *dst_rect,
2354 struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->pp_load_save_context;
2355 struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2356 struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2357 int width[3], height[3], pitch[3], offset[3];
2359 /* source surface */
2360 pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
2361 width, height, pitch, offset);
2363 /* destination surface */
2364 pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
2365 width, height, pitch, offset);
2367 /* private function & data */
2368 pp_context->pp_x_steps = pp_load_save_x_steps;
2369 pp_context->pp_y_steps = pp_load_save_y_steps;
2370 pp_context->private_context = &pp_context->pp_load_save_context;
2371 pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
/* Pull dest x back to dword alignment; widen the rect to compensate. */
2373 int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;;
2374 pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
2375 pp_load_save_context->dest_y = dst_rect->y;
2376 pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
2377 pp_load_save_context->dest_w = ALIGN(dst_rect->width+dst_left_edge_extend, 16);
2379 pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16; /* 1 x N */
2380 pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
2382 pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
2383 pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
2385 // update u/v offset for packed yuv
2386 i965_update_src_surface_static_parameter (ctx, pp_context, src_surface);
2387 i965_update_dst_surface_static_parameter (ctx, pp_context, dst_surface);
2389 dst_surface->flags = src_surface->flags;
2391 return VA_STATUS_SUCCESS;
/* Scaling pass: horizontal walker step count (body not shown in this
 * listing — the 1xN layout below suggests it returns 1; confirm). */
2395 pp_scaling_x_steps(void *private_context)
/* Scaling pass: one vertical step per 8-pixel-high block row. */
2401 pp_scaling_y_steps(void *private_context)
2403 struct pp_scaling_context *pp_scaling_context = private_context;
2405 return pp_scaling_context->dest_h / 8;
/*
 * Per-block inline parameters for the bilinear scaler: the normalized
 * source origin advances by (scaling step * block size) per block, and
 * the destination origin is the 16x8 block position in pixels.
 */
2409 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2411 struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)pp_context->private_context;
2412 struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2413 struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2414 float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2415 float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2417 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
2418 pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
2419 pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
2420 pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
/*
 * Initialize the NV12 bilinear scaling pass: bind source Y/UV planes at
 * indices 1/2 and destination Y/UV at 7/8, program bilinear clamp
 * samplers for the two source planes, install the scaling walker
 * callbacks, and derive the normalized source origin and per-axis
 * scaling steps from the src/dst rectangles (with the dest left edge
 * pulled back to dword alignment).
 */
2426 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2427 const struct i965_surface *src_surface,
2428 const VARectangle *src_rect,
2429 struct i965_surface *dst_surface,
2430 const VARectangle *dst_rect,
2433 struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->pp_scaling_context;
2434 struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2435 struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2436 struct object_surface *obj_surface;
2437 struct i965_sampler_state *sampler_state;
2438 int in_w, in_h, in_wpitch, in_hpitch;
2439 int out_w, out_h, out_wpitch, out_hpitch;
2441 /* source surface */
2442 obj_surface = (struct object_surface *)src_surface->base;
2443 in_w = obj_surface->orig_width;
2444 in_h = obj_surface->orig_height;
2445 in_wpitch = obj_surface->width;
2446 in_hpitch = obj_surface->height;
2448 /* source Y surface index 1 */
2449 i965_pp_set_surface_state(ctx, pp_context,
2451 in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2454 /* source UV surface index 2 */
2455 i965_pp_set_surface_state(ctx, pp_context,
2456 obj_surface->bo, in_wpitch * in_hpitch,
2457 ALIGN(in_w, 2) / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2460 /* destination surface */
2461 obj_surface = (struct object_surface *)dst_surface->base;
2462 out_w = obj_surface->orig_width;
2463 out_h = obj_surface->orig_height;
2464 out_wpitch = obj_surface->width;
2465 out_hpitch = obj_surface->height;
2467 /* destination Y surface index 7 */
2468 i965_pp_set_surface_state(ctx, pp_context,
2470 ALIGN(out_w, 4) / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2473 /* destination UV surface index 8 */
2474 i965_pp_set_surface_state(ctx, pp_context,
2475 obj_surface->bo, out_wpitch * out_hpitch,
2476 ALIGN(out_w, 4) / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
/* Program bilinear, edge-clamped samplers for the two source planes. */
2480 dri_bo_map(pp_context->sampler_state_table.bo, True);
2481 assert(pp_context->sampler_state_table.bo->virtual);
2482 sampler_state = pp_context->sampler_state_table.bo->virtual;
2484 /* SIMD16 Y index 1 */
2485 sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
2486 sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2487 sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2488 sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2489 sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2491 /* SIMD16 UV index 2 */
2492 sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
2493 sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2494 sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2495 sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2496 sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2498 dri_bo_unmap(pp_context->sampler_state_table.bo);
2500 /* private function & data */
2501 pp_context->pp_x_steps = pp_scaling_x_steps;
2502 pp_context->pp_y_steps = pp_scaling_y_steps;
2503 pp_context->private_context = &pp_context->pp_scaling_context;
2504 pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
/* Align dest left edge to a dword; extend the source window to match so
 * the scale ratio is preserved. */
2506 int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2507 float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2508 pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
2509 pp_scaling_context->dest_y = dst_rect->y;
2510 pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2511 pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
2512 pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2513 pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
2515 pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2517 pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2518 pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16; /* 1 x N */
2519 pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
2521 dst_surface->flags = src_surface->flags;
2523 return VA_STATUS_SUCCESS;
/* AVS pass: one horizontal step per 16-pixel-wide block column. */
2527 pp_avs_x_steps(void *private_context)
2529 struct pp_avs_context *pp_avs_context = private_context;
2531 return pp_avs_context->dest_w / 16;
/* AVS pass: vertical walker step count (body not shown in this listing). */
2535 pp_avs_y_steps(void *private_context)
/*
 * Fill the per-block inline parameters for the AVS scaler kernel at
 * block coordinate (x, y); blocks are 16 pixels wide and 8 rows tall.
 * With NLAS (non-linear anamorphic scaling) disabled, the horizontal
 * step is constant and the source origin is linear in x.  With NLAS
 * enabled, the destination is split into a non-linearly scaled left
 * edge, a linearly scaled center, and a non-linearly scaled right
 * edge, with the step ramped via grf6.video_step_delta.
 */
2541 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2543 struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
2544 struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2545 struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2546 float src_x_steping, src_y_steping, video_step_delta;
/* Width the source would cover if scaled uniformly by the vertical
 * scale factor (aspect-preserving width), rounded up to a 16-pixel
 * block. */
2547 int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
2549 if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
/* Linear scaling: constant step; origin is x blocks of 16 samples in. */
2550 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2551 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
2552 } else if (tmp_w >= pp_avs_context->dest_w) {
/* The aspect-preserving width already fills the destination: scale
 * linearly over tmp_w and center the image horizontally. */
2553 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2554 pp_inline_parameter->grf6.video_step_delta = 0;
2557 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
2558 pp_avs_context->src_normalized_x;
/* Advance the origin by one 16-sample block of the current
 * (possibly ramped) step; 16*15/2 is the arithmetic-series term of
 * the delta within the block. */
2560 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2561 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2562 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2563 16 * 15 * video_step_delta / 2;
/* NLAS region sizes, in 16-pixel blocks: nls_left/nls_right are the
 * non-linearly scaled edge regions; factor_a/factor_b tune how much
 * of the source each edge consumes and how steep the ramp starts. */
2566 int n0, n1, n2, nls_left, nls_right;
2567 int factor_a = 5, factor_b = 4;
2570 n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
2571 n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
2572 n2 = tmp_w / (16 * factor_a);
2574 nls_right = n1 + n2;
/* Fraction of the (normalized) source consumed by each edge region. */
2575 f = (float) n2 * 16 / tmp_w;
2578 pp_inline_parameter->grf6.video_step_delta = 0.0;
2581 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
2582 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2584 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2585 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2586 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2587 16 * 15 * video_step_delta / 2;
/* Left edge: solve for ramp start a and per-sample increment b so the
 * ramped steps sum to exactly the fraction f of the source. */
2591 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
2592 float a = f / (nls_left * 16 * factor_b);
2593 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
2595 pp_inline_parameter->grf6.video_step_delta = b;
2598 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2599 pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
2601 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2602 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2603 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2604 16 * 15 * video_step_delta / 2;
/* Carry the ramp into the next block: 16 samples' worth of delta. */
2605 pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
2607 } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
2608 /* scale the center linearly */
2609 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2610 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2611 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2612 16 * 15 * video_step_delta / 2;
2613 pp_inline_parameter->grf6.video_step_delta = 0.0;
2614 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
/* Right edge: mirror of the left ramp with a negative increment, so
 * the step decays toward the image border. */
2616 float a = f / (nls_right * 16 * factor_b);
2617 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
2619 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2620 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2621 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2622 16 * 15 * video_step_delta / 2;
2623 pp_inline_parameter->grf6.video_step_delta = -b;
2625 if (x == (pp_avs_context->dest_w / 16 - nls_right))
2626 pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16 - 1) * b;
2628 pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
/* Vertical origin and destination block position (8 rows per block);
 * the vertical step is constant and lives in the static parameters. */
2633 src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2634 pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
2635 pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2636 pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
/*
 * AVS filter-generation limits for ILK-class (Gen5) hardware:
 * coefficients are fixed point with 6 fractional bits; kernels have
 * 8 luma and 4 chroma taps.  The two coefficient sub-blocks below
 * bound the runtime-generated kernels.
 * NOTE(review): assumed to be the minimum and maximum tables, in that
 * order — the sub-block field names fall outside this excerpt; confirm
 * against the AVSConfig declaration.
 */
2641 static const AVSConfig gen5_avs_config = {
2642 .coeff_frac_bits = 6,
/* Smallest representable coefficient increment (1 / 2^frac_bits). */
2643 .coeff_epsilon = 1.0f / (1U << 6),
2645 .num_luma_coeffs = 8,
2646 .num_chroma_coeffs = 4,
/* Lower bounds (note the center luma taps may not go negative). */
2650 .y_k_h = { -0.25f, -0.5f, -1, 0, 0, -1, -0.5f, -0.25f },
2651 .y_k_v = { -0.25f, -0.5f, -1, 0, 0, -1, -0.5f, -0.25f },
2652 .uv_k_h = { -1, 0, 0, -1 },
2653 .uv_k_v = { -1, 0, 0, -1 },
/* Upper bounds. */
2656 .y_k_h = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2657 .y_k_v = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2658 .uv_k_h = { 1, 2, 2, 1 },
2659 .uv_k_v = { 1, 2, 2, 1 },
/*
 * AVS filter-generation limits for SNB-class (Gen6) hardware.  Same
 * fixed-point format and tap counts as gen5_avs_config, but the lower
 * bounds allow the center luma taps to go down to -2 (Gen5 clamps
 * them at 0).
 * NOTE(review): sub-block ordering (min, then max) assumed as above.
 */
2664 static const AVSConfig gen6_avs_config = {
2665 .coeff_frac_bits = 6,
/* Smallest representable coefficient increment (1 / 2^frac_bits). */
2666 .coeff_epsilon = 1.0f / (1U << 6),
2668 .num_luma_coeffs = 8,
2669 .num_chroma_coeffs = 4,
/* Lower bounds. */
2673 .y_k_h = { -0.25f, -0.5f, -1, -2, -2, -1, -0.5f, -0.25f },
2674 .y_k_v = { -0.25f, -0.5f, -1, -2, -2, -1, -0.5f, -0.25f },
2675 .uv_k_h = { -1, 0, 0, -1 },
2676 .uv_k_v = { -1, 0, 0, -1 },
/* Upper bounds. */
2679 .y_k_h = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2680 .y_k_v = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2681 .uv_k_h = { 1, 2, 2, 1 },
2682 .uv_k_v = { 1, 2, 2, 1 },
/*
 * Initialize the NV12 AVS (Adaptive Video Scaler) post-processing
 * pass for pre-Gen7 hardware: bind the source Y/UV planes and the
 * destination Y/UV planes, build the sampler-8x8 coefficient table
 * from the runtime-generated AVS kernels, program the Y and UV
 * sampler-8x8 states (IEF parameters are filled in but bypassed),
 * and set up the static/inline kernel parameters for a column walk
 * of the destination (one 16-pixel-wide column per thread, 8 rows
 * per block).  Always returns VA_STATUS_SUCCESS.
 */
2688 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2689 const struct i965_surface *src_surface,
2690 const VARectangle *src_rect,
2691 struct i965_surface *dst_surface,
2692 const VARectangle *dst_rect,
2695 struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
2696 struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2697 struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2698 struct object_surface *obj_surface;
2699 struct i965_sampler_8x8 *sampler_8x8;
2700 struct i965_sampler_8x8_state *sampler_8x8_state;
2702 int in_w, in_h, in_wpitch, in_hpitch;
2703 int out_w, out_h, out_wpitch, out_hpitch;
2705 AVSState * const avs = &pp_avs_context->state;
/* Non-linear anamorphic scaling requested via the VPP filter flags? */
2708 const int nlas = (pp_context->filter_flags & VA_FILTER_SCALING_MASK) ==
2709 VA_FILTER_SCALING_NL_ANAMORPHIC;
/* Source geometry: orig_* is the logical size; width/height are the
 * allocated pitch and row count. */
2712 obj_surface = (struct object_surface *)src_surface->base;
2713 in_w = obj_surface->orig_width;
2714 in_h = obj_surface->orig_height;
2715 in_wpitch = obj_surface->width;
2716 in_hpitch = obj_surface->height;
2718 /* source Y surface index 1 */
2719 i965_pp_set_surface2_state(ctx, pp_context,
2721 in_w, in_h, in_wpitch,
2723 SURFACE_FORMAT_Y8_UNORM, 0,
2726 /* source UV surface index 2 */
2727 i965_pp_set_surface2_state(ctx, pp_context,
/* NV12: interleaved UV plane starts right after the Y plane. */
2728 obj_surface->bo, in_wpitch * in_hpitch,
2729 in_w / 2, in_h / 2, in_wpitch,
2731 SURFACE_FORMAT_R8B8_UNORM, 0,
2734 /* destination surface */
2735 obj_surface = (struct object_surface *)dst_surface->base;
2736 out_w = obj_surface->orig_width;
2737 out_h = obj_surface->orig_height;
2738 out_wpitch = obj_surface->width;
2739 out_hpitch = obj_surface->height;
2740 assert(out_w <= out_wpitch && out_h <= out_hpitch);
2742 /* destination Y surface index 7 */
2743 i965_pp_set_surface_state(ctx, pp_context,
/* Written 4 bytes at a time, hence width in DWORD units. */
2745 ALIGN(out_w, 4) / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2748 /* destination UV surface index 8 */
2749 i965_pp_set_surface_state(ctx, pp_context,
2750 obj_surface->bo, out_wpitch * out_hpitch,
2751 ALIGN(out_w, 4) / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2754 /* sampler 8x8 state */
2755 dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2756 assert(pp_context->sampler_state_table.bo_8x8->virtual);
2757 assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2758 sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2759 memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
/* Scale factors feed the AVS coefficient generator (kernel choice
 * depends on up- vs. down-scaling and the filter flags). */
2761 sx = (float)dst_rect->width / src_rect->width;
2762 sy = (float)dst_rect->height / src_rect->height;
2763 avs_update_coefficients(avs, sx, sy, pp_context->filter_flags);
2765 assert(avs->config->num_phases == 16);
/* num_phases + 1 table entries: one per phase plus the phase == 1.0
 * endpoint.  intel_format_convert() packs each coefficient into the
 * hardware's signed fixed-point format with 6 fractional bits. */
2766 for (i = 0; i <= 16; i++) {
2767 const AVSCoeffs * const coeffs = &avs->coeffs[i];
/* 8-tap horizontal luma kernel. */
2769 sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 =
2770 intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
2771 sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 =
2772 intel_format_convert(coeffs->y_k_h[1], 1, 6, 1);
2773 sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 =
2774 intel_format_convert(coeffs->y_k_h[2], 1, 6, 1);
2775 sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 =
2776 intel_format_convert(coeffs->y_k_h[3], 1, 6, 1);
2777 sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 =
2778 intel_format_convert(coeffs->y_k_h[4], 1, 6, 1);
2779 sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 =
2780 intel_format_convert(coeffs->y_k_h[5], 1, 6, 1);
2781 sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 =
2782 intel_format_convert(coeffs->y_k_h[6], 1, 6, 1);
2783 sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 =
2784 intel_format_convert(coeffs->y_k_h[7], 1, 6, 1);
/* 4-tap horizontal chroma kernel (center taps c2..c5 of table 1). */
2786 sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 =
2787 intel_format_convert(coeffs->uv_k_h[0], 1, 6, 1);
2788 sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 =
2789 intel_format_convert(coeffs->uv_k_h[1], 1, 6, 1);
2790 sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 =
2791 intel_format_convert(coeffs->uv_k_h[2], 1, 6, 1);
2792 sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 =
2793 intel_format_convert(coeffs->uv_k_h[3], 1, 6, 1);
/* 8-tap vertical luma kernel. */
2795 sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 =
2796 intel_format_convert(coeffs->y_k_v[0], 1, 6, 1);
2797 sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 =
2798 intel_format_convert(coeffs->y_k_v[1], 1, 6, 1);
2799 sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 =
2800 intel_format_convert(coeffs->y_k_v[2], 1, 6, 1);
2801 sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 =
2802 intel_format_convert(coeffs->y_k_v[3], 1, 6, 1);
2803 sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 =
2804 intel_format_convert(coeffs->y_k_v[4], 1, 6, 1);
2805 sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 =
2806 intel_format_convert(coeffs->y_k_v[5], 1, 6, 1);
2807 sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 =
2808 intel_format_convert(coeffs->y_k_v[6], 1, 6, 1);
2809 sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 =
2810 intel_format_convert(coeffs->y_k_v[7], 1, 6, 1);
/* 4-tap vertical chroma kernel. */
2812 sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 =
2813 intel_format_convert(coeffs->uv_k_v[0], 1, 6, 1);
2814 sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 =
2815 intel_format_convert(coeffs->uv_k_v[1], 1, 6, 1);
2816 sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 =
2817 intel_format_convert(coeffs->uv_k_v[2], 1, 6, 1);
2818 sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 =
2819 intel_format_convert(coeffs->uv_k_v[3], 1, 6, 1);
2822 /* Adaptive filter for all channels (DW4.15) */
2823 sampler_8x8_state->coefficients[0].dw4.table_1x_filter_c1 = 1U << 7;
/* 0 when no scaling filter is active, -1 (all-ones in the bitfield)
 * when AVS filtering is needed. */
2825 sampler_8x8_state->dw136.default_sharpness_level =
2826 -avs_is_needed(pp_context->filter_flags);
2827 sampler_8x8_state->dw137.ilk.bypass_y_adaptive_filtering = 1;
2828 sampler_8x8_state->dw137.ilk.bypass_x_adaptive_filtering = 1;
2829 dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2832 dri_bo_map(pp_context->sampler_state_table.bo, True);
2833 assert(pp_context->sampler_state_table.bo->virtual);
2834 assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
2835 sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2837 /* sample_8x8 Y index 1 */
/* 8-tap adaptive AVS filter; the IEF (image enhancement filter)
 * parameters below are programmed but inert since ief_bypass = 1. */
2839 memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2840 sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2841 sampler_8x8[index].dw0.ief_bypass = 1;
2842 sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2843 sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
/* Coefficient table address, in 32-byte units. */
2844 sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2845 sampler_8x8[index].dw2.global_noise_estimation = 22;
2846 sampler_8x8[index].dw2.strong_edge_threshold = 8;
2847 sampler_8x8[index].dw2.weak_edge_threshold = 1;
2848 sampler_8x8[index].dw3.strong_edge_weight = 7;
2849 sampler_8x8[index].dw3.regular_weight = 2;
2850 sampler_8x8[index].dw3.non_edge_weight = 0;
2851 sampler_8x8[index].dw3.gain_factor = 40;
2852 sampler_8x8[index].dw4.steepness_boost = 0;
2853 sampler_8x8[index].dw4.steepness_threshold = 0;
2854 sampler_8x8[index].dw4.mr_boost = 0;
2855 sampler_8x8[index].dw4.mr_threshold = 5;
2856 sampler_8x8[index].dw5.pwl1_point_1 = 4;
2857 sampler_8x8[index].dw5.pwl1_point_2 = 12;
2858 sampler_8x8[index].dw5.pwl1_point_3 = 16;
2859 sampler_8x8[index].dw5.pwl1_point_4 = 26;
2860 sampler_8x8[index].dw6.pwl1_point_5 = 40;
2861 sampler_8x8[index].dw6.pwl1_point_6 = 160;
2862 sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2863 sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2864 sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2865 sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2866 sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2867 sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2868 sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2869 sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2870 sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2871 sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2872 sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2873 sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2874 sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2875 sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2876 sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2877 sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2878 sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2879 sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2880 sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2881 sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2882 sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2883 sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2884 sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2885 sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2886 sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2887 sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2888 sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2889 sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2890 sampler_8x8[index].dw13.limiter_boost = 0;
2891 sampler_8x8[index].dw13.minimum_limiter = 10;
2892 sampler_8x8[index].dw13.maximum_limiter = 11;
2893 sampler_8x8[index].dw14.clip_limiter = 130;
/* Relocate dw1's pointer so the kernel sees the final GPU address of
 * the coefficient table. */
2894 dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2895 I915_GEM_DOMAIN_RENDER,
2898 sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2899 pp_context->sampler_state_table.bo_8x8);
2901 /* sample_8x8 UV index 2 */
/* Identical tuning to the Y sampler; both share the same 8x8
 * coefficient table. */
2903 memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2904 sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2905 sampler_8x8[index].dw0.ief_bypass = 1;
2906 sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2907 sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2908 sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2909 sampler_8x8[index].dw2.global_noise_estimation = 22;
2910 sampler_8x8[index].dw2.strong_edge_threshold = 8;
2911 sampler_8x8[index].dw2.weak_edge_threshold = 1;
2912 sampler_8x8[index].dw3.strong_edge_weight = 7;
2913 sampler_8x8[index].dw3.regular_weight = 2;
2914 sampler_8x8[index].dw3.non_edge_weight = 0;
2915 sampler_8x8[index].dw3.gain_factor = 40;
2916 sampler_8x8[index].dw4.steepness_boost = 0;
2917 sampler_8x8[index].dw4.steepness_threshold = 0;
2918 sampler_8x8[index].dw4.mr_boost = 0;
2919 sampler_8x8[index].dw4.mr_threshold = 5;
2920 sampler_8x8[index].dw5.pwl1_point_1 = 4;
2921 sampler_8x8[index].dw5.pwl1_point_2 = 12;
2922 sampler_8x8[index].dw5.pwl1_point_3 = 16;
2923 sampler_8x8[index].dw5.pwl1_point_4 = 26;
2924 sampler_8x8[index].dw6.pwl1_point_5 = 40;
2925 sampler_8x8[index].dw6.pwl1_point_6 = 160;
2926 sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2927 sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2928 sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2929 sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2930 sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2931 sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2932 sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2933 sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2934 sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2935 sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2936 sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2937 sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2938 sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2939 sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2940 sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2941 sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2942 sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2943 sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2944 sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2945 sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2946 sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2947 sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2948 sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2949 sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2950 sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2951 sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2952 sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2953 sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2954 sampler_8x8[index].dw13.limiter_boost = 0;
2955 sampler_8x8[index].dw13.minimum_limiter = 10;
2956 sampler_8x8[index].dw13.maximum_limiter = 11;
2957 sampler_8x8[index].dw14.clip_limiter = 130;
2958 dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2959 I915_GEM_DOMAIN_RENDER,
2962 sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2963 pp_context->sampler_state_table.bo_8x8);
2965 dri_bo_unmap(pp_context->sampler_state_table.bo);
2967 /* private function & data */
2968 pp_context->pp_x_steps = pp_avs_x_steps;
2969 pp_context->pp_y_steps = pp_avs_y_steps;
2970 pp_context->private_context = &pp_context->pp_avs_context;
2971 pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
/* The destination X origin must be GPU_ASM_X_OFFSET_ALIGNMENT-aligned:
 * widen the rectangle to the left and shift the normalized source
 * origin left by the equivalent source-space distance to compensate. */
2973 int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2974 float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2975 pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
2976 pp_avs_context->dest_y = dst_rect->y;
2977 pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2978 pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
2979 pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2980 pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2981 pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
2982 pp_avs_context->src_h = src_rect->height;
2984 pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2985 pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2987 pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
/* Column walk: each thread covers one 16-pixel column of dest_h/8
 * blocks (contrast with the scaling path's 1 x N row walk). */
2988 pp_inline_parameter->grf5.block_count_x = 1; /* M x 1 */
2989 pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2990 pp_inline_parameter->grf6.video_step_delta = 0.0;
/* Propagate the colour-standard/interlacing flags to the output. */
2992 dst_surface->flags = src_surface->flags;
2994 return VA_STATUS_SUCCESS;
/*
 * Gen6 NV12 scaling entry point: a thin wrapper that forwards all
 * arguments to the shared pp_nv12_avs_initialize() implementation.
 */
2998 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2999 const struct i965_surface *src_surface,
3000 const VARectangle *src_rect,
3001 struct i965_surface *dst_surface,
3002 const VARectangle *dst_rect,
3005 return pp_nv12_avs_initialize(ctx, pp_context,
/* Horizontal kernel-dispatch step count for the Gen7 AVS path:
 * 16-pixel-wide blocks over the (16-aligned) destination width. */
3014 gen7_pp_avs_x_steps(void *private_context)
3016 struct pp_avs_context *pp_avs_context = private_context;
3018 return pp_avs_context->dest_w / 16;
/* Vertical kernel-dispatch step count for the Gen7 AVS path: blocks
 * are 16 rows tall here (vs. 8 on earlier generations). */
3022 gen7_pp_avs_y_steps(void *private_context)
3024 struct pp_avs_context *pp_avs_context = private_context;
3026 return pp_avs_context->dest_h / 16;
/*
 * Per-block inline parameters for the Gen7 AVS kernel: destination
 * block origin in pixels (16x16 blocks) plus the per-sample
 * horizontal sampler step.  The vertical step and frame origins live
 * in the static parameters set up by gen7_pp_plx_avs_initialize().
 */
3030 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3032 struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
3033 struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3035 pp_inline_parameter->grf9.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
3036 pp_inline_parameter->grf9.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
/* All-ones constant consumed by the kernel (write mask). -- NOTE(review):
 * exact use is defined by the kernel binary; confirm. */
3037 pp_inline_parameter->grf9.constant_0 = 0xffffffff;
3038 pp_inline_parameter->grf9.sampler_load_main_video_x_scaling_step = pp_avs_context->horiz_range / pp_avs_context->src_w;
/*
 * Program the packed-YUV component byte offsets for the given surface.
 * Despite the "src" in the name, the fields written are the
 * di_destination_* offsets, and the Gen7 AVS path passes the
 * destination surface.  YUY2 packs bytes as Y0 U Y1 V; UYVY packs
 * U Y0 V Y1.  Other fourccs (planar formats) leave the offsets
 * untouched.
 */
3043 static void gen7_update_src_surface_uv_offset(VADriverContextP ctx,
3044 struct i965_post_processing_context *pp_context,
3045 const struct i965_surface *surface)
3047 struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3048 int fourcc = pp_get_surface_fourcc(ctx, surface);
3050 if (fourcc == VA_FOURCC_YUY2) {
3051 pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3052 pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3053 pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3054 } else if (fourcc == VA_FOURCC_UYVY) {
3055 pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
3056 pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
3057 pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
/*
 * Gen7/Gen7.5 AVS scaling setup for planar/packed ("plx") surfaces:
 * binds source and destination through media read/write message
 * surfaces, builds the sampler-8x8 coefficient table from the
 * runtime-generated AVS kernels, programs three sampler-8x8 states
 * (indices 4, 8 and 12), fills the Gen7 static parameters — including
 * the AVS hardware-workaround fields and the YUV->RGB coefficient
 * block — and installs the Gen7 step/parameter callbacks.  Always
 * returns VA_STATUS_SUCCESS.
 */
3062 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3063 const struct i965_surface *src_surface,
3064 const VARectangle *src_rect,
3065 struct i965_surface *dst_surface,
3066 const VARectangle *dst_rect,
3069 struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
3070 struct i965_driver_data *i965 = i965_driver_data(ctx);
3071 struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3072 struct gen7_sampler_8x8 *sampler_8x8;
3073 struct i965_sampler_8x8_state *sampler_8x8_state;
/* Per-plane geometry filled in by the surface-binding helper. */
3075 int width[3], height[3], pitch[3], offset[3];
3076 int src_width, src_height;
3077 AVSState * const avs = &pp_avs_context->state;
3079 const float * yuv_to_rgb_coefs;
3080 size_t yuv_to_rgb_coefs_size;
3082 /* source surface */
3083 gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
3085 width, height, pitch, offset);
/* Plane 0 (luma) dimensions drive the normalized scaling math below. */
3086 src_width = width[0];
3087 src_height = height[0];
3089 /* destination surface */
3090 gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
3092 width, height, pitch, offset);
3094 /* sampler 8x8 state */
3095 dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
3096 assert(pp_context->sampler_state_table.bo_8x8->virtual);
3097 assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
3098 sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
3099 memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
/* Scale factors feed the AVS coefficient generator. */
3101 sx = (float)dst_rect->width / src_rect->width;
3102 sy = (float)dst_rect->height / src_rect->height;
3103 avs_update_coefficients(avs, sx, sy, pp_context->filter_flags);
3105 assert(avs->config->num_phases == 16);
/* num_phases + 1 entries: one per phase plus the phase == 1.0
 * endpoint.  Coefficients are packed into signed fixed point with 6
 * fractional bits. */
3106 for (i = 0; i <= 16; i++) {
3107 const AVSCoeffs * const coeffs = &avs->coeffs[i];
/* 8-tap horizontal luma kernel. */
3109 sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 =
3110 intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
3111 sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 =
3112 intel_format_convert(coeffs->y_k_h[1], 1, 6, 1);
3113 sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 =
3114 intel_format_convert(coeffs->y_k_h[2], 1, 6, 1);
3115 sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 =
3116 intel_format_convert(coeffs->y_k_h[3], 1, 6, 1);
3117 sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 =
3118 intel_format_convert(coeffs->y_k_h[4], 1, 6, 1);
3119 sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 =
3120 intel_format_convert(coeffs->y_k_h[5], 1, 6, 1);
3121 sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 =
3122 intel_format_convert(coeffs->y_k_h[6], 1, 6, 1);
3123 sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 =
3124 intel_format_convert(coeffs->y_k_h[7], 1, 6, 1);
/* 4-tap horizontal chroma kernel (center taps c2..c5 of table 1). */
3126 sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 =
3127 intel_format_convert(coeffs->uv_k_h[0], 1, 6, 1);
3128 sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 =
3129 intel_format_convert(coeffs->uv_k_h[1], 1, 6, 1);
3130 sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 =
3131 intel_format_convert(coeffs->uv_k_h[2], 1, 6, 1);
3132 sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 =
3133 intel_format_convert(coeffs->uv_k_h[3], 1, 6, 1);
/* 8-tap vertical luma kernel. */
3135 sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 =
3136 intel_format_convert(coeffs->y_k_v[0], 1, 6, 1);
3137 sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 =
3138 intel_format_convert(coeffs->y_k_v[1], 1, 6, 1);
3139 sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 =
3140 intel_format_convert(coeffs->y_k_v[2], 1, 6, 1);
3141 sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 =
3142 intel_format_convert(coeffs->y_k_v[3], 1, 6, 1);
3143 sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 =
3144 intel_format_convert(coeffs->y_k_v[4], 1, 6, 1);
3145 sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 =
3146 intel_format_convert(coeffs->y_k_v[5], 1, 6, 1);
3147 sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 =
3148 intel_format_convert(coeffs->y_k_v[6], 1, 6, 1);
3149 sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 =
3150 intel_format_convert(coeffs->y_k_v[7], 1, 6, 1);
/* 4-tap vertical chroma kernel. */
3152 sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 =
3153 intel_format_convert(coeffs->uv_k_v[0], 1, 6, 1);
3154 sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 =
3155 intel_format_convert(coeffs->uv_k_v[1], 1, 6, 1);
3156 sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 =
3157 intel_format_convert(coeffs->uv_k_v[2], 1, 6, 1);
3158 sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 =
3159 intel_format_convert(coeffs->uv_k_v[3], 1, 6, 1);
/* 0 when no scaling filter is active, -1 (all-ones in the bitfield)
 * when AVS filtering is needed. */
3162 sampler_8x8_state->dw136.default_sharpness_level =
3163 -avs_is_needed(pp_context->filter_flags);
/* HSW has an explicit adaptive-filter toggle in DW137; older Gen7
 * parts encode it in coefficient DW4 bit 15 (as on ILK). */
3164 if (IS_HASWELL(i965->intel.device_info)) {
3165 sampler_8x8_state->dw137.hsw.adaptive_filter_for_all_channel = 1;
3166 sampler_8x8_state->dw137.hsw.bypass_y_adaptive_filtering = 1;
3167 sampler_8x8_state->dw137.hsw.bypass_x_adaptive_filtering = 1;
3170 sampler_8x8_state->coefficients[0].dw4.table_1x_filter_c1 = 1U << 7;
3171 sampler_8x8_state->dw137.ilk.bypass_y_adaptive_filtering = 1;
3172 sampler_8x8_state->dw137.ilk.bypass_x_adaptive_filtering = 1;
3174 dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
3177 dri_bo_map(pp_context->sampler_state_table.bo, True);
3178 assert(pp_context->sampler_state_table.bo->virtual);
3179 assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
3180 sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
3182 /* sample_8x8 Y index 4 */
/* IEF coefficients are programmed but inert (dw0.ief_bypass = 1). */
3184 memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3185 sampler_8x8[index].dw0.global_noise_estimation = 255;
3186 sampler_8x8[index].dw0.ief_bypass = 1;
/* Coefficient table address, in 32-byte units. */
3188 sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3190 sampler_8x8[index].dw2.weak_edge_threshold = 1;
3191 sampler_8x8[index].dw2.strong_edge_threshold = 8;
3192 sampler_8x8[index].dw2.r5x_coefficient = 9;
3193 sampler_8x8[index].dw2.r5cx_coefficient = 8;
3194 sampler_8x8[index].dw2.r5c_coefficient = 3;
3196 sampler_8x8[index].dw3.r3x_coefficient = 27;
3197 sampler_8x8[index].dw3.r3c_coefficient = 5;
3198 sampler_8x8[index].dw3.gain_factor = 40;
3199 sampler_8x8[index].dw3.non_edge_weight = 1;
3200 sampler_8x8[index].dw3.regular_weight = 2;
3201 sampler_8x8[index].dw3.strong_edge_weight = 7;
3202 sampler_8x8[index].dw3.ief4_smooth_enable = 0;
/* Relocate dw1 so the kernel sees the final GPU address of the table. */
3204 dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3205 I915_GEM_DOMAIN_RENDER,
3208 sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3209 pp_context->sampler_state_table.bo_8x8);
3211 /* sample_8x8 UV index 8 */
/* Same tuning as the Y sampler; all three share one coefficient table. */
3213 memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3214 sampler_8x8[index].dw0.disable_8x8_filter = 0;
3215 sampler_8x8[index].dw0.global_noise_estimation = 255;
3216 sampler_8x8[index].dw0.ief_bypass = 1;
3217 sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3218 sampler_8x8[index].dw2.weak_edge_threshold = 1;
3219 sampler_8x8[index].dw2.strong_edge_threshold = 8;
3220 sampler_8x8[index].dw2.r5x_coefficient = 9;
3221 sampler_8x8[index].dw2.r5cx_coefficient = 8;
3222 sampler_8x8[index].dw2.r5c_coefficient = 3;
3223 sampler_8x8[index].dw3.r3x_coefficient = 27;
3224 sampler_8x8[index].dw3.r3c_coefficient = 5;
3225 sampler_8x8[index].dw3.gain_factor = 40;
3226 sampler_8x8[index].dw3.non_edge_weight = 1;
3227 sampler_8x8[index].dw3.regular_weight = 2;
3228 sampler_8x8[index].dw3.strong_edge_weight = 7;
3229 sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3231 dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3232 I915_GEM_DOMAIN_RENDER,
3235 sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3236 pp_context->sampler_state_table.bo_8x8);
3238 /* sampler_8x8 V, index 12 */
3240 memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3241 sampler_8x8[index].dw0.disable_8x8_filter = 0;
3242 sampler_8x8[index].dw0.global_noise_estimation = 255;
3243 sampler_8x8[index].dw0.ief_bypass = 1;
3244 sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3245 sampler_8x8[index].dw2.weak_edge_threshold = 1;
3246 sampler_8x8[index].dw2.strong_edge_threshold = 8;
3247 sampler_8x8[index].dw2.r5x_coefficient = 9;
3248 sampler_8x8[index].dw2.r5cx_coefficient = 8;
3249 sampler_8x8[index].dw2.r5c_coefficient = 3;
3250 sampler_8x8[index].dw3.r3x_coefficient = 27;
3251 sampler_8x8[index].dw3.r3c_coefficient = 5;
3252 sampler_8x8[index].dw3.gain_factor = 40;
3253 sampler_8x8[index].dw3.non_edge_weight = 1;
3254 sampler_8x8[index].dw3.regular_weight = 2;
3255 sampler_8x8[index].dw3.strong_edge_weight = 7;
3256 sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3258 dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3259 I915_GEM_DOMAIN_RENDER,
3262 sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3263 pp_context->sampler_state_table.bo_8x8);
3265 dri_bo_unmap(pp_context->sampler_state_table.bo);
3267 /* private function & data */
3268 pp_context->pp_x_steps = gen7_pp_avs_x_steps;
3269 pp_context->pp_y_steps = gen7_pp_avs_y_steps;
3270 pp_context->private_context = &pp_context->pp_avs_context;
3271 pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
/* Align the destination X origin by widening the rectangle leftwards;
 * the horizontal frame origin below compensates in source space. */
3273 int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
3274 pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
3275 pp_avs_context->dest_y = dst_rect->y;
3276 pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
3277 pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
3278 pp_avs_context->src_w = src_rect->width;
3279 pp_avs_context->src_h = src_rect->height;
/* Fraction of the source surface width covered by the source rect. */
3280 pp_avs_context->horiz_range = (float)src_rect->width / src_width;
/* Horizontal extent used for the sampler step ratio: at least one
 * step per 16 source pixels, never below the extended dest width. */
3282 int dw = (pp_avs_context->src_w - 1) / 16 + 1;
3283 dw = MAX(dw, dst_rect->width + dst_left_edge_extend);
3285 pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3286 pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
3287 if (IS_HASWELL(i965->intel.device_info))
3288 pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */
/* The workaround only applies to YUV sources: RGB inputs opt out. */
3290 if (pp_static_parameter->grf2.avs_wa_enable) {
3291 int src_fourcc = pp_get_surface_fourcc(ctx, src_surface);
3292 if ((src_fourcc == VA_FOURCC_RGBA) ||
3293 (src_fourcc == VA_FOURCC_RGBX) ||
3294 (src_fourcc == VA_FOURCC_BGRA) ||
3295 (src_fourcc == VA_FOURCC_BGRX)) {
3296 pp_static_parameter->grf2.avs_wa_enable = 0;
/* Precomputed reciprocals used by the workaround kernel. */
3300 pp_static_parameter->grf2.avs_wa_width = src_width;
3301 pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * src_width);
3302 pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * src_width);
/* Fully opaque alpha for formats that carry one. */
3303 pp_static_parameter->grf2.alpha = 255;
3305 pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
3306 pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height;
/* Frame origins are pre-biased by dest_x/dest_y so the per-block
 * origins computed in gen7_pp_avs_set_block_parameter() line up. */
3307 pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
3308 (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
3309 pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
3310 (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
/* Packed-component offsets for the (packed-YUV) destination. */
3312 gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
/* Pick YUV->RGB conversion coefficients from the source's colour
 * standard and copy them into the kernel's static parameter block. */
3314 yuv_to_rgb_coefs = i915_color_standard_to_coefs (i915_filter_to_color_standard (src_surface->flags &
3316 &yuv_to_rgb_coefs_size);
3317 memcpy(&pp_static_parameter->grf7, yuv_to_rgb_coefs, yuv_to_rgb_coefs_size);
3319 dst_surface->flags = src_surface->flags;
3321 return VA_STATUS_SUCCESS;
/* Horizontal walker step count for the ILK/SNB DNDI kernel.
 * NOTE(review): body elided in this listing; the inline parameters set up in
 * pp_nv12_dndi_initialize ("1 x N" block layout) suggest it returns 1 —
 * confirm against the full source. */
3325 pp_dndi_x_steps(void *private_context)
/* Vertical walker step count for the ILK/SNB DNDI kernel: one thread
 * per 4 destination rows (each DNDI block is 4 rows tall, see
 * pp_dndi_set_block_parameter). */
3331 pp_dndi_y_steps(void *private_context)
3333 struct pp_dndi_context *pp_dndi_context = private_context;
3335 return pp_dndi_context->dest_h / 4;
/* Per-thread inline parameter setup for the DNDI kernel: converts the
 * (x, y) walker step indices into a pixel origin. Blocks are 16 pixels
 * wide and 4 rows tall, matching pp_dndi_y_steps above. */
3339 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3341 struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3343 pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3344 pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
/*
 * Sets up the ILK/SNB NV12 de-interlacing + de-noise (DNDI) pass:
 *  - validates/allocates the DNDI frame-store surfaces,
 *  - binds input (current/previous/STMM) and output (previous/current/STMM)
 *    surfaces at the fixed binding-table indices noted inline,
 *  - fills one i965_sampler_dndi state entry with tuned DI thresholds,
 *  - installs the walker callbacks and static/inline GRF parameters.
 * Returns VA_STATUS_SUCCESS, or propagates the status of the surface
 * preparation helpers on failure.
 */
3350 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3351 const struct i965_surface *src_surface,
3352 const VARectangle *src_rect,
3353 struct i965_surface *dst_surface,
3354 const VARectangle *dst_rect,
3357 struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
3358 struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3359 struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3360 const VAProcPipelineParameterBuffer * const pipe_params =
3361 pp_context->pipeline_param;
3362 const VAProcFilterParameterBufferDeinterlacing * const deint_params =
3364 struct object_surface * const src_obj_surface = (struct object_surface *)
3366 struct object_surface * const dst_obj_surface = (struct object_surface *)
3368 struct object_surface *obj_surface;
3369 struct i965_sampler_dndi *sampler_dndi;
3370 int index, dndi_top_first;
3371 int w, h, orig_w, orig_h;
/* Prepare DNDI state: derive per-surface params from the pipeline and
 * deinterlacing filter buffers, then make sure all frame-store surfaces
 * exist and have backing storage. Each step bails out on failure. */
3374 status = pp_dndi_context_init_surface_params(dndi_ctx, src_obj_surface,
3375 pipe_params, deint_params);
3376 if (status != VA_STATUS_SUCCESS)
3379 status = pp_dndi_context_ensure_surfaces(ctx, pp_context,
3380 src_obj_surface, dst_obj_surface);
3381 if (status != VA_STATUS_SUCCESS)
3384 status = pp_dndi_context_ensure_surfaces_storage(ctx, pp_context,
3385 src_obj_surface, dst_obj_surface);
3386 if (status != VA_STATUS_SUCCESS)
3389 /* Current input surface (index = 4) */
3390 obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].obj_surface;
3391 i965_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3392 obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3393 0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 4);
3395 /* Previous input surface (index = 5) */
3396 obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS].obj_surface;
3397 i965_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3398 obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3399 0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 5);
3401 /* STMM input surface (index = 6) */
3402 obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_STMM].obj_surface;
3403 i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3404 obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3405 I965_SURFACEFORMAT_R8_UNORM, 6, 1);
3407 /* Previous output surfaces (index = { 7, 8 }) */
3408 obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS].obj_surface;
3409 w = obj_surface->width;
3410 h = obj_surface->height;
3411 orig_w = obj_surface->orig_width;
3412 orig_h = obj_surface->orig_height;
/* NV12 planes are bound separately: Y as R8 at offset 0, interleaved UV
 * as R8G8 starting at the chroma offset (w * h). Widths are expressed in
 * 4-byte units, hence ALIGN(orig_w, 4) / 4. */
3414 i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3415 ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 7, 1);
3416 i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3417 ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 8, 1);
3419 /* Current output surfaces (index = { 10, 11 }) */
3420 obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT].obj_surface;
3421 w = obj_surface->width;
3422 h = obj_surface->height;
3423 orig_w = obj_surface->orig_width;
3424 orig_h = obj_surface->orig_height;
3426 i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3427 ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 10, 1);
3428 i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3429 ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 11, 1);
3431 /* STMM output surface (index = 20) */
3432 obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM].obj_surface;
3433 i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3434 obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3435 I965_SURFACEFORMAT_R8_UNORM, 20, 1);
/* Field order: top-field-first unless the caller requested bottom field. */
3437 dndi_top_first = !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
/* Fill the 8-dword DNDI sampler state in place through a CPU mapping. */
3440 dri_bo_map(pp_context->sampler_state_table.bo, True);
3441 assert(pp_context->sampler_state_table.bo->virtual);
3442 assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3443 sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3445 /* sample dndi index 1 */
3447 sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3448 sampler_dndi[index].dw0.denoise_history_delta = 7; // 0-15, default is 8
3449 sampler_dndi[index].dw0.denoise_maximum_history = 192; // 128-240
3450 sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3452 sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3453 sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3454 sampler_dndi[index].dw1.stmm_c2 = 1;
3455 sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3456 sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3458 sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20; // 0-31
3459 sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 1; // 0-15
3460 sampler_dndi[index].dw2.denoise_edge_threshold = 7; // 0-15
3461 sampler_dndi[index].dw2.good_neighbor_threshold = 12; // 0-63
3463 sampler_dndi[index].dw3.maximum_stmm = 150;
3464 sampler_dndi[index].dw3.multipler_for_vecm = 30;
3465 sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3466 sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3467 sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3469 sampler_dndi[index].dw4.sdi_delta = 5;
3470 sampler_dndi[index].dw4.sdi_threshold = 100;
3471 sampler_dndi[index].dw4.stmm_output_shift = 5; // stmm_max - stmm_min = 2 ^ stmm_output_shift
3472 sampler_dndi[index].dw4.stmm_shift_up = 1;
3473 sampler_dndi[index].dw4.stmm_shift_down = 3;
3474 sampler_dndi[index].dw4.minimum_stmm = 118;
3476 sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3477 sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3478 sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3479 sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
/* Both de-noise and de-interlace are enabled for this pass. */
3481 sampler_dndi[index].dw6.dn_enable = 1;
3482 sampler_dndi[index].dw6.di_enable = 1;
3483 sampler_dndi[index].dw6.di_partial = 0;
3484 sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3485 sampler_dndi[index].dw6.dndi_stream_id = 0;
3486 sampler_dndi[index].dw6.dndi_first_frame = dndi_ctx->is_first_frame;
3487 sampler_dndi[index].dw6.progressive_dn = 0;
3488 sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3489 sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3490 sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3492 sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3493 sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3494 sampler_dndi[index].dw7.vdi_walker_enable = 0;
3495 sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3497 dri_bo_unmap(pp_context->sampler_state_table.bo);
3499 /* private function & data */
3500 pp_context->pp_x_steps = pp_dndi_x_steps;
3501 pp_context->pp_y_steps = pp_dndi_y_steps;
3502 pp_context->private_context = dndi_ctx;
3503 pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
/* NOTE: 'picth' is a typo inherited from the struct field name; the value
 * is the statistics surface pitch divided by 2. */
3505 pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3506 pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
3507 pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
3508 pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
3510 pp_inline_parameter->grf5.block_count_x = w / 16; /* 1 x N */
3511 pp_inline_parameter->grf5.number_blocks = w / 16;
3512 pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3513 pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3515 dndi_ctx->dest_w = w;
3516 dndi_ctx->dest_h = h;
/* The de-interlaced output is always a progressive frame. */
3518 dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3519 return VA_STATUS_SUCCESS;
/* Horizontal walker step count for the ILK/SNB DN (denoise-only) kernel.
 * NOTE(review): body elided in this listing; the "1 x N" inline block
 * layout used by pp_nv12_dn_initialize suggests it returns 1 — confirm
 * against the full source. */
3523 pp_dn_x_steps(void *private_context)
/* Vertical walker step count for the DN kernel: one thread per 8
 * destination rows (the DN block is 8 rows tall, see
 * pp_dn_set_block_parameter). */
3529 pp_dn_y_steps(void *private_context)
3531 struct pp_dn_context *pp_dn_context = private_context;
3533 return pp_dn_context->dest_h / 8;
/* Per-thread inline parameter setup for the DN kernel: 16-pixel-wide,
 * 8-row-tall blocks (taller than the 4-row DNDI blocks above). */
3537 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3539 struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3541 pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3542 pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
/*
 * Sets up the ILK/SNB NV12 denoise-only pass: allocates the STMM
 * (temporal-motion) scratch buffer on first use, binds the source
 * Y/UV/STMM and destination Y/UV surfaces, programs one DNDI sampler
 * entry with de-interlace disabled (dn_enable=1, di_enable=0), and
 * installs walker callbacks and GRF parameters.
 * The denoise strength comes from the optional VAProcFilterParameterBuffer.
 * Returns VA_STATUS_SUCCESS.
 */
3548 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3549 const struct i965_surface *src_surface,
3550 const VARectangle *src_rect,
3551 struct i965_surface *dst_surface,
3552 const VARectangle *dst_rect,
3555 struct i965_driver_data *i965 = i965_driver_data(ctx);
3556 struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
3557 struct object_surface *obj_surface;
3558 struct i965_sampler_dndi *sampler_dndi;
3559 struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3560 struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3561 VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
/* Defaults used when no filter parameter / frame flags override them. */
3565 int dn_strength = 15;
3566 int dndi_top_first = 1;
3567 int dn_progressive = 0;
/* Derive field order / progressive mode from the source surface flags.
 * (Remaining branches of this if/else chain are elided in this listing.) */
3569 if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3572 } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
/* Map the client-supplied filter value onto the 0-31 HW strength range.
 * NOTE(review): the clamping of `value` is elided in this listing. */
3580 if (dn_filter_param) {
3581 float value = dn_filter_param->value;
3589 dn_strength = (int)(value * 31.0F);
3593 obj_surface = (struct object_surface *)src_surface->base;
3594 orig_w = obj_surface->orig_width;
3595 orig_h = obj_surface->orig_height;
3596 w = obj_surface->width;
3597 h = obj_surface->height;
/* Lazily allocate the STMM history buffer; it persists across frames. */
3599 if (pp_dn_context->stmm_bo == NULL) {
3600 pp_dn_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
3604 assert(pp_dn_context->stmm_bo);
3607 /* source UV surface index 2 */
3608 i965_pp_set_surface_state(ctx, pp_context,
3609 obj_surface->bo, w * h,
3610 ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3613 /* source YUV surface index 4 */
3614 i965_pp_set_surface2_state(ctx, pp_context,
3618 SURFACE_FORMAT_PLANAR_420_8, 1,
3621 /* source STMM surface index 20 */
3622 i965_pp_set_surface_state(ctx, pp_context,
3623 pp_dn_context->stmm_bo, 0,
3624 orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3627 /* destination surface */
3628 obj_surface = (struct object_surface *)dst_surface->base;
3629 orig_w = obj_surface->orig_width;
3630 orig_h = obj_surface->orig_height;
3631 w = obj_surface->width;
3632 h = obj_surface->height;
3634 /* destination Y surface index 7 */
3635 i965_pp_set_surface_state(ctx, pp_context,
3637 ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3640 /* destination UV surface index 8 */
3641 i965_pp_set_surface_state(ctx, pp_context,
3642 obj_surface->bo, w * h,
3643 ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
/* Fill the 8-dword sampler state in place through a CPU mapping. */
3646 dri_bo_map(pp_context->sampler_state_table.bo, True);
3647 assert(pp_context->sampler_state_table.bo->virtual);
3648 assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3649 sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3651 /* sample dndi index 1 */
3653 sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3654 sampler_dndi[index].dw0.denoise_history_delta = 8; // 0-15, default is 8
3655 sampler_dndi[index].dw0.denoise_maximum_history = 128; // 128-240
3656 sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3658 sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3659 sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3660 sampler_dndi[index].dw1.stmm_c2 = 0;
3661 sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3662 sampler_dndi[index].dw1.temporal_difference_threshold = 16;
/* Denoise strength directly drives the block-noise-estimate threshold. */
3664 sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength; // 0-31
3665 sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7; // 0-15
3666 sampler_dndi[index].dw2.denoise_edge_threshold = 7; // 0-15
3667 sampler_dndi[index].dw2.good_neighbor_threshold = 7; // 0-63
3669 sampler_dndi[index].dw3.maximum_stmm = 128;
3670 sampler_dndi[index].dw3.multipler_for_vecm = 2;
3671 sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3672 sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3673 sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3675 sampler_dndi[index].dw4.sdi_delta = 8;
3676 sampler_dndi[index].dw4.sdi_threshold = 128;
3677 sampler_dndi[index].dw4.stmm_output_shift = 7; // stmm_max - stmm_min = 2 ^ stmm_output_shift
3678 sampler_dndi[index].dw4.stmm_shift_up = 0;
3679 sampler_dndi[index].dw4.stmm_shift_down = 0;
3680 sampler_dndi[index].dw4.minimum_stmm = 0;
3682 sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3683 sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3684 sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3685 sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
/* Denoise only: de-interlace explicitly disabled for this pass. */
3687 sampler_dndi[index].dw6.dn_enable = 1;
3688 sampler_dndi[index].dw6.di_enable = 0;
3689 sampler_dndi[index].dw6.di_partial = 0;
3690 sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3691 sampler_dndi[index].dw6.dndi_stream_id = 1;
3692 sampler_dndi[index].dw6.dndi_first_frame = 1;
3693 sampler_dndi[index].dw6.progressive_dn = dn_progressive;
3694 sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3695 sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3696 sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3698 sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3699 sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3700 sampler_dndi[index].dw7.vdi_walker_enable = 0;
3701 sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3703 dri_bo_unmap(pp_context->sampler_state_table.bo);
3705 /* private function & data */
3706 pp_context->pp_x_steps = pp_dn_x_steps;
3707 pp_context->pp_y_steps = pp_dn_y_steps;
3708 pp_context->private_context = &pp_context->pp_dn_context;
3709 pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
/* NOTE: 'picth' is a typo inherited from the struct field name. */
3711 pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3712 pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
3713 pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
3714 pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
3716 pp_inline_parameter->grf5.block_count_x = w / 16; /* 1 x N */
3717 pp_inline_parameter->grf5.number_blocks = w / 16;
3718 pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3719 pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3721 pp_dn_context->dest_w = w;
3722 pp_dn_context->dest_h = h;
/* Denoise preserves the source's frame/field layout. */
3724 dst_surface->flags = src_surface->flags;
3726 return VA_STATUS_SUCCESS;
/* Horizontal walker step count for the GEN7 DNDI kernel: one thread per
 * 16-pixel-wide block (unlike the ILK/SNB "1 x N" layout). */
3730 gen7_pp_dndi_x_steps(void *private_context)
3732 struct pp_dndi_context *pp_dndi_context = private_context;
3734 return pp_dndi_context->dest_w / 16;
/* Vertical walker step count for the GEN7 DNDI kernel: one thread per
 * 4 destination rows. */
3738 gen7_pp_dndi_y_steps(void *private_context)
3740 struct pp_dndi_context *pp_dndi_context = private_context;
3742 return pp_dndi_context->dest_h / 4;
/* Per-thread inline parameter setup for the GEN7 DNDI kernel. Note the
 * GEN7 layout puts the block origin in grf9 (vs grf5 on ILK/SNB). */
3746 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3748 struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3750 pp_inline_parameter->grf9.destination_block_horizontal_origin = x * 16;
3751 pp_inline_parameter->grf9.destination_block_vertical_origin = y * 4;
/*
 * GEN7 (IVB/HSW) variant of the NV12 DNDI setup. Same structure as
 * pp_nv12_dndi_initialize but uses the gen7 surface-state helpers,
 * different binding-table indices (inputs 3/4/5, outputs 27/28, 30/31,
 * STMM 33), the gen7_sampler_dndi layout, and gen7 static parameters.
 * Also enables motion-compensated DI when the filter requests
 * VAProcDeinterlacingMotionCompensated.
 * Returns VA_STATUS_SUCCESS, or propagates surface-preparation failures.
 */
3757 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3758 const struct i965_surface *src_surface,
3759 const VARectangle *src_rect,
3760 struct i965_surface *dst_surface,
3761 const VARectangle *dst_rect,
3764 struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
3765 struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3766 const VAProcPipelineParameterBuffer * const pipe_params =
3767 pp_context->pipeline_param;
3768 const VAProcFilterParameterBufferDeinterlacing * const deint_params =
3770 struct object_surface * const src_obj_surface = (struct object_surface *)
3772 struct object_surface * const dst_obj_surface = (struct object_surface *)
3774 struct object_surface *obj_surface;
3775 struct gen7_sampler_dndi *sampler_dndi;
3776 int index, dndi_top_first;
3777 int w, h, orig_w, orig_h;
/* Prepare DNDI surfaces (params, existence, storage); bail on failure. */
3780 status = pp_dndi_context_init_surface_params(dndi_ctx, src_obj_surface,
3781 pipe_params, deint_params);
3782 if (status != VA_STATUS_SUCCESS)
3785 status = pp_dndi_context_ensure_surfaces(ctx, pp_context,
3786 src_obj_surface, dst_obj_surface);
3787 if (status != VA_STATUS_SUCCESS)
3790 status = pp_dndi_context_ensure_surfaces_storage(ctx, pp_context,
3791 src_obj_surface, dst_obj_surface);
3792 if (status != VA_STATUS_SUCCESS)
3795 /* Current input surface (index = 3) */
3796 obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].obj_surface;
3797 gen7_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3798 obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3799 0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 3);
3801 /* Previous input surface (index = 4) */
3802 obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS].obj_surface;
3803 gen7_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3804 obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3805 0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 4);
3807 /* STMM input surface (index = 5) */
3808 obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_STMM].obj_surface;
3809 gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3810 obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3811 I965_SURFACEFORMAT_R8_UNORM, 5, 1);
3813 /* Previous output surfaces (index = { 27, 28 }) */
3814 obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS].obj_surface;
3815 w = obj_surface->width;
3816 h = obj_surface->height;
3817 orig_w = obj_surface->orig_width;
3818 orig_h = obj_surface->orig_height;
/* NV12 planes bound separately: Y as R8 at offset 0, interleaved UV as
 * R8G8 at the chroma offset (w * h); widths in 4-byte units. */
3820 gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3821 ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 27, 1);
3822 gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3823 ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 28, 1);
3825 /* Current output surfaces (index = { 30, 31 }) */
3826 obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT].obj_surface;
3827 w = obj_surface->width;
3828 h = obj_surface->height;
3829 orig_w = obj_surface->orig_width;
3830 orig_h = obj_surface->orig_height;
3832 gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3833 ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 30, 1);
3834 gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3835 ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 31, 1);
3837 /* STMM output surface (index = 33) */
3838 obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM].obj_surface;
3839 gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3840 obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3841 I965_SURFACEFORMAT_R8_UNORM, 33, 1);
/* Field order: top-field-first unless the caller requested bottom field. */
3843 dndi_top_first = !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
/* Fill the 8-dword gen7 sampler state in place through a CPU mapping. */
3846 dri_bo_map(pp_context->sampler_state_table.bo, True);
3847 assert(pp_context->sampler_state_table.bo->virtual);
3848 assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3849 sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3851 /* sample dndi index 0 */
3853 sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3854 sampler_dndi[index].dw0.dnmh_delt = 7;
3855 sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
3856 sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
3857 sampler_dndi[index].dw0.denoise_maximum_history = 192; // 128-240
3858 sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3860 sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3861 sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3862 sampler_dndi[index].dw1.stmm_c2 = 2;
3863 sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3864 sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3866 sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20; // 0-31
3867 sampler_dndi[index].dw2.bne_edge_th = 1;
3868 sampler_dndi[index].dw2.smooth_mv_th = 0;
3869 sampler_dndi[index].dw2.sad_tight_th = 5;
3870 sampler_dndi[index].dw2.cat_slope_minus1 = 9;
3871 sampler_dndi[index].dw2.good_neighbor_th = 12;
3873 sampler_dndi[index].dw3.maximum_stmm = 150;
3874 sampler_dndi[index].dw3.multipler_for_vecm = 30;
3875 sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3876 sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3877 sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3879 sampler_dndi[index].dw4.sdi_delta = 5;
3880 sampler_dndi[index].dw4.sdi_threshold = 100;
3881 sampler_dndi[index].dw4.stmm_output_shift = 5; // stmm_max - stmm_min = 2 ^ stmm_output_shift
3882 sampler_dndi[index].dw4.stmm_shift_up = 1;
3883 sampler_dndi[index].dw4.stmm_shift_down = 3;
3884 sampler_dndi[index].dw4.minimum_stmm = 118;
3886 sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3887 sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3888 sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3889 sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
/* De-interlace only on GEN7: denoise is disabled in this pass. */
3890 sampler_dndi[index].dw6.dn_enable = 0;
3891 sampler_dndi[index].dw6.di_enable = 1;
3892 sampler_dndi[index].dw6.di_partial = 0;
3893 sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3894 sampler_dndi[index].dw6.dndi_stream_id = 1;
3895 sampler_dndi[index].dw6.dndi_first_frame = dndi_ctx->is_first_frame;
3896 sampler_dndi[index].dw6.progressive_dn = 0;
/* Motion-compensated DI when the VA-API filter asks for it. */
3897 sampler_dndi[index].dw6.mcdi_enable =
3898 (deint_params->algorithm == VAProcDeinterlacingMotionCompensated);
3899 sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3900 sampler_dndi[index].dw6.cat_th1 = 0;
3901 sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3902 sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3904 sampler_dndi[index].dw7.sad_tha = 5;
3905 sampler_dndi[index].dw7.sad_thb = 10;
3906 sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3907 sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
3908 sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3909 sampler_dndi[index].dw7.vdi_walker_enable = 0;
3910 sampler_dndi[index].dw7.neighborpixel_th = 10;
3911 sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3913 dri_bo_unmap(pp_context->sampler_state_table.bo);
3915 /* private function & data */
3916 pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3917 pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3918 pp_context->private_context = dndi_ctx;
3919 pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3921 pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3922 pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3923 pp_static_parameter->grf1.di_top_field_first = 0;
3924 pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3926 pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3927 pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3928 pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3930 pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3931 pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3933 dndi_ctx->dest_w = w;
3934 dndi_ctx->dest_h = h;
/* The de-interlaced output is always a progressive frame. */
3936 dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3937 return VA_STATUS_SUCCESS;
/* Horizontal walker step count for the GEN7 DN kernel: one thread per
 * 16-pixel-wide block. */
3941 gen7_pp_dn_x_steps(void *private_context)
3943 struct pp_dn_context *pp_dn_context = private_context;
3945 return pp_dn_context->dest_w / 16;
/* Vertical walker step count for the GEN7 DN kernel: one thread per
 * 4 destination rows. */
3949 gen7_pp_dn_y_steps(void *private_context)
3951 struct pp_dn_context *pp_dn_context = private_context;
3953 return pp_dn_context->dest_h / 4;
/* Per-thread inline parameter setup for the GEN7 DN kernel: 16x4 blocks.
 * NOTE(review): this writes through `struct pp_inline_parameter` (grf5),
 * not the gen7 layout used by gen7_pp_dndi_set_block_parameter — confirm
 * against the kernel ABI that grf5 is correct here. */
3957 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3959 struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3961 pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3962 pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
/*
 * GEN7 (IVB/HSW) variant of the NV12 denoise-only setup. Mirrors
 * pp_nv12_dn_initialize but uses the gen7 surface-state helpers,
 * different binding-table indices (source UV 1, YUV 3, temporal ref 4,
 * STMM 5; destination Y 24, UV 25), the gen7_sampler_dndi layout, and
 * gen7 static parameters. De-interlace and MCDI are disabled
 * (dn_enable=1, di_enable=0, mcdi_enable=0).
 * Returns VA_STATUS_SUCCESS.
 */
3968 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3969 const struct i965_surface *src_surface,
3970 const VARectangle *src_rect,
3971 struct i965_surface *dst_surface,
3972 const VARectangle *dst_rect,
3975 struct i965_driver_data *i965 = i965_driver_data(ctx);
3976 struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
3977 struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3978 struct object_surface *obj_surface;
3979 struct gen7_sampler_dndi *sampler_dn;
3980 VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
/* Defaults used when no filter parameter / frame flags override them. */
3984 int dn_strength = 15;
3985 int dndi_top_first = 1;
3986 int dn_progressive = 0;
/* Derive field order / progressive mode from the source surface flags.
 * (Remaining branches of this if/else chain are elided in this listing.) */
3988 if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3991 } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
/* Map the client-supplied filter value onto the 0-31 HW strength range.
 * NOTE(review): the clamping of `value` is elided in this listing. */
3999 if (dn_filter_param) {
4000 float value = dn_filter_param->value;
4008 dn_strength = (int)(value * 31.0F);
4012 obj_surface = (struct object_surface *)src_surface->base;
4013 orig_w = obj_surface->orig_width;
4014 orig_h = obj_surface->orig_height;
4015 w = obj_surface->width;
4016 h = obj_surface->height;
/* Lazily allocate the STMM history buffer; it persists across frames. */
4018 if (pp_dn_context->stmm_bo == NULL) {
4019 pp_dn_context->stmm_bo= dri_bo_alloc(i965->intel.bufmgr,
4023 assert(pp_dn_context->stmm_bo);
4026 /* source UV surface index 1 */
4027 gen7_pp_set_surface_state(ctx, pp_context,
4028 obj_surface->bo, w * h,
4029 ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4032 /* source YUV surface index 3 */
4033 gen7_pp_set_surface2_state(ctx, pp_context,
4037 SURFACE_FORMAT_PLANAR_420_8, 1,
4040 /* source (temporal reference) YUV surface index 4 */
4041 gen7_pp_set_surface2_state(ctx, pp_context,
4045 SURFACE_FORMAT_PLANAR_420_8, 1,
4048 /* STMM / History Statistics input surface, index 5 */
4049 gen7_pp_set_surface_state(ctx, pp_context,
4050 pp_dn_context->stmm_bo, 0,
4051 orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4054 /* destination surface */
4055 obj_surface = (struct object_surface *)dst_surface->base;
4056 orig_w = obj_surface->orig_width;
4057 orig_h = obj_surface->orig_height;
4058 w = obj_surface->width;
4059 h = obj_surface->height;
4061 /* destination Y surface index 24 */
4062 gen7_pp_set_surface_state(ctx, pp_context,
4064 ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4067 /* destination UV surface index 25 */
4068 gen7_pp_set_surface_state(ctx, pp_context,
4069 obj_surface->bo, w * h,
4070 ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
/* Fill the 8-dword gen7 sampler state in place through a CPU mapping. */
4074 dri_bo_map(pp_context->sampler_state_table.bo, True);
4075 assert(pp_context->sampler_state_table.bo->virtual);
4076 assert(sizeof(*sampler_dn) == sizeof(int) * 8);
4077 sampler_dn = pp_context->sampler_state_table.bo->virtual;
4079 /* sample dn index 1 */
4081 sampler_dn[index].dw0.denoise_asd_threshold = 0;
4082 sampler_dn[index].dw0.dnmh_delt = 8;
4083 sampler_dn[index].dw0.vdi_walker_y_stride = 0;
4084 sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
4085 sampler_dn[index].dw0.denoise_maximum_history = 128; // 128-240
4086 sampler_dn[index].dw0.denoise_stad_threshold = 0;
4088 sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
4089 sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
4090 sampler_dn[index].dw1.stmm_c2 = 0;
4091 sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
4092 sampler_dn[index].dw1.temporal_difference_threshold = 16;
/* Denoise strength directly drives the block-noise-estimate threshold. */
4094 sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength; // 0-31
4095 sampler_dn[index].dw2.bne_edge_th = 1;
4096 sampler_dn[index].dw2.smooth_mv_th = 0;
4097 sampler_dn[index].dw2.sad_tight_th = 5;
4098 sampler_dn[index].dw2.cat_slope_minus1 = 9;
4099 sampler_dn[index].dw2.good_neighbor_th = 4;
4101 sampler_dn[index].dw3.maximum_stmm = 128;
4102 sampler_dn[index].dw3.multipler_for_vecm = 2;
4103 sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
4104 sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
4105 sampler_dn[index].dw3.stmm_blending_constant_select = 0;
4107 sampler_dn[index].dw4.sdi_delta = 8;
4108 sampler_dn[index].dw4.sdi_threshold = 128;
4109 sampler_dn[index].dw4.stmm_output_shift = 7; // stmm_max - stmm_min = 2 ^ stmm_output_shift
4110 sampler_dn[index].dw4.stmm_shift_up = 0;
4111 sampler_dn[index].dw4.stmm_shift_down = 0;
4112 sampler_dn[index].dw4.minimum_stmm = 0;
4114 sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
4115 sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
4116 sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
4117 sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
/* Denoise only: de-interlace and motion-compensated DI disabled. */
4119 sampler_dn[index].dw6.dn_enable = 1;
4120 sampler_dn[index].dw6.di_enable = 0;
4121 sampler_dn[index].dw6.di_partial = 0;
4122 sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
4123 sampler_dn[index].dw6.dndi_stream_id = 1;
4124 sampler_dn[index].dw6.dndi_first_frame = 1;
4125 sampler_dn[index].dw6.progressive_dn = dn_progressive;
4126 sampler_dn[index].dw6.mcdi_enable = 0;
4127 sampler_dn[index].dw6.fmd_tear_threshold = 32;
4128 sampler_dn[index].dw6.cat_th1 = 0;
4129 sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
4130 sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
4132 sampler_dn[index].dw7.sad_tha = 5;
4133 sampler_dn[index].dw7.sad_thb = 10;
4134 sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
4135 sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
4136 sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
4137 sampler_dn[index].dw7.vdi_walker_enable = 0;
4138 sampler_dn[index].dw7.neighborpixel_th = 10;
4139 sampler_dn[index].dw7.column_width_minus1 = w / 16;
4141 dri_bo_unmap(pp_context->sampler_state_table.bo);
4143 /* private function & data */
4144 pp_context->pp_x_steps = gen7_pp_dn_x_steps;
4145 pp_context->pp_y_steps = gen7_pp_dn_y_steps;
4146 pp_context->private_context = &pp_context->pp_dn_context;
4147 pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
4149 pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
4150 pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
4151 pp_static_parameter->grf1.di_top_field_first = 0;
4152 pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
4154 pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
4155 pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
4156 pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
4158 pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
4159 pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
4161 pp_dn_context->dest_w = w;
4162 pp_dn_context->dest_h = h;
/* Denoise preserves the source's frame/field layout. */
4164 dst_surface->flags = src_surface->flags;
4166 return VA_STATUS_SUCCESS;
/*
 * ironlake_pp_initialize:
 * Prepare the Ironlake post-processing context for one pp pass:
 * (re)allocate every GPU state buffer (surface state + binding table,
 * CURBE constants, interface descriptors, sampler state tables, VFE state),
 * zero the static/inline parameter blocks, record the selected pp module,
 * and finally run that module's initialize() hook.
 * NOTE(review): several intermediate lines (buffer sizes, error checks,
 * closing braces) are elided in this excerpt — confirm against the full file.
 */
4170 ironlake_pp_initialize(
4171 VADriverContextP ctx,
4172 struct i965_post_processing_context *pp_context,
4173 const struct i965_surface *src_surface,
4174 const VARectangle *src_rect,
4175 struct i965_surface *dst_surface,
4176 const VARectangle *dst_rect,
4182 struct i965_driver_data *i965 = i965_driver_data(ctx);
4183 struct pp_module *pp_module;
4185 int static_param_size, inline_param_size;
/* Surface state block followed by the binding table (one dword per surface). */
4187 dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4188 bo = dri_bo_alloc(i965->intel.bufmgr,
4189 "surface state & binding table",
4190 (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4193 pp_context->surface_state_binding_table.bo = bo;
/* CURBE buffer: holds the static parameters uploaded before each walk. */
4195 dri_bo_unreference(pp_context->curbe.bo);
4196 bo = dri_bo_alloc(i965->intel.bufmgr,
4201 pp_context->curbe.bo = bo;
/* Interface descriptor table; descriptor count reset below. */
4203 dri_bo_unreference(pp_context->idrt.bo);
4204 bo = dri_bo_alloc(i965->intel.bufmgr,
4205 "interface discriptor",
4206 sizeof(struct i965_interface_descriptor),
4209 pp_context->idrt.bo = bo;
4210 pp_context->idrt.num_interface_descriptors = 0;
/* Sampler state table is mapped and zeroed so a module can fill it lazily. */
4212 dri_bo_unreference(pp_context->sampler_state_table.bo);
4213 bo = dri_bo_alloc(i965->intel.bufmgr,
4214 "sampler state table",
4218 dri_bo_map(bo, True);
4219 memset(bo->virtual, 0, bo->size);
4221 pp_context->sampler_state_table.bo = bo;
/* 8x8 (AVS) sampler state for luma ... */
4223 dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4224 bo = dri_bo_alloc(i965->intel.bufmgr,
4225 "sampler 8x8 state ",
4229 pp_context->sampler_state_table.bo_8x8 = bo;
/* ... and a second one for chroma (UV). */
4231 dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4232 bo = dri_bo_alloc(i965->intel.bufmgr,
4233 "sampler 8x8 state ",
4237 pp_context->sampler_state_table.bo_8x8_uv = bo;
4239 dri_bo_unreference(pp_context->vfe_state.bo);
4240 bo = dri_bo_alloc(i965->intel.bufmgr,
4242 sizeof(struct i965_vfe_state),
4245 pp_context->vfe_state.bo = bo;
/* Ironlake always uses the pre-gen7 parameter layouts. */
4247 static_param_size = sizeof(struct pp_static_parameter);
4248 inline_param_size = sizeof(struct pp_inline_parameter);
4250 memset(pp_context->pp_static_parameter, 0, static_param_size);
4251 memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4253 assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4254 pp_context->current_pp = pp_index;
4255 pp_module = &pp_context->pp_modules[pp_index];
/* Delegate per-module setup (kernels, sampler programming, etc.). */
4257 if (pp_module->initialize)
4258 va_status = pp_module->initialize(ctx, pp_context,
4265 va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
/*
 * ironlake_post_processing:
 * Top-level Ironlake pp entry point: initialize the context for the
 * requested module, and on success set up the GPU states and run the
 * media pipeline.
 */
4271 ironlake_post_processing(
4272 VADriverContextP ctx,
4273 struct i965_post_processing_context *pp_context,
4274 const struct i965_surface *src_surface,
4275 const VARectangle *src_rect,
4276 struct i965_surface *dst_surface,
4277 const VARectangle *dst_rect,
4284 va_status = ironlake_pp_initialize(ctx, pp_context,
4292 if (va_status == VA_STATUS_SUCCESS) {
4293 ironlake_pp_states_setup(ctx, pp_context);
4294 ironlake_pp_pipeline_setup(ctx, pp_context);
/*
 * NOTE(review): the function name line is elided in this excerpt —
 * presumably gen6_pp_initialize (it is what gen6_post_processing calls);
 * verify against the full file.
 * Gen6/Gen7 variant of pp context initialization: same buffer
 * (re)allocation scheme as the Ironlake path, but uses the gen6 interface
 * descriptor layout, picks the gen7 parameter structs on Gen7 hardware,
 * and computes the boundary block masks for the destination rectangle.
 */
4302 VADriverContextP ctx,
4303 struct i965_post_processing_context *pp_context,
4304 const struct i965_surface *src_surface,
4305 const VARectangle *src_rect,
4306 struct i965_surface *dst_surface,
4307 const VARectangle *dst_rect,
4313 struct i965_driver_data *i965 = i965_driver_data(ctx);
4314 struct pp_module *pp_module;
4316 int static_param_size, inline_param_size;
4318 dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4319 bo = dri_bo_alloc(i965->intel.bufmgr,
4320 "surface state & binding table",
4321 (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4324 pp_context->surface_state_binding_table.bo = bo;
4326 dri_bo_unreference(pp_context->curbe.bo);
4327 bo = dri_bo_alloc(i965->intel.bufmgr,
4332 pp_context->curbe.bo = bo;
/* Gen6 descriptor layout differs from the Ironlake one above. */
4334 dri_bo_unreference(pp_context->idrt.bo);
4335 bo = dri_bo_alloc(i965->intel.bufmgr,
4336 "interface discriptor",
4337 sizeof(struct gen6_interface_descriptor_data),
4340 pp_context->idrt.bo = bo;
4341 pp_context->idrt.num_interface_descriptors = 0;
4343 dri_bo_unreference(pp_context->sampler_state_table.bo);
4344 bo = dri_bo_alloc(i965->intel.bufmgr,
4345 "sampler state table",
4349 dri_bo_map(bo, True);
4350 memset(bo->virtual, 0, bo->size);
4352 pp_context->sampler_state_table.bo = bo;
4354 dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4355 bo = dri_bo_alloc(i965->intel.bufmgr,
4356 "sampler 8x8 state ",
4360 pp_context->sampler_state_table.bo_8x8 = bo;
4362 dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4363 bo = dri_bo_alloc(i965->intel.bufmgr,
4364 "sampler 8x8 state ",
4368 pp_context->sampler_state_table.bo_8x8_uv = bo;
4370 dri_bo_unreference(pp_context->vfe_state.bo);
4371 bo = dri_bo_alloc(i965->intel.bufmgr,
4373 sizeof(struct i965_vfe_state),
4376 pp_context->vfe_state.bo = bo;
/* Gen7 uses larger (256-byte) static and distinct inline parameter blocks. */
4378 if (IS_GEN7(i965->intel.device_info)) {
4379 static_param_size = sizeof(struct gen7_pp_static_parameter);
4380 inline_param_size = sizeof(struct gen7_pp_inline_parameter);
4382 static_param_size = sizeof(struct pp_static_parameter);
4383 inline_param_size = sizeof(struct pp_inline_parameter);
4386 memset(pp_context->pp_static_parameter, 0, static_param_size);
4387 memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4389 assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4390 pp_context->current_pp = pp_index;
4391 pp_module = &pp_context->pp_modules[pp_index];
4393 if (pp_module->initialize)
4394 va_status = pp_module->initialize(ctx, pp_context,
4401 va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
/* Precompute edge masks so partial blocks at the dst borders are clipped. */
4403 calculate_boundary_block_mask(pp_context, dst_rect);
/*
 * gen6_pp_interface_descriptor_table:
 * Fill one gen6 interface descriptor for the currently selected pp module:
 * kernel start pointer, sampler state pointer, binding table offset and
 * CURBE read length, then emit the two relocations (kernel bo and sampler
 * state bo) and bump the descriptor count.
 */
4410 gen6_pp_interface_descriptor_table(VADriverContextP ctx,
4411 struct i965_post_processing_context *pp_context)
4413 struct i965_driver_data *i965 = i965_driver_data(ctx);
4414 struct gen6_interface_descriptor_data *desc;
4416 int pp_index = pp_context->current_pp;
4418 bo = pp_context->idrt.bo;
4419 dri_bo_map(bo, True);
4420 assert(bo->virtual);
4422 memset(desc, 0, sizeof(*desc));
/* Kernel address is in 64-byte units; patched by the reloc emitted below. */
4423 desc->desc0.kernel_start_pointer =
4424 pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
4425 desc->desc1.single_program_flow = 1;
4426 desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
4427 desc->desc2.sampler_count = 1; /* 1 - 4 samplers used */
/* Sampler state pointer is in 32-byte units. */
4428 desc->desc2.sampler_state_pointer =
4429 pp_context->sampler_state_table.bo->offset >> 5;
4430 desc->desc3.binding_table_entry_count = 0;
4431 desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
4432 desc->desc4.constant_urb_entry_read_offset = 0;
/* Gen7 kernels consume twice as many constant GRFs as Gen6. */
4434 if (IS_GEN7(i965->intel.device_info))
4435 desc->desc4.constant_urb_entry_read_length = 8; /* grf 1-8 */
4437 desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
4439 dri_bo_emit_reloc(bo,
4440 I915_GEM_DOMAIN_INSTRUCTION, 0,
4442 offsetof(struct gen6_interface_descriptor_data, desc0),
4443 pp_context->pp_modules[pp_index].kernel.bo);
4445 dri_bo_emit_reloc(bo,
4446 I915_GEM_DOMAIN_INSTRUCTION, 0,
4447 desc->desc2.sampler_count << 2,
4448 offsetof(struct gen6_interface_descriptor_data, desc2),
4449 pp_context->sampler_state_table.bo);
4452 pp_context->idrt.num_interface_descriptors++;
/*
 * gen6_pp_upload_constants:
 * Copy the prepared static parameter block into the CURBE buffer.
 * The asserts pin the expected struct sizes (128 bytes pre-gen7,
 * 256 bytes on gen7) so a layout change is caught immediately.
 */
4456 gen6_pp_upload_constants(VADriverContextP ctx,
4457 struct i965_post_processing_context *pp_context)
4459 struct i965_driver_data *i965 = i965_driver_data(ctx);
4460 unsigned char *constant_buffer;
4463 assert(sizeof(struct pp_static_parameter) == 128);
4464 assert(sizeof(struct gen7_pp_static_parameter) == 256);
4466 if (IS_GEN7(i965->intel.device_info))
4467 param_size = sizeof(struct gen7_pp_static_parameter);
4469 param_size = sizeof(struct pp_static_parameter);
4471 dri_bo_map(pp_context->curbe.bo, 1);
4472 assert(pp_context->curbe.bo->virtual);
4473 constant_buffer = pp_context->curbe.bo->virtual;
4474 memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
4475 dri_bo_unmap(pp_context->curbe.bo);
/*
 * gen6_pp_states_setup:
 * Build the indirect GPU state (interface descriptors + CURBE constants)
 * before the pipeline commands are emitted.
 */
4479 gen6_pp_states_setup(VADriverContextP ctx,
4480 struct i965_post_processing_context *pp_context)
4482 gen6_pp_interface_descriptor_table(ctx, pp_context);
4483 gen6_pp_upload_constants(ctx, pp_context);
/*
 * gen6_pp_pipeline_select:
 * Switch the GPU command streamer to the media pipeline.
 */
4487 gen6_pp_pipeline_select(VADriverContextP ctx,
4488 struct i965_post_processing_context *pp_context)
4490 struct intel_batchbuffer *batch = pp_context->batch;
4492 BEGIN_BATCH(batch, 1);
4493 OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
4494 ADVANCE_BATCH(batch);
/*
 * gen6_pp_state_base_address:
 * Emit STATE_BASE_ADDRESS: only the surface state base points at our
 * surface-state/binding-table bo; every other base address is left at 0
 * with the modify-enable bit set.
 */
4498 gen6_pp_state_base_address(VADriverContextP ctx,
4499 struct i965_post_processing_context *pp_context)
4501 struct intel_batchbuffer *batch = pp_context->batch;
4503 BEGIN_BATCH(batch, 10);
4504 OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
4505 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4506 OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
4507 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4508 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4509 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4510 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4511 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4512 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4513 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4514 ADVANCE_BATCH(batch);
/*
 * gen6_pp_vfe_state:
 * Emit MEDIA_VFE_STATE with the thread count, URB entry count/size and
 * CURBE allocation size held in pp_context->vfe_gpu_state.
 */
4518 gen6_pp_vfe_state(VADriverContextP ctx,
4519 struct i965_post_processing_context *pp_context)
4521 struct intel_batchbuffer *batch = pp_context->batch;
4523 BEGIN_BATCH(batch, 8);
4524 OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
4525 OUT_BATCH(batch, 0);
4527 (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 |
4528 pp_context->vfe_gpu_state.num_urb_entries << 8);
4529 OUT_BATCH(batch, 0);
4531 (pp_context->vfe_gpu_state.urb_entry_size) << 16 |
4532 /* URB Entry Allocation Size, in 256 bits unit */
4533 (pp_context->vfe_gpu_state.curbe_allocation_size));
4534 /* CURBE Allocation Size, in 256 bits unit */
4535 OUT_BATCH(batch, 0);
4536 OUT_BATCH(batch, 0);
4537 OUT_BATCH(batch, 0);
4538 ADVANCE_BATCH(batch);
/*
 * gen6_pp_curbe_load:
 * Emit MEDIA_CURBE_LOAD pointing the hardware at the CURBE bo filled by
 * gen6_pp_upload_constants; length matches the gen-specific static
 * parameter struct.
 */
4542 gen6_pp_curbe_load(VADriverContextP ctx,
4543 struct i965_post_processing_context *pp_context)
4545 struct intel_batchbuffer *batch = pp_context->batch;
4546 struct i965_driver_data *i965 = i965_driver_data(ctx);
4549 if (IS_GEN7(i965->intel.device_info))
4550 param_size = sizeof(struct gen7_pp_static_parameter);
4552 param_size = sizeof(struct pp_static_parameter);
4554 BEGIN_BATCH(batch, 4);
4555 OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
4556 OUT_BATCH(batch, 0);
4560 pp_context->curbe.bo,
4561 I915_GEM_DOMAIN_INSTRUCTION, 0,
4563 ADVANCE_BATCH(batch);
/*
 * gen6_interface_descriptor_load:
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD referencing the descriptor table
 * built in gen6_pp_interface_descriptor_table.
 */
4567 gen6_interface_descriptor_load(VADriverContextP ctx,
4568 struct i965_post_processing_context *pp_context)
4570 struct intel_batchbuffer *batch = pp_context->batch;
4572 BEGIN_BATCH(batch, 4);
4573 OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
4574 OUT_BATCH(batch, 0);
4576 pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
4578 pp_context->idrt.bo,
4579 I915_GEM_DOMAIN_INSTRUCTION, 0,
4581 ADVANCE_BATCH(batch);
/*
 * update_block_mask_parameter:
 * Per-block inline parameter update shared by all Gen6 pp functions:
 * select the vertical/horizontal pixel masks for block (x, y) so that
 * partial blocks on the destination edges are clipped, interior blocks
 * use full masks.
 */
4584 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps)
4586 struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
/* Defaults for an interior row: full vertical mask, bottom mask from ctx. */
4588 pp_inline_parameter->grf5.block_vertical_mask = 0xff;
4589 pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
4590 // The first block is always on the left edge; the second block reloads horizontal_mask from grf6.block_horizontal_mask_middle.
4591 pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
4592 pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4593 pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
/* Last row: clip the bottom edge. */
4597 if (y == y_steps-1) {
4598 pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
4601 pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
4607 if (x == 0) { // all blocks in this group are on the left edge
4608 pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
4609 pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left;
4611 else if (x == x_steps-1) {
4612 pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
4613 pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
/* Interior column: nothing to clip horizontally. */
4616 pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
4617 pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4618 pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
/*
 * gen6_pp_object_walker:
 * Build a secondary batch buffer containing one MEDIA_OBJECT command per
 * (x, y) block — each carrying the per-block inline parameters — then
 * chain into it with MI_BATCH_BUFFER_START and flush. The pp module's
 * pp_set_block_parameter callback fills the inline data; blocks it
 * rejects (non-zero return) are skipped.
 */
4625 gen6_pp_object_walker(VADriverContextP ctx,
4626 struct i965_post_processing_context *pp_context)
4628 struct i965_driver_data *i965 = i965_driver_data(ctx);
4629 struct intel_batchbuffer *batch = pp_context->batch;
4630 int x, x_steps, y, y_steps;
4631 int param_size, command_length_in_dws;
4632 dri_bo *command_buffer;
4633 unsigned int *command_ptr;
4635 if (IS_GEN7(i965->intel.device_info))
4636 param_size = sizeof(struct gen7_pp_inline_parameter);
4638 param_size = sizeof(struct pp_inline_parameter);
4640 x_steps = pp_context->pp_x_steps(pp_context->private_context);
4641 y_steps = pp_context->pp_y_steps(pp_context->private_context);
/* 6 fixed dwords per MEDIA_OBJECT plus the inline payload; +8 bytes leaves
 * room for padding and the trailing MI_BATCH_BUFFER_END. */
4642 command_length_in_dws = 6 + (param_size >> 2);
4643 command_buffer = dri_bo_alloc(i965->intel.bufmgr,
4644 "command objects buffer",
4645 command_length_in_dws * 4 * x_steps * y_steps + 8,
4648 dri_bo_map(command_buffer, 1);
4649 command_ptr = command_buffer->virtual;
4651 for (y = 0; y < y_steps; y++) {
4652 for (x = 0; x < x_steps; x++) {
4653 if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
4654 // some common block parameter update goes here, apply to all pp functions
4655 if (IS_GEN6(i965->intel.device_info))
4656 update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
4658 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
4664 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
4665 command_ptr += (param_size >> 2);
/* Keep the batch end qword-aligned before terminating it. */
4670 if (command_length_in_dws * x_steps * y_steps % 2 == 0)
4673 *command_ptr = MI_BATCH_BUFFER_END;
4675 dri_bo_unmap(command_buffer);
4677 BEGIN_BATCH(batch, 2);
4678 OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
4679 OUT_RELOC(batch, command_buffer,
4680 I915_GEM_DOMAIN_COMMAND, 0,
4682 ADVANCE_BATCH(batch);
4684 dri_bo_unreference(command_buffer);
4686 /* Have to execute the batch buffer here because MI_BATCH_BUFFER_END
4687 * will cause control to pass back to ring buffer
4689 intel_batchbuffer_end_atomic(batch);
4690 intel_batchbuffer_flush(batch);
4691 intel_batchbuffer_start_atomic(batch, 0x1000);
/*
 * gen6_pp_pipeline_setup:
 * Emit the complete media pipeline for one pp pass, in the required
 * order: flush, pipeline select, base addresses, VFE state, CURBE load,
 * descriptor load, then the per-block object walker.
 */
4695 gen6_pp_pipeline_setup(VADriverContextP ctx,
4696 struct i965_post_processing_context *pp_context)
4698 struct intel_batchbuffer *batch = pp_context->batch;
4700 intel_batchbuffer_start_atomic(batch, 0x1000);
4701 intel_batchbuffer_emit_mi_flush(batch);
4702 gen6_pp_pipeline_select(ctx, pp_context);
4703 gen6_pp_state_base_address(ctx, pp_context);
4704 gen6_pp_vfe_state(ctx, pp_context);
4705 gen6_pp_curbe_load(ctx, pp_context);
4706 gen6_interface_descriptor_load(ctx, pp_context);
4707 gen6_pp_object_walker(ctx, pp_context);
4708 intel_batchbuffer_end_atomic(batch);
/*
 * gen6_post_processing:
 * Top-level Gen6/Gen7 pp entry: initialize, then set up states and run
 * the pipeline on success. VA_STATUS_SUCCESS_1 (internal "nothing to do"
 * marker) is folded back into VA_STATUS_SUCCESS for callers.
 */
4712 gen6_post_processing(
4713 VADriverContextP ctx,
4714 struct i965_post_processing_context *pp_context,
4715 const struct i965_surface *src_surface,
4716 const VARectangle *src_rect,
4717 struct i965_surface *dst_surface,
4718 const VARectangle *dst_rect,
4725 va_status = gen6_pp_initialize(ctx, pp_context,
4733 if (va_status == VA_STATUS_SUCCESS) {
4734 gen6_pp_states_setup(ctx, pp_context);
4735 gen6_pp_pipeline_setup(ctx, pp_context);
4738 if (va_status == VA_STATUS_SUCCESS_1)
4739 va_status = VA_STATUS_SUCCESS;
/*
 * i965_post_processing_internal:
 * Generation-agnostic dispatcher: forward the request to the hook
 * installed in the pp context (ironlake/gen6/gen75... variant), or
 * report VA_STATUS_ERROR_UNIMPLEMENTED when no hook is set.
 */
4745 i965_post_processing_internal(
4746 VADriverContextP ctx,
4747 struct i965_post_processing_context *pp_context,
4748 const struct i965_surface *src_surface,
4749 const VARectangle *src_rect,
4750 struct i965_surface *dst_surface,
4751 const VARectangle *dst_rect,
4758 if (pp_context && pp_context->intel_post_processing) {
4759 va_status = (pp_context->intel_post_processing)(ctx, pp_context,
4760 src_surface, src_rect,
4761 dst_surface, dst_rect,
4762 pp_index, filter_param);
4764 va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
/*
 * rgb_to_yuv:
 * Convert a packed ARGB pixel to limited-range Y'CbCr using integer
 * coefficients scaled by 1000 (BT.601-style: Y in [16..], Cb/Cr biased
 * by 128). *v receives Cr, *u receives Cb; the alpha byte passes through.
 */
4771 rgb_to_yuv(unsigned int argb,
4777 int r = ((argb >> 16) & 0xff);
4778 int g = ((argb >> 8) & 0xff);
4779 int b = ((argb >> 0) & 0xff);
4781 *y = (257 * r + 504 * g + 98 * b) / 1000 + 16;
4782 *v = (439 * r - 368 * g - 71 * b) / 1000 + 128;
4783 *u = (-148 * r - 291 * g + 439 * b) / 1000 + 128;
4784 *a = ((argb >> 24) & 0xff);
/*
 * i965_vpp_clear_surface:
 * Fill an NV12 surface with a solid ARGB color using two XY_COLOR_BLT
 * operations: one over the Y plane (value y) and one over the
 * interleaved CbCr plane (value v<<8 | u). Non-NV12 surfaces are
 * rejected up front.
 */
4788 i965_vpp_clear_surface(VADriverContextP ctx,
4789 struct i965_post_processing_context *pp_context,
4790 struct object_surface *obj_surface,
4793 struct i965_driver_data *i965 = i965_driver_data(ctx);
4794 struct intel_batchbuffer *batch = pp_context->batch;
4795 unsigned int blt_cmd, br13;
4796 unsigned int tiling = 0, swizzle = 0;
4798 unsigned char y, u, v, a = 0;
4799 int region_width, region_height;
4801 /* Currently only support NV12 surface */
4802 if (!obj_surface || obj_surface->fourcc != VA_FOURCC_NV12)
4805 rgb_to_yuv(color, &y, &u, &v, &a);
4810 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4811 blt_cmd = XY_COLOR_BLT_CMD;
4812 pitch = obj_surface->width;
4814 if (tiling != I915_TILING_NONE) {
4815 assert(tiling == I915_TILING_Y);
4816 // blt_cmd |= XY_COLOR_BLT_DST_TILED;
/* Ironlake shares one ring; newer chips need the BLT-specific batch. */
4824 if (IS_IRONLAKE(i965->intel.device_info)) {
4825 intel_batchbuffer_start_atomic(batch, 48);
4826 BEGIN_BATCH(batch, 12);
4828 /* Will double-check the command if the new chipset is added */
4829 intel_batchbuffer_start_atomic_blt(batch, 48);
4830 BEGIN_BLT_BATCH(batch, 12);
/* First blit: the full-size luma (Y) plane. */
4833 region_width = obj_surface->width;
4834 region_height = obj_surface->height;
4836 OUT_BATCH(batch, blt_cmd);
4837 OUT_BATCH(batch, br13);
4842 region_height << 16 |
4844 OUT_RELOC(batch, obj_surface->bo,
4845 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4847 OUT_BATCH(batch, y);
/* Second blit: the half-height chroma (CbCr) plane at y_cb_offset. */
4853 region_width = obj_surface->width / 2;
4854 region_height = obj_surface->height / 2;
4856 if (tiling == I915_TILING_Y) {
4857 region_height = ALIGN(obj_surface->height / 2, 32);
4860 OUT_BATCH(batch, blt_cmd);
4861 OUT_BATCH(batch, br13);
4866 region_height << 16 |
4868 OUT_RELOC(batch, obj_surface->bo,
4869 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4870 obj_surface->width * obj_surface->y_cb_offset);
4871 OUT_BATCH(batch, v << 8 | u);
4873 ADVANCE_BATCH(batch);
4874 intel_batchbuffer_end_atomic(batch);
/*
 * i965_scaling_processing:
 * Scale one NV12 surface into another under the pp mutex, choosing the
 * AVS (adaptive) kernel when the flags request high-quality scaling and
 * the plain scaling kernel otherwise. The context's filter_flags are
 * saved and restored around the call.
 */
4878 i965_scaling_processing(
4879 VADriverContextP ctx,
4880 struct object_surface *src_surface_obj,
4881 const VARectangle *src_rect,
4882 struct object_surface *dst_surface_obj,
4883 const VARectangle *dst_rect,
4884 unsigned int va_flags)
4886 VAStatus va_status = VA_STATUS_SUCCESS;
4887 struct i965_driver_data *i965 = i965_driver_data(ctx);
4889 assert(src_surface_obj->fourcc == VA_FOURCC_NV12);
4890 assert(dst_surface_obj->fourcc == VA_FOURCC_NV12);
4892 if (HAS_VPP(i965)) {
4893 struct i965_surface src_surface;
4894 struct i965_surface dst_surface;
4895 struct i965_post_processing_context *pp_context;
4896 unsigned int filter_flags;
4898 _i965LockMutex(&i965->pp_mutex);
4900 src_surface.base = (struct object_base *)src_surface_obj;
4901 src_surface.type = I965_SURFACE_TYPE_SURFACE;
4902 src_surface.flags = I965_SURFACE_FLAG_FRAME;
4903 dst_surface.base = (struct object_base *)dst_surface_obj;
4904 dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4905 dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4907 pp_context = i965->pp_context;
4908 filter_flags = pp_context->filter_flags;
4909 pp_context->filter_flags = va_flags;
4911 va_status = i965_post_processing_internal(ctx, pp_context,
4912 &src_surface, src_rect, &dst_surface, dst_rect,
4913 avs_is_needed(va_flags) ? PP_NV12_AVS : PP_NV12_SCALING, NULL);
4915 pp_context->filter_flags = filter_flags;
4917 _i965UnlockMutex(&i965->pp_mutex);
/*
 * i965_post_processing:
 * Public pp entry used by the rendering path. For NV12 input only:
 * when AVS scaling is requested, allocate a temporary NV12 surface sized
 * to dst_rect, clear it, scale into it, and return its surface id with
 * *has_done_scaling = 1 and calibrated_rect set to the scaled region.
 * Returns VA_INVALID_ID when nothing was done.
 */
4924 i965_post_processing(
4925 VADriverContextP ctx,
4926 struct object_surface *obj_surface,
4927 const VARectangle *src_rect,
4928 const VARectangle *dst_rect,
4929 unsigned int va_flags,
4930 int *has_done_scaling,
4931 VARectangle *calibrated_rect
4934 struct i965_driver_data *i965 = i965_driver_data(ctx);
4935 VASurfaceID out_surface_id = VA_INVALID_ID;
4936 VASurfaceID tmp_id = VA_INVALID_ID;
4938 *has_done_scaling = 0;
4940 if (HAS_VPP(i965)) {
4942 struct i965_surface src_surface;
4943 struct i965_surface dst_surface;
4944 struct i965_post_processing_context *pp_context;
4946 /* Currently only support post processing for NV12 surface */
4947 if (obj_surface->fourcc != VA_FOURCC_NV12)
4948 return out_surface_id;
4950 _i965LockMutex(&i965->pp_mutex);
4952 pp_context = i965->pp_context;
4953 pp_context->filter_flags = va_flags;
4954 if (avs_is_needed(va_flags)) {
4955 VARectangle tmp_dst_rect;
/* Remember a previously created output so it can be freed below. */
4957 if (out_surface_id != VA_INVALID_ID)
4958 tmp_id = out_surface_id;
4962 tmp_dst_rect.width = dst_rect->width;
4963 tmp_dst_rect.height = dst_rect->height;
4964 src_surface.base = (struct object_base *)obj_surface;
4965 src_surface.type = I965_SURFACE_TYPE_SURFACE;
4966 src_surface.flags = I965_SURFACE_FLAG_FRAME;
4968 status = i965_CreateSurfaces(ctx,
4971 VA_RT_FORMAT_YUV420,
4974 assert(status == VA_STATUS_SUCCESS);
4975 obj_surface = SURFACE(out_surface_id);
4976 assert(obj_surface);
4977 i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Clear the fresh target before scaling into it. */
4978 i965_vpp_clear_surface(ctx, pp_context, obj_surface, 0);
4980 dst_surface.base = (struct object_base *)obj_surface;
4981 dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4982 dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4984 i965_post_processing_internal(ctx, pp_context,
4992 if (tmp_id != VA_INVALID_ID)
4993 i965_DestroySurfaces(ctx, &tmp_id, 1);
4995 *has_done_scaling = 1;
4996 calibrated_rect->x = 0;
4997 calibrated_rect->y = 0;
4998 calibrated_rect->width = dst_rect->width;
4999 calibrated_rect->height = dst_rect->height;
5002 _i965UnlockMutex(&i965->pp_mutex);
5005 return out_surface_id;
5009 i965_image_pl2_processing(VADriverContextP ctx,
5010 const struct i965_surface *src_surface,
5011 const VARectangle *src_rect,
5012 struct i965_surface *dst_surface,
5013 const VARectangle *dst_rect);
/*
 * i965_image_plx_nv12_plx_processing:
 * Two-stage conversion helper for source/destination format pairs with
 * no direct kernel: run the supplied plx->NV12 conversion into a
 * temporary NV12 surface sized like the destination, then convert that
 * NV12 surface to the final format via i965_image_pl2_processing.
 * The temporary surface is destroyed before returning.
 */
5016 i965_image_plx_nv12_plx_processing(VADriverContextP ctx,
5017 VAStatus (*i965_image_plx_nv12_processing)(
5019 const struct i965_surface *,
5020 const VARectangle *,
5021 struct i965_surface *,
5022 const VARectangle *),
5023 const struct i965_surface *src_surface,
5024 const VARectangle *src_rect,
5025 struct i965_surface *dst_surface,
5026 const VARectangle *dst_rect)
5028 struct i965_driver_data *i965 = i965_driver_data(ctx);
5030 VASurfaceID tmp_surface_id = VA_INVALID_SURFACE;
5031 struct object_surface *obj_surface = NULL;
5032 struct i965_surface tmp_surface;
5035 pp_get_surface_size(ctx, dst_surface, &width, &height);
5036 status = i965_CreateSurfaces(ctx,
5039 VA_RT_FORMAT_YUV420,
5042 assert(status == VA_STATUS_SUCCESS);
5043 obj_surface = SURFACE(tmp_surface_id);
5044 assert(obj_surface);
5045 i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5047 tmp_surface.base = (struct object_base *)obj_surface;
5048 tmp_surface.type = I965_SURFACE_TYPE_SURFACE;
5049 tmp_surface.flags = I965_SURFACE_FLAG_FRAME;
/* Stage 1: source -> temporary NV12. */
5051 status = i965_image_plx_nv12_processing(ctx,
/* Stage 2: temporary NV12 -> requested destination format. */
5057 if (status == VA_STATUS_SUCCESS)
5058 status = i965_image_pl2_processing(ctx,
5064 i965_DestroySurfaces(ctx,
/*
 * i965_image_pl1_rgbx_processing:
 * Convert an RGBX source image. Direct kernel exists only for NV12
 * output; every other destination format goes through the
 * RGBX -> NV12 -> target two-stage path.
 */
5073 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
5074 const struct i965_surface *src_surface,
5075 const VARectangle *src_rect,
5076 struct i965_surface *dst_surface,
5077 const VARectangle *dst_rect)
5079 struct i965_driver_data *i965 = i965_driver_data(ctx);
5080 struct i965_post_processing_context *pp_context = i965->pp_context;
5081 int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5085 case VA_FOURCC_NV12:
5086 vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5091 PP_RGBX_LOAD_SAVE_NV12,
5093 intel_batchbuffer_flush(pp_context->batch);
5097 vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5098 i965_image_pl1_rgbx_processing,
/*
 * i965_image_pl3_processing:
 * Convert a 3-plane (PL3: I420/YV12/IMC1/IMC3) source. Direct kernels
 * cover NV12, other PL3 layouts, and packed YUY2/UYVY destinations;
 * anything else falls back to the PL3 -> NV12 -> target two-stage path.
 */
5110 i965_image_pl3_processing(VADriverContextP ctx,
5111 const struct i965_surface *src_surface,
5112 const VARectangle *src_rect,
5113 struct i965_surface *dst_surface,
5114 const VARectangle *dst_rect)
5116 struct i965_driver_data *i965 = i965_driver_data(ctx);
5117 struct i965_post_processing_context *pp_context = i965->pp_context;
5118 int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5119 VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5122 case VA_FOURCC_NV12:
5123 vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5128 PP_PL3_LOAD_SAVE_N12,
5130 intel_batchbuffer_flush(pp_context->batch);
5133 case VA_FOURCC_IMC1:
5134 case VA_FOURCC_IMC3:
5135 case VA_FOURCC_YV12:
5136 case VA_FOURCC_I420:
5137 vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5142 PP_PL3_LOAD_SAVE_PL3,
5144 intel_batchbuffer_flush(pp_context->batch);
5147 case VA_FOURCC_YUY2:
5148 case VA_FOURCC_UYVY:
5149 vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5154 PP_PL3_LOAD_SAVE_PA,
5156 intel_batchbuffer_flush(pp_context->batch);
5160 vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5161 i965_image_pl3_processing,
/*
 * i965_image_pl2_processing:
 * Convert a 2-plane (NV12) source. Direct kernels exist for NV12, PL3,
 * packed YUY2/UYVY and RGBX/RGBA destinations; anything else is
 * unimplemented. The batch is flushed once after the switch.
 */
5173 i965_image_pl2_processing(VADriverContextP ctx,
5174 const struct i965_surface *src_surface,
5175 const VARectangle *src_rect,
5176 struct i965_surface *dst_surface,
5177 const VARectangle *dst_rect)
5179 struct i965_driver_data *i965 = i965_driver_data(ctx);
5180 struct i965_post_processing_context *pp_context = i965->pp_context;
5181 int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5182 VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5185 case VA_FOURCC_NV12:
5186 vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5191 PP_NV12_LOAD_SAVE_N12,
5195 case VA_FOURCC_IMC1:
5196 case VA_FOURCC_IMC3:
5197 case VA_FOURCC_YV12:
5198 case VA_FOURCC_I420:
5199 vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5204 PP_NV12_LOAD_SAVE_PL3,
5208 case VA_FOURCC_YUY2:
5209 case VA_FOURCC_UYVY:
5210 vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5215 PP_NV12_LOAD_SAVE_PA,
5219 case VA_FOURCC_BGRX:
5220 case VA_FOURCC_BGRA:
5221 case VA_FOURCC_RGBX:
5222 case VA_FOURCC_RGBA:
5223 vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5228 PP_NV12_LOAD_SAVE_RGBX,
5233 return VA_STATUS_ERROR_UNIMPLEMENTED;
5236 intel_batchbuffer_flush(pp_context->batch);
/*
 * i965_image_pl1_processing:
 * Convert a packed (PA: YUY2/UYVY) source. Direct kernels cover NV12,
 * YV12 and packed destinations; other formats fall back to the
 * PA -> NV12 -> target two-stage path.
 */
5242 i965_image_pl1_processing(VADriverContextP ctx,
5243 const struct i965_surface *src_surface,
5244 const VARectangle *src_rect,
5245 struct i965_surface *dst_surface,
5246 const VARectangle *dst_rect)
5248 struct i965_driver_data *i965 = i965_driver_data(ctx);
5249 struct i965_post_processing_context *pp_context = i965->pp_context;
5250 int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5254 case VA_FOURCC_NV12:
5255 vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5260 PP_PA_LOAD_SAVE_NV12,
5262 intel_batchbuffer_flush(pp_context->batch);
5265 case VA_FOURCC_YV12:
5266 vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5271 PP_PA_LOAD_SAVE_PL3,
5273 intel_batchbuffer_flush(pp_context->batch);
5276 case VA_FOURCC_YUY2:
5277 case VA_FOURCC_UYVY:
5278 vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5285 intel_batchbuffer_flush(pp_context->batch);
5289 vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5290 i965_image_pl1_processing,
5301 // Only NV12 and P010 image layouts are supported by the vebox proc context.
/*
 * derive_surface:
 * Populate a caller-provided object_surface so a VAImage can be handed
 * to the vebox path as if it were a surface: geometry comes from the
 * image (pitch as width, offsets[1] locating the chroma plane) and the
 * image's bo is borrowed — no new allocation, no ownership transfer.
 */
5302 static struct object_surface *derive_surface(VADriverContextP ctx,
5303 struct object_image *obj_image,
5304 struct object_surface *obj_surface)
5306 VAImage * const image = &obj_image->image;
5308 memset((void *)obj_surface, 0, sizeof(*obj_surface));
5309 obj_surface->fourcc = image->format.fourcc;
5310 obj_surface->orig_width = image->width;
5311 obj_surface->orig_height = image->height;
5312 obj_surface->width = image->pitches[0];
5313 obj_surface->height = image->height;
/* Chroma plane row offset, derived from the byte offset of plane 1. */
5314 obj_surface->y_cb_offset = image->offsets[1] / obj_surface->width;
5315 obj_surface->y_cr_offset = obj_surface->y_cb_offset;
5316 obj_surface->bo = obj_image->bo;
5317 obj_surface->subsampling = SUBSAMPLE_YUV420;
/*
 * vebox_processing_simple:
 * Minimal VEBOX pass (no filters, identical input/output region) used
 * for format conversion between two surfaces. Lazily creates the vebox
 * context on first use; pipeline_param points at a stack-local struct,
 * so it is only valid during the call.
 */
5323 vebox_processing_simple(VADriverContextP ctx,
5324 struct i965_post_processing_context *pp_context,
5325 struct object_surface *src_obj_surface,
5326 struct object_surface *dst_obj_surface,
5327 const VARectangle *rect)
5329 struct i965_driver_data *i965 = i965_driver_data(ctx);
5330 VAProcPipelineParameterBuffer pipeline_param;
5331 VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
5333 if(pp_context->vebox_proc_ctx == NULL) {
5334 pp_context->vebox_proc_ctx = gen75_vebox_context_init(ctx);
5337 memset((void *)&pipeline_param, 0, sizeof(pipeline_param));
5338 pipeline_param.surface_region = rect;
5339 pipeline_param.output_region = rect;
5340 pipeline_param.filter_flags = 0;
5341 pipeline_param.num_filters = 0;
5343 pp_context->vebox_proc_ctx->pipeline_param = &pipeline_param;
5344 pp_context->vebox_proc_ctx->surface_input_object = src_obj_surface;
5345 pp_context->vebox_proc_ctx->surface_output_object = dst_obj_surface;
5347 if (IS_GEN9(i965->intel.device_info))
5348 status = gen9_vebox_process_picture(ctx, pp_context->vebox_proc_ctx);
/*
 * i965_image_p010_processing:
 * Convert a P010 source using the VEBOX engine (requires has_vpp_p010 +
 * BSD). Scaling is not supported for NV12/P010 destinations (src and
 * dst rectangles must match). For destinations the VEBOX cannot write
 * directly, a temporary NV12 surface is interposed and finished with
 * i965_image_pl2_processing. Image-typed endpoints are wrapped into
 * stack object_surfaces via derive_surface.
 */
5354 i965_image_p010_processing(VADriverContextP ctx,
5355 const struct i965_surface *src_surface,
5356 const VARectangle *src_rect,
5357 struct i965_surface *dst_surface,
5358 const VARectangle *dst_rect)
5360 #define HAS_VPP_P010(ctx) ((ctx)->codec_info->has_vpp_p010 && \
5361 (ctx)->intel.has_bsd)
5363 struct i965_driver_data *i965 = i965_driver_data(ctx);
5364 struct i965_post_processing_context *pp_context = i965->pp_context;
5365 struct object_surface *src_obj_surface = NULL, *dst_obj_surface = NULL;
5366 struct object_surface tmp_src_obj_surface, tmp_dst_obj_surface;
5367 struct object_surface *tmp_surface = NULL;
5368 VASurfaceID tmp_surface_id[3], out_surface_id = VA_INVALID_ID;
5369 int num_tmp_surfaces = 0;
5370 int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5371 VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5374 if(HAS_VPP_P010(i965)) {
5377 case VA_FOURCC_NV12:
/* VEBOX path cannot scale; rectangles must be identical. */
5378 if(src_rect->x != dst_rect->x ||
5379 src_rect->y != dst_rect->y ||
5380 src_rect->width != dst_rect->width ||
5381 src_rect->height != dst_rect->height) {
5385 case VA_FOURCC_P010:
5386 // don't support scaling while the fourcc of dst_surface is P010
5387 if(src_rect->x != dst_rect->x ||
5388 src_rect->y != dst_rect->y ||
5389 src_rect->width != dst_rect->width ||
5390 src_rect->height != dst_rect->height) {
5391 vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
/* Wrap an image-typed source into a stack surface object. */
5400 if(src_surface->type == I965_SURFACE_TYPE_IMAGE) {
5401 src_obj_surface = derive_surface(ctx, (struct object_image *)src_surface->base,
5402 &tmp_src_obj_surface);
5405 src_obj_surface = (struct object_surface *)src_surface->base;
5407 if(src_obj_surface == NULL) {
5408 vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED;
/* Intermediate NV12 surface, tiled, matching the source geometry. */
5413 vaStatus = i965_CreateSurfaces(ctx,
5414 src_obj_surface->orig_width,
5415 src_obj_surface->orig_height,
5416 VA_RT_FORMAT_YUV420,
5419 assert(vaStatus == VA_STATUS_SUCCESS);
5420 tmp_surface_id[num_tmp_surfaces++] = out_surface_id;
5421 tmp_surface = SURFACE(out_surface_id);
5422 assert(tmp_surface);
5423 i965_check_alloc_surface_bo(ctx, tmp_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5426 if(tmp_surface != NULL)
5427 dst_obj_surface = tmp_surface;
5429 if(dst_surface->type == I965_SURFACE_TYPE_IMAGE) {
5430 dst_obj_surface = derive_surface(ctx, (struct object_image *)dst_surface->base,
5431 &tmp_dst_obj_surface);
5434 dst_obj_surface = (struct object_surface *)dst_surface->base;
5437 if(dst_obj_surface == NULL) {
5438 vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED;
/* P010 -> NV12 (or final surface) via the VEBOX engine. */
5442 vaStatus = vebox_processing_simple(ctx,
5447 if(vaStatus != VA_STATUS_SUCCESS)
5451 struct i965_surface src_surface_new;
5453 if(tmp_surface != NULL){
5454 src_surface_new.base = (struct object_base *)tmp_surface;
5455 src_surface_new.type = I965_SURFACE_TYPE_SURFACE;
5456 src_surface_new.flags = I965_SURFACE_FLAG_FRAME;
5459 memcpy((void *)&src_surface_new, (void *)src_surface, sizeof(src_surface_new));
/* Finish the conversion from the intermediate NV12 to the destination. */
5461 vaStatus = i965_image_pl2_processing(ctx,
5470 if(num_tmp_surfaces)
5471 i965_DestroySurfaces(ctx,
/*
 * i965_image_processing:
 * Top-level image-processing dispatcher.  Selects the implementation by
 * the SOURCE surface fourcc: planar-3 (YV12/I420/IMC/422/411/444/YV16)
 * -> pl3, NV12 -> pl2, packed YUV (YUY2/UYVY) -> pl1, RGB32 -> pl1_rgbx,
 * P010 -> p010.  The whole dispatch runs under i965->pp_mutex, so only
 * one image-processing operation executes at a time per driver instance.
 * Returns VA_STATUS_ERROR_UNIMPLEMENTED for unsupported formats or when
 * the platform has no VPP capability.
 */
5479 i965_image_processing(VADriverContextP ctx,
5480                       const struct i965_surface *src_surface,
5481                       const VARectangle *src_rect,
5482                       struct i965_surface *dst_surface,
5483                       const VARectangle *dst_rect)
5485     struct i965_driver_data *i965 = i965_driver_data(ctx);
5486     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
5488     if (HAS_VPP(i965)) {
5489         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
/* Serialize all PP work; released below after the dispatch. */
5491         _i965LockMutex(&i965->pp_mutex);
5494         case VA_FOURCC_YV12:
5495         case VA_FOURCC_I420:
5496         case VA_FOURCC_IMC1:
5497         case VA_FOURCC_IMC3:
5498         case VA_FOURCC_422H:
5499         case VA_FOURCC_422V:
5500         case VA_FOURCC_411P:
5501         case VA_FOURCC_444P:
5502         case VA_FOURCC_YV16:
5503             status = i965_image_pl3_processing(ctx,
5510         case VA_FOURCC_NV12:
5511             status = i965_image_pl2_processing(ctx,
5517         case VA_FOURCC_YUY2:
5518         case VA_FOURCC_UYVY:
5519             status = i965_image_pl1_processing(ctx,
5525         case VA_FOURCC_BGRA:
5526         case VA_FOURCC_BGRX:
5527         case VA_FOURCC_RGBA:
5528         case VA_FOURCC_RGBX:
5529             status = i965_image_pl1_rgbx_processing(ctx,
5535         case VA_FOURCC_P010:
5536             status = i965_image_p010_processing(ctx,
/* default: source format not handled by any PP path. */
5543             status = VA_STATUS_ERROR_UNIMPLEMENTED;
5547         _i965UnlockMutex(&i965->pp_mutex);
/*
 * i965_post_processing_context_finalize:
 * Release every GPU buffer object and heap allocation owned by a PP
 * context: binding table / surface states, CURBE, sampler state tables
 * (incl. the 8x8 AVS variants), interface descriptors, VFE state, the
 * DNDI frame store, the DN STMM buffer, all kernel BOs, and the
 * static/inline parameter blocks.  Each pointer is NULLed after
 * unreference to guard against double-free on repeated finalize calls.
 * dri_bo_unreference(NULL) is a safe no-op, so no NULL checks are needed.
 */
5554 i965_post_processing_context_finalize(VADriverContextP ctx,
5555                                       struct i965_post_processing_context *pp_context)
5559     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
5560     pp_context->surface_state_binding_table.bo = NULL;
5562     dri_bo_unreference(pp_context->curbe.bo);
5563     pp_context->curbe.bo = NULL;
5565     dri_bo_unreference(pp_context->sampler_state_table.bo);
5566     pp_context->sampler_state_table.bo = NULL;
5568     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
5569     pp_context->sampler_state_table.bo_8x8 = NULL;
5571     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
5572     pp_context->sampler_state_table.bo_8x8_uv = NULL;
5574     dri_bo_unreference(pp_context->idrt.bo);
5575     pp_context->idrt.bo = NULL;
5576     pp_context->idrt.num_interface_descriptors = 0;
5578     dri_bo_unreference(pp_context->vfe_state.bo);
5579     pp_context->vfe_state.bo = NULL;
/* Clear all deinterlacer reference frames held by the DNDI context. */
5581     for (i = 0; i < ARRAY_ELEMS(pp_context->pp_dndi_context.frame_store); i++)
5582         pp_dndi_frame_store_clear(&pp_context->pp_dndi_context.frame_store[i],
5585     dri_bo_unreference(pp_context->pp_dn_context.stmm_bo);
5586     pp_context->pp_dn_context.stmm_bo = NULL;
/* Drop the per-module kernel binaries uploaded in context_init(). */
5588     for (i = 0; i < NUM_PP_MODULES; i++) {
5589         struct pp_module *pp_module = &pp_context->pp_modules[i];
5591         dri_bo_unreference(pp_module->kernel.bo);
5592         pp_module->kernel.bo = NULL;
/* free(NULL) is a no-op, so these are safe even if never allocated. */
5595     free(pp_context->pp_static_parameter);
5596     free(pp_context->pp_inline_parameter);
5597     pp_context->pp_static_parameter = NULL;
5598     pp_context->pp_inline_parameter = NULL;
/*
 * i965_post_processing_terminate:
 * Tear down the driver-wide PP context: run its finalize hook (which
 * releases all of its GPU/heap resources) and clear i965->pp_context so
 * a later i965_post_processing_init() can recreate it.
 * NOTE(review): the free() of pp_context itself falls on a line dropped
 * by the extraction — presumably between the finalize call and the
 * pointer reset.
 */
5602 i965_post_processing_terminate(VADriverContextP ctx)
5604     struct i965_driver_data *i965 = i965_driver_data(ctx);
5605     struct i965_post_processing_context *pp_context = i965->pp_context;
5608         pp_context->finalize(ctx, pp_context);
5612     i965->pp_context = NULL;
5615 #define VPP_CURBE_ALLOCATION_SIZE 32
/*
 * i965_post_processing_context_init:
 * One-time setup of a PP context for pre-Gen8 hardware:
 *  - program per-generation GPU state: explicit URB partitioning on
 *    Ironlake (Gen5), VFE state constants on Gen6/Gen7;
 *  - select the media-pipeline entry point (ironlake_ vs gen6_);
 *  - install the finalize hook used at terminate/destroy time;
 *  - pick the generation's kernel table and upload each kernel binary
 *    into its own 4KB-aligned BO via dri_bo_subdata();
 *  - allocate zeroed static/inline parameter blocks (Gen7 layouts
 *    differ from the common ones);
 *  - attach the caller's batchbuffer and init DNDI + AVS sub-state.
 */
5618 i965_post_processing_context_init(VADriverContextP ctx,
5620                                   struct intel_batchbuffer *batch)
5622     struct i965_driver_data *i965 = i965_driver_data(ctx);
5624     struct i965_post_processing_context *pp_context = data;
5625     const AVSConfig *avs_config;
5627     if (IS_IRONLAKE(i965->intel.device_info)) {
/* Gen5: carve the URB by hand — 32 VFE entries then 1 CS entry. */
5628         pp_context->urb.size = i965->intel.device_info->urb_size;
5629         pp_context->urb.num_vfe_entries = 32;
5630         pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
5631         pp_context->urb.num_cs_entries = 1;
5632         pp_context->urb.size_cs_entry = 2;
5633         pp_context->urb.vfe_start = 0;
5634         pp_context->urb.cs_start = pp_context->urb.vfe_start +
5635             pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
5636         assert(pp_context->urb.cs_start +
5637                pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);
5638         pp_context->intel_post_processing = ironlake_post_processing;
/* Gen6+: the hardware manages the URB; just fill in VFE limits. */
5640         pp_context->vfe_gpu_state.max_num_threads = 60;
5641         pp_context->vfe_gpu_state.num_urb_entries = 59;
5642         pp_context->vfe_gpu_state.gpgpu_mode = 0;
5643         pp_context->vfe_gpu_state.urb_entry_size = 16 - 1;   /* encoded as size-1 */
5644         pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE;
5645         pp_context->intel_post_processing = gen6_post_processing;
5648     pp_context->finalize = i965_post_processing_context_finalize;
/* All generation tables must agree with NUM_PP_MODULES, since the
 * memcpy below copies a fixed-size pp_modules array. */
5650     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
5651     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
5652     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
5653     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
/* Order matters: Haswell is also Gen7, so test IS_HASWELL first. */
5655     if (IS_HASWELL(i965->intel.device_info))
5656         memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
5657     else if (IS_GEN7(i965->intel.device_info))
5658         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
5659     else if (IS_GEN6(i965->intel.device_info))
5660         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
5661     else if (IS_IRONLAKE(i965->intel.device_info))
5662         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
/* Upload every kernel binary to the GPU; modules with no binary get a
 * NULL BO and are treated as unsupported at runtime. */
5664     for (i = 0; i < NUM_PP_MODULES; i++) {
5665         struct pp_module *pp_module = &pp_context->pp_modules[i];
5666         dri_bo_unreference(pp_module->kernel.bo);
5667         if (pp_module->kernel.bin && pp_module->kernel.size) {
5668             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
5669                                                 pp_module->kernel.name,
5670                                                 pp_module->kernel.size,
5672             assert(pp_module->kernel.bo);
5673             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
5675             pp_module->kernel.bo = NULL;
5679     /* static & inline parameters */
5680     if (IS_GEN7(i965->intel.device_info)) {
5681         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
5682         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
5684         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
5685         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
5688     pp_context->batch = batch;
5689     pp_dndi_context_init(&pp_context->pp_dndi_context);
/* AVS scaler coefficients differ on Gen5; the non-Gen5 config name was
 * dropped by the extraction. */
5691     avs_config = IS_IRONLAKE(i965->intel.device_info) ? &gen5_avs_config :
5693     avs_init_state(&pp_context->pp_avs_context.state, avs_config);
/*
 * i965_post_processing_init:
 * Lazily create the driver-wide PP context: if the platform has VPP and
 * no context exists yet, allocate it zeroed and hand it to the
 * generation-specific post_processing_context_init hook together with
 * the shared pp_batch.  Idempotent — a second call finds pp_context set
 * and does nothing.
 */
5697 i965_post_processing_init(VADriverContextP ctx)
5699     struct i965_driver_data *i965 = i965_driver_data(ctx);
5700     struct i965_post_processing_context *pp_context = i965->pp_context;
5702     if (HAS_VPP(i965)) {
5703         if (pp_context == NULL) {
5704             pp_context = calloc(1, sizeof(*pp_context));
5706             i965->codec_info->post_processing_context_init(ctx, pp_context, i965->pp_batch);
5707             i965->pp_context = pp_context;
/* Map each VAProcFilterType to the PP kernel that implements it.
 * PP_NULL marks filters with no dedicated kernel (none / sharpening /
 * color balance), which the full pipeline skips or handles elsewhere. */
5714 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
5715     PP_NULL,        /* VAProcFilterNone */
5716     PP_NV12_DN,     /* VAProcFilterNoiseReduction */
5717     PP_NV12_DNDI,   /* VAProcFilterDeinterlacing */
5718     PP_NULL,        /* VAProcFilterSharpening */
5719     PP_NULL,        /* VAProcFilterColorBalance */
/* Translate the low 2 bits of VAProcPipelineParameterBuffer.filter_flags
 * (progressive / TFF / BFF) into the driver's surface-flag values.
 * Indexed with (filter_flags & 0x3) in i965_proc_picture(). */
5722 static const int proc_frame_to_pp_frame[3] = {
5723     I965_SURFACE_FLAG_FRAME,
5724     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
5725     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
/* Bit flags summarizing what a pipeline request asks for; used by the
 * fast path to pick a kernel or bail out to the full pipeline. */
5729     PP_OP_CHANGE_FORMAT = 1 << 0,   /* src and dst fourcc differ */
5730     PP_OP_CHANGE_SIZE   = 1 << 1,   /* src and dst rects differ in size */
5731     PP_OP_DEINTERLACE   = 1 << 2,   /* field input needs weaving/bob */
5732     PP_OP_COMPLEX       = 1 << 3,   /* anything the fast path can't do */
/*
 * pp_get_kernel_index:
 * Choose the single PP kernel able to perform a (src_fourcc ->
 * dst_fourcc) conversion with the requested ops/filter flags, for the
 * fast path.  Sources group into RGB32, packed YUV (PA), NV12 and
 * planar-3 (PL3) families, each with a load/save kernel per destination
 * family.  NV12->NV12 with a size change additionally selects between
 * the AVS (high-quality) and plain scaling kernels via
 * avs_is_needed(filter_flags).
 * NOTE(review): the initial declarations, the condition guarding the
 * `dst_fourcc = src_fourcc` default, the break statements and the
 * default/return lines were dropped by the extraction; unsupported
 * combinations presumably yield a sentinel the caller rejects.
 */
5736 pp_get_kernel_index(uint32_t src_fourcc, uint32_t dst_fourcc, uint32_t pp_ops,
5737                     uint32_t filter_flags)
/* Dropped condition above: same-format requests default dst to src. */
5742         dst_fourcc = src_fourcc;
5744     switch (src_fourcc) {
5745     case VA_FOURCC_RGBX:
5746     case VA_FOURCC_RGBA:
5747     case VA_FOURCC_BGRX:
5748     case VA_FOURCC_BGRA:
5749         switch (dst_fourcc) {
5750         case VA_FOURCC_NV12:
5751             pp_index = PP_RGBX_LOAD_SAVE_NV12;
5755     case VA_FOURCC_YUY2:
5756     case VA_FOURCC_UYVY:
5757         switch (dst_fourcc) {
5758         case VA_FOURCC_NV12:
5759             pp_index = PP_PA_LOAD_SAVE_NV12;
5761         case VA_FOURCC_I420:
5762         case VA_FOURCC_YV12:
5763             pp_index = PP_PA_LOAD_SAVE_PL3;
5765         case VA_FOURCC_YUY2:
5766         case VA_FOURCC_UYVY:
5767             pp_index = PP_PA_LOAD_SAVE_PA;
5771     case VA_FOURCC_NV12:
5772         switch (dst_fourcc) {
5773         case VA_FOURCC_NV12:
/* Same-format resize: AVS when quality scaling is requested, otherwise
 * the cheaper bilinear scaling kernel; plain copy when size matches. */
5774             if (pp_ops & PP_OP_CHANGE_SIZE)
5775                 pp_index = avs_is_needed(filter_flags) ?
5776                     PP_NV12_AVS : PP_NV12_SCALING;
5778                 pp_index = PP_NV12_LOAD_SAVE_N12;
5780         case VA_FOURCC_I420:
5781         case VA_FOURCC_YV12:
5782         case VA_FOURCC_IMC1:
5783         case VA_FOURCC_IMC3:
5784             pp_index = PP_NV12_LOAD_SAVE_PL3;
5786         case VA_FOURCC_YUY2:
5787         case VA_FOURCC_UYVY:
5788             pp_index = PP_NV12_LOAD_SAVE_PA;
5790         case VA_FOURCC_RGBX:
5791         case VA_FOURCC_RGBA:
5792         case VA_FOURCC_BGRX:
5793         case VA_FOURCC_BGRA:
5794             pp_index = PP_NV12_LOAD_SAVE_RGBX;
5798     case VA_FOURCC_I420:
5799     case VA_FOURCC_YV12:
5800     case VA_FOURCC_IMC1:
5801     case VA_FOURCC_IMC3:
5802     case VA_FOURCC_YV16:
5803     case VA_FOURCC_411P:
5804     case VA_FOURCC_422H:
5805     case VA_FOURCC_422V:
5806     case VA_FOURCC_444P:
5807         switch (dst_fourcc) {
5808         case VA_FOURCC_NV12:
5809             pp_index = PP_PL3_LOAD_SAVE_N12;
5811         case VA_FOURCC_I420:
5812         case VA_FOURCC_YV12:
5813         case VA_FOURCC_IMC1:
5814         case VA_FOURCC_IMC3:
5815             pp_index = PP_PL3_LOAD_SAVE_PL3;
5817         case VA_FOURCC_YUY2:
5818         case VA_FOURCC_UYVY:
5819             pp_index = PP_PL3_LOAD_SAVE_PA;
/*
 * i965_proc_picture_fast:
 * Single-kernel "fast path" for VPP requests simple enough to run in
 * one pass (at most one of format change / resize / bob-deinterlace,
 * no advanced filters).  Validates the pipeline parameters and both
 * surfaces, computes pp_ops/filter_flags, and returns
 * VA_STATUS_ERROR_UNIMPLEMENTED for anything needing the full pipeline
 * — the caller (i965_proc_picture) treats that status as "fall back",
 * not as a failure.
 */
5828 i965_proc_picture_fast(VADriverContextP ctx,
5829     struct i965_proc_context *proc_context, struct proc_state *proc_state)
5831     struct i965_driver_data * const i965 = i965_driver_data(ctx);
5832     const VAProcPipelineParameterBuffer * const pipeline_param =
5833         (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
5834     struct object_surface *src_obj_surface, *dst_obj_surface;
5835     struct i965_surface src_surface, dst_surface;
5836     const VAProcFilterParameterBufferDeinterlacing *deint_params = NULL;
5837     VARectangle src_rect, dst_rect;
5839     uint32_t i, filter_flags = 0, pp_ops = 0;
5842     /* Validate pipeline parameters */
5843     if (pipeline_param->num_filters > 0 && !pipeline_param->filters)
5844         return VA_STATUS_ERROR_INVALID_PARAMETER;
/* Scan the filter chain: only bob deinterlacing stays on the fast
 * path; every other filter marks the request PP_OP_COMPLEX. */
5846     for (i = 0; i < pipeline_param->num_filters; i++) {
5847         const VAProcFilterParameterBuffer *filter;
5848         struct object_buffer * const obj_buffer =
5849             BUFFER(pipeline_param->filters[i]);
5851         assert(obj_buffer && obj_buffer->buffer_store);
5852         if (!obj_buffer || !obj_buffer->buffer_store)
5853             return VA_STATUS_ERROR_INVALID_PARAMETER;
5855         filter = (VAProcFilterParameterBuffer *)
5856             obj_buffer->buffer_store->buffer;
5857         switch (filter->type) {
5858         case VAProcFilterDeinterlacing:
5859             pp_ops |= PP_OP_DEINTERLACE;
5860             deint_params = (VAProcFilterParameterBufferDeinterlacing *)filter;
5863             pp_ops |= PP_OP_COMPLEX;
5867     filter_flags |= pipeline_param->filter_flags & VA_FILTER_SCALING_MASK;
5869     /* Validate source surface */
5870     src_obj_surface = SURFACE(pipeline_param->surface);
5871     if (!src_obj_surface)
5872         return VA_STATUS_ERROR_INVALID_SURFACE;
5874     if (!src_obj_surface->fourcc)
5875         return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
/* Source rect: explicit region, or the whole surface when omitted. */
5877     if (pipeline_param->surface_region) {
5878         src_rect.x = pipeline_param->surface_region->x;
5879         src_rect.y = pipeline_param->surface_region->y;
5880         src_rect.width = pipeline_param->surface_region->width;
5881         src_rect.height = pipeline_param->surface_region->height;
5885         src_rect.width = src_obj_surface->orig_width;
5886         src_rect.height = src_obj_surface->orig_height;
5889     src_surface.base = &src_obj_surface->base;
5890     src_surface.type = I965_SURFACE_TYPE_SURFACE;
5891     src_surface.flags = I965_SURFACE_FLAG_FRAME;
/* Deinterlace requested as an explicit filter: pick the field from the
 * filter params; non-bob algorithms need the full pipeline. */
5893     if (pp_ops & PP_OP_DEINTERLACE) {
5894         filter_flags |= !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD) ?
5895             VA_TOP_FIELD : VA_BOTTOM_FIELD;
5896         if (deint_params->algorithm != VAProcDeinterlacingBob)
5897             pp_ops |= PP_OP_COMPLEX;
/* ...or implied by TOP/BOTTOM field flags on the pipeline itself. */
5899     else if (pipeline_param->filter_flags & (VA_TOP_FIELD | VA_BOTTOM_FIELD)) {
5900         filter_flags |= (pipeline_param->filter_flags & VA_TOP_FIELD) ?
5901             VA_TOP_FIELD : VA_BOTTOM_FIELD;
5902         pp_ops |= PP_OP_DEINTERLACE;
5904     if (pp_ops & PP_OP_DEINTERLACE) // XXX: no bob-deinterlacing optimization yet
5905         pp_ops |= PP_OP_COMPLEX;
5907     /* Validate target surface */
5908     dst_obj_surface = SURFACE(proc_state->current_render_target);
5909     if (!dst_obj_surface)
5910         return VA_STATUS_ERROR_INVALID_SURFACE;
5912     if (!dst_obj_surface->bo)
5913         return VA_STATUS_ERROR_INVALID_SURFACE;
5915     if (dst_obj_surface->fourcc &&
5916         dst_obj_surface->fourcc != src_obj_surface->fourcc)
5917         pp_ops |= PP_OP_CHANGE_FORMAT;
5919     if (pipeline_param->output_region) {
5920         dst_rect.x = pipeline_param->output_region->x;
5921         dst_rect.y = pipeline_param->output_region->y;
5922         dst_rect.width = pipeline_param->output_region->width;
5923         dst_rect.height = pipeline_param->output_region->height;
5927         dst_rect.width = dst_obj_surface->orig_width;
5928         dst_rect.height = dst_obj_surface->orig_height;
5931     if (dst_rect.width != src_rect.width || dst_rect.height != src_rect.height)
5932         pp_ops |= PP_OP_CHANGE_SIZE;
5934     dst_surface.base = &dst_obj_surface->base;
5935     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5936     dst_surface.flags = I965_SURFACE_FLAG_FRAME;
5938     /* Validate "fast-path" processing capabilities */
5939     if (!IS_GEN7(i965->intel.device_info)) {
/* Pre-Gen7 kernels can't convert format and resize in one pass. */
5940         if ((pp_ops & PP_OP_CHANGE_FORMAT) && (pp_ops & PP_OP_CHANGE_SIZE))
5941             return VA_STATUS_ERROR_UNIMPLEMENTED; // temporary surface is needed
/* VA_PROC_PIPELINE_FAST overrides the requested scaling quality. */
5943     if (pipeline_param->pipeline_flags & VA_PROC_PIPELINE_FAST) {
5944         filter_flags &= ~VA_FILTER_SCALING_MASK;
5945         filter_flags |= VA_FILTER_SCALING_FAST;
5948     if (pp_ops & PP_OP_COMPLEX)
5949         return VA_STATUS_ERROR_UNIMPLEMENTED; // full pipeline is needed
5950     if ((filter_flags & VA_FILTER_SCALING_MASK) > VA_FILTER_SCALING_HQ)
5951         return VA_STATUS_ERROR_UNIMPLEMENTED;
5954     pp_index = pp_get_kernel_index(src_obj_surface->fourcc,
5955         dst_obj_surface->fourcc, pp_ops, filter_flags);
/* Dropped condition above: bail out when no single kernel fits. */
5957         return VA_STATUS_ERROR_UNIMPLEMENTED;
5959     proc_context->pp_context.filter_flags = filter_flags;
5960     status = i965_post_processing_internal(ctx, &proc_context->pp_context,
5961         &src_surface, &src_rect, &dst_surface, &dst_rect, pp_index, NULL);
5962     intel_batchbuffer_flush(proc_context->pp_context.batch);
/*
 * i965_proc_picture:
 * Full VPP pipeline entry point (installed as hw_context->run).  Tries
 * the single-pass fast path first and only continues when it reports
 * VA_STATUS_ERROR_UNIMPLEMENTED.  The full pipeline then:
 *  1. validates input/output surfaces and parameters;
 *  2. converts a non-NV12 source to a temporary NV12 surface;
 *  3. runs each requested filter kernel in sequence, ping-ponging
 *     through temporary NV12 surfaces;
 *  4. writes the final result into the render target — directly via
 *     i965_image_processing on Gen7+, or via load/save + scaling
 *     kernels (plus an optional CSC pass) on older hardware.
 * All temporary surfaces are destroyed on every exit path.
 */
5967 i965_proc_picture(VADriverContextP ctx,
5969                   union codec_state *codec_state,
5970                   struct hw_context *hw_context)
5972     struct i965_driver_data *i965 = i965_driver_data(ctx);
5973     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
5974     struct proc_state *proc_state = &codec_state->proc;
5975     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
5976     struct object_surface *obj_surface;
5977     struct i965_surface src_surface, dst_surface;
5978     VARectangle src_rect, dst_rect;
5981     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
5982     int num_tmp_surfaces = 0;
5983     unsigned int tiling = 0, swizzle = 0;
5984     int in_width, in_height;
/* Fast path handles the simple cases; UNIMPLEMENTED means "fall
 * through to the full pipeline", any other status is final. */
5986     status = i965_proc_picture_fast(ctx, proc_context, proc_state);
5987     if (status != VA_STATUS_ERROR_UNIMPLEMENTED)
5990     if (pipeline_param->surface == VA_INVALID_ID ||
5991         proc_state->current_render_target == VA_INVALID_ID) {
5992         status = VA_STATUS_ERROR_INVALID_SURFACE;
5996     obj_surface = SURFACE(pipeline_param->surface);
5999         status = VA_STATUS_ERROR_INVALID_SURFACE;
6003     if (!obj_surface->bo) {
6004         status = VA_STATUS_ERROR_INVALID_VALUE; /* The input surface is created without valid content */
6008     if (pipeline_param->num_filters && !pipeline_param->filters) {
6009         status = VA_STATUS_ERROR_INVALID_PARAMETER;
6013     in_width = obj_surface->orig_width;
6014     in_height = obj_surface->orig_height;
/* Remember the input tiling so temporaries match it. */
6015     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
6017     src_surface.base = (struct object_base *)obj_surface;
6018     src_surface.type = I965_SURFACE_TYPE_SURFACE;
6019     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
6021     VASurfaceID out_surface_id = VA_INVALID_ID;
/* Filter kernels work on NV12: convert any other source format into a
 * temporary NV12 surface of the same dimensions first. */
6022     if (obj_surface->fourcc != VA_FOURCC_NV12) {
6023         src_surface.base = (struct object_base *)obj_surface;
6024         src_surface.type = I965_SURFACE_TYPE_SURFACE;
6025         src_surface.flags = I965_SURFACE_FLAG_FRAME;
6028         src_rect.width = in_width;
6029         src_rect.height = in_height;
6031         status = i965_CreateSurfaces(ctx,
6034                                      VA_RT_FORMAT_YUV420,
6037         if (status != VA_STATUS_SUCCESS)
6039         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6040         obj_surface = SURFACE(out_surface_id);
6041         assert(obj_surface);
6042         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6044         dst_surface.base = (struct object_base *)obj_surface;
6045         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6046         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
6049         dst_rect.width = in_width;
6050         dst_rect.height = in_height;
6052         status = i965_image_processing(ctx,
6057         if (status != VA_STATUS_SUCCESS)
/* The converted NV12 surface becomes the pipeline's working source. */
6060         src_surface.base = (struct object_base *)obj_surface;
6061         src_surface.type = I965_SURFACE_TYPE_SURFACE;
6062         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
6065     if (pipeline_param->surface_region) {
6066         src_rect.x = pipeline_param->surface_region->x;
6067         src_rect.y = pipeline_param->surface_region->y;
6068         src_rect.width = pipeline_param->surface_region->width;
6069         src_rect.height = pipeline_param->surface_region->height;
6073         src_rect.width = in_width;
6074         src_rect.height = in_height;
6077     proc_context->pp_context.pipeline_param = pipeline_param;
/* Run each requested filter; every supported filter writes into a
 * fresh temporary that then becomes the next stage's source. */
6079     for (i = 0; i < pipeline_param->num_filters; i++) {
6080         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
6081         VAProcFilterParameterBufferBase *filter_param = NULL;
6082         VAProcFilterType filter_type;
6086             !obj_buffer->buffer_store ||
6087             !obj_buffer->buffer_store->buffer) {
6088             status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN;
6092         out_surface_id = VA_INVALID_ID;
6093         filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
6094         filter_type = filter_param->type;
6095         kernel_index = procfilter_to_pp_flag[filter_type];
/* Skip filters with no kernel (PP_NULL) or whose kernel binary was
 * not uploaded for this generation. */
6097         if (kernel_index != PP_NULL &&
6098             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
6099             status = i965_CreateSurfaces(ctx,
6102                                          VA_RT_FORMAT_YUV420,
6105             assert(status == VA_STATUS_SUCCESS);
6106             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6107             obj_surface = SURFACE(out_surface_id);
6108             assert(obj_surface);
6109             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6110             dst_surface.base = (struct object_base *)obj_surface;
6111             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6112             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
6120             if (status == VA_STATUS_SUCCESS) {
/* Chain: this stage's output feeds the next filter. */
6121                 src_surface.base = dst_surface.base;
6122                 src_surface.type = dst_surface.type;
6123                 src_surface.flags = dst_surface.flags;
6128     proc_context->pp_context.pipeline_param = NULL;
6129     obj_surface = SURFACE(proc_state->current_render_target);
6132         status = VA_STATUS_ERROR_INVALID_SURFACE;
6136     if (pipeline_param->output_region) {
6137         dst_rect.x = pipeline_param->output_region->x;
6138         dst_rect.y = pipeline_param->output_region->y;
6139         dst_rect.width = pipeline_param->output_region->width;
6140         dst_rect.height = pipeline_param->output_region->height;
6144         dst_rect.width = obj_surface->orig_width;
6145         dst_rect.height = obj_surface->orig_height;
/* Gen7+: hand final scale/convert to i965_image_processing, briefly
 * overriding the shared context's scaling-quality flags. */
6148     if (IS_GEN7(i965->intel.device_info) ||
6149         IS_GEN8(i965->intel.device_info) ||
6150         IS_GEN9(i965->intel.device_info)) {
6151         unsigned int saved_filter_flag;
6152         struct i965_post_processing_context *i965pp_context = i965->pp_context;
6154         if (obj_surface->fourcc == 0) {
6155             i965_check_alloc_surface_bo(ctx, obj_surface, 1,
6160         i965_vpp_clear_surface(ctx, &proc_context->pp_context,
6162                                pipeline_param->output_background_color);
6164         intel_batchbuffer_flush(hw_context->batch);
6166         saved_filter_flag = i965pp_context->filter_flags;
6167         i965pp_context->filter_flags = (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK);
6169         dst_surface.base = (struct object_base *)obj_surface;
6170         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6171         i965_image_processing(ctx, &src_surface, &src_rect, &dst_surface, &dst_rect);
6173         i965pp_context->filter_flags = saved_filter_flag;
6175         if (num_tmp_surfaces)
6176             i965_DestroySurfaces(ctx,
6180         return VA_STATUS_SUCCESS;
/* Pre-Gen7: non-NV12 targets need an extra CSC temporary because the
 * load/save and scaling kernels below output NV12 only. */
6184     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC_NV12){
6186         out_surface_id = VA_INVALID_ID;
6187         status = i965_CreateSurfaces(ctx,
6188                                      obj_surface->orig_width,
6189                                      obj_surface->orig_height,
6190                                      VA_RT_FORMAT_YUV420,
6193         assert(status == VA_STATUS_SUCCESS);
6194         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6195         struct object_surface *csc_surface = SURFACE(out_surface_id);
6196         assert(csc_surface);
6197         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6198         dst_surface.base = (struct object_base *)csc_surface;
6200         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6201         dst_surface.base = (struct object_base *)obj_surface;
6204     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6205     i965_vpp_clear_surface(ctx, &proc_context->pp_context, obj_surface, pipeline_param->output_background_color);
6207     // load/save doesn't support different origin offset for src and dst surface
6208     if (src_rect.width == dst_rect.width &&
6209         src_rect.height == dst_rect.height &&
6210         src_rect.x == dst_rect.x &&
6211         src_rect.y == dst_rect.y) {
6212         i965_post_processing_internal(ctx, &proc_context->pp_context,
6217                                       PP_NV12_LOAD_SAVE_N12,
6221         proc_context->pp_context.filter_flags = pipeline_param->filter_flags;
6222         i965_post_processing_internal(ctx, &proc_context->pp_context,
6227                                       avs_is_needed(pipeline_param->filter_flags) ? PP_NV12_AVS : PP_NV12_SCALING,
/* Final CSC: copy the NV12 intermediate into the real target format. */
6232         src_surface.base = dst_surface.base;
6233         src_surface.type = dst_surface.type;
6234         src_surface.flags = dst_surface.flags;
6235         dst_surface.base = (struct object_base *)obj_surface;
6236         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6237         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
6240     if (num_tmp_surfaces)
6241         i965_DestroySurfaces(ctx,
6245     intel_batchbuffer_flush(hw_context->batch);
6247     return VA_STATUS_SUCCESS;
/* Error exit (label dropped by extraction): free temporaries. */
6250     if (num_tmp_surfaces)
6251         i965_DestroySurfaces(ctx,
/*
 * i965_proc_context_destroy:
 * hw_context destroy hook for a VPP context: finalize the embedded PP
 * context (releasing its GPU resources) and free the batchbuffer.
 * NOTE(review): the free() of proc_context itself falls on a line
 * dropped by the extraction.
 */
6259 i965_proc_context_destroy(void *hw_context)
6261     struct i965_proc_context * const proc_context = hw_context;
6262     VADriverContextP const ctx = proc_context->driver_context;
6264     proc_context->pp_context.finalize(ctx, &proc_context->pp_context);
6265     intel_batchbuffer_free(proc_context->base.batch);
/*
 * i965_proc_context_init:
 * Allocate and wire up a VPP hw_context: install the destroy/run hooks,
 * create a render-ring batchbuffer, remember the driver context for
 * destroy time, and let the generation-specific hook initialize the
 * embedded PP context.  Returns the context as a generic hw_context.
 * The calloc result is used unchecked here — presumably guarded on a
 * line dropped by the extraction, or relied on to fail later.
 */
6270 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
6272     struct i965_driver_data *i965 = i965_driver_data(ctx);
6273     struct intel_driver_data *intel = intel_driver_data(ctx);
6274     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
6279     proc_context->base.destroy = i965_proc_context_destroy;
6280     proc_context->base.run = i965_proc_picture;
6281     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
6282     proc_context->driver_context = ctx;
6283     i965->codec_info->post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
6285     return (struct hw_context *)proc_context;