OSDN Git Service

Initialize one 10bit-scaling gpe_context for Gen9
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_post_processing.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  */
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <assert.h>
30
31 #include "intel_batchbuffer.h"
32 #include "intel_driver.h"
33 #include "i965_defines.h"
34 #include "i965_structs.h"
35 #include "i965_drv_video.h"
36 #include "i965_post_processing.h"
37 #include "i965_render.h"
38 #include "intel_media.h"
39
40 #include "gen8_post_processing.h"
41
42 static const uint32_t pp_null_gen9[][4] = {
43 };
44
45 static const uint32_t pp_nv12_load_save_nv12_gen9[][4] = {
46 #include "shaders/post_processing/gen9/pl2_to_pl2.g9b"
47 };
48
49 static const uint32_t pp_nv12_load_save_pl3_gen9[][4] = {
50 #include "shaders/post_processing/gen9/pl2_to_pl3.g9b"
51 };
52
53 static const uint32_t pp_pl3_load_save_nv12_gen9[][4] = {
54 #include "shaders/post_processing/gen9/pl3_to_pl2.g9b"
55 };
56
57 static const uint32_t pp_pl3_load_save_pl3_gen9[][4] = {
58 #include "shaders/post_processing/gen9/pl3_to_pl3.g9b"
59 };
60
61 static const uint32_t pp_nv12_scaling_gen9[][4] = {
62 #include "shaders/post_processing/gen9/pl2_to_pl2.g9b"
63 };
64
65 static const uint32_t pp_nv12_avs_gen9[][4] = {
66 #include "shaders/post_processing/gen9/pl2_to_pl2.g9b"
67 };
68
69 static const uint32_t pp_nv12_dndi_gen9[][4] = {
70 };
71
72 static const uint32_t pp_nv12_dn_gen9[][4] = {
73 };
74
75 static const uint32_t pp_nv12_load_save_pa_gen9[][4] = {
76 #include "shaders/post_processing/gen9/pl2_to_pa.g9b"
77 };
78
79 static const uint32_t pp_pl3_load_save_pa_gen9[][4] = {
80 #include "shaders/post_processing/gen9/pl3_to_pa.g9b"
81 };
82
83 static const uint32_t pp_pa_load_save_nv12_gen9[][4] = {
84 #include "shaders/post_processing/gen9/pa_to_pl2.g9b"
85 };
86
87 static const uint32_t pp_pa_load_save_pl3_gen9[][4] = {
88 #include "shaders/post_processing/gen9/pa_to_pl3.g9b"
89 };
90
91 static const uint32_t pp_pa_load_save_pa_gen9[][4] = {
92 #include "shaders/post_processing/gen9/pa_to_pa.g9b"
93 };
94
95 static const uint32_t pp_rgbx_load_save_nv12_gen9[][4] = {
96 #include "shaders/post_processing/gen9/rgbx_to_nv12.g9b"
97 };
98
99 static const uint32_t pp_nv12_load_save_rgbx_gen9[][4] = {
100 #include "shaders/post_processing/gen9/pl2_to_rgbx.g9b"
101 };
102
103 static const uint32_t pp_nv12_blending_gen9[][4] = {
104 };
105
/* Number of binding-table entries reserved for the 10-bit scaling context. */
#define MAX_SCALING_SURFACES    16

/* Cacheability control value applied to the scaling surfaces. */
#define DEFAULT_MOCS            0x02

/*
 * Source/destination layout flags.  NOTE(review): these appear to mirror
 * the dw7 bit-fields of struct scaling_input_parameter (msb in bits 0-1,
 * packed in bits 2-3); nothing in this part of the file references them.
 */
#define SRC_MSB                 0x0001
#define DST_MSB                 0x0002
#define SRC_PACKED              0x0004
#define DST_PACKED              0x0008
#define PACKED_MASK             0x000C

/* First binding-table index of the input and of the output planes. */
#define BTI_SCALING_INPUT_Y     0
#define BTI_SCALING_OUTPUT_Y    8
117
/*
 * CURBE payload handed to the 10-bit scaling kernel.  The member order and
 * types define the layout the kernel reads, so they must not be rearranged.
 */
struct scaling_input_parameter {
    unsigned int input_data[5];

    /* Reciprocals of the source extent (x + width, y + height). */
    float inv_width;
    float inv_height;

    /* Eighth dword of the payload: per-surface sample layout flags. */
    struct {
        unsigned int src_msb : 1;
        unsigned int dst_msb : 1;
        unsigned int src_packed : 1;
        unsigned int dst_packed : 1;
        unsigned int reserved : 28;
    } dw7;

    /* Top-left corner of the destination rectangle. */
    int x_dst;
    int y_dst;

    float x_factor;     /* src_rect_width / dst_rect_width / Surface_width */
    float y_factor;     /* src_rect_height / dst_rect_height / Surface_height */

    /* Source rectangle origin divided by the source extent. */
    float x_orig;
    float y_orig;

    /* Binding-table indices of the first input and first output plane. */
    unsigned int bti_input;
    unsigned int bti_output;
};
141
142 static const uint32_t pp_10bit_scaling_gen9[][4] = {
143 #include "shaders/post_processing/gen9/conv_p010.g9b"
144 };
145
146 static struct pp_module pp_modules_gen9[] = {
147     {
148         {
149             "NULL module (for testing)",
150             PP_NULL,
151             pp_null_gen9,
152             sizeof(pp_null_gen9),
153             NULL,
154         },
155
156         pp_null_initialize,
157     },
158
159     {
160         {
161             "NV12_NV12",
162             PP_NV12_LOAD_SAVE_N12,
163             pp_nv12_load_save_nv12_gen9,
164             sizeof(pp_nv12_load_save_nv12_gen9),
165             NULL,
166         },
167
168         gen8_pp_plx_avs_initialize,
169     },
170
171     {
172         {
173             "NV12_PL3",
174             PP_NV12_LOAD_SAVE_PL3,
175             pp_nv12_load_save_pl3_gen9,
176             sizeof(pp_nv12_load_save_pl3_gen9),
177             NULL,
178         },
179         gen8_pp_plx_avs_initialize,
180     },
181
182     {
183         {
184             "PL3_NV12",
185             PP_PL3_LOAD_SAVE_N12,
186             pp_pl3_load_save_nv12_gen9,
187             sizeof(pp_pl3_load_save_nv12_gen9),
188             NULL,
189         },
190
191         gen8_pp_plx_avs_initialize,
192     },
193
194     {
195         {
196             "PL3_PL3",
197             PP_PL3_LOAD_SAVE_PL3,
198             pp_pl3_load_save_pl3_gen9,
199             sizeof(pp_pl3_load_save_pl3_gen9),
200             NULL,
201         },
202
203         gen8_pp_plx_avs_initialize,
204     },
205
206     {
207         {
208             "NV12 Scaling module",
209             PP_NV12_SCALING,
210             pp_nv12_scaling_gen9,
211             sizeof(pp_nv12_scaling_gen9),
212             NULL,
213         },
214
215         gen8_pp_plx_avs_initialize,
216     },
217
218     {
219         {
220             "NV12 AVS module",
221             PP_NV12_AVS,
222             pp_nv12_avs_gen9,
223             sizeof(pp_nv12_avs_gen9),
224             NULL,
225         },
226
227         gen8_pp_plx_avs_initialize,
228     },
229
230     {
231         {
232             "NV12 DNDI module",
233             PP_NV12_DNDI,
234             pp_nv12_dndi_gen9,
235             sizeof(pp_nv12_dndi_gen9),
236             NULL,
237         },
238
239         pp_null_initialize,
240     },
241
242     {
243         {
244             "NV12 DN module",
245             PP_NV12_DN,
246             pp_nv12_dn_gen9,
247             sizeof(pp_nv12_dn_gen9),
248             NULL,
249         },
250
251         pp_null_initialize,
252     },
253     {
254         {
255             "NV12_PA module",
256             PP_NV12_LOAD_SAVE_PA,
257             pp_nv12_load_save_pa_gen9,
258             sizeof(pp_nv12_load_save_pa_gen9),
259             NULL,
260         },
261
262         gen8_pp_plx_avs_initialize,
263     },
264
265     {
266         {
267             "PL3_PA module",
268             PP_PL3_LOAD_SAVE_PA,
269             pp_pl3_load_save_pa_gen9,
270             sizeof(pp_pl3_load_save_pa_gen9),
271             NULL,
272         },
273
274         gen8_pp_plx_avs_initialize,
275     },
276
277     {
278         {
279             "PA_NV12 module",
280             PP_PA_LOAD_SAVE_NV12,
281             pp_pa_load_save_nv12_gen9,
282             sizeof(pp_pa_load_save_nv12_gen9),
283             NULL,
284         },
285
286         gen8_pp_plx_avs_initialize,
287     },
288
289     {
290         {
291             "PA_PL3 module",
292             PP_PA_LOAD_SAVE_PL3,
293             pp_pa_load_save_pl3_gen9,
294             sizeof(pp_pa_load_save_pl3_gen9),
295             NULL,
296         },
297
298         gen8_pp_plx_avs_initialize,
299     },
300
301     {
302         {
303             "PA_PA module",
304             PP_PA_LOAD_SAVE_PA,
305             pp_pa_load_save_pa_gen9,
306             sizeof(pp_pa_load_save_pa_gen9),
307             NULL,
308         },
309
310         gen8_pp_plx_avs_initialize,
311     },
312
313     {
314         {
315             "RGBX_NV12 module",
316             PP_RGBX_LOAD_SAVE_NV12,
317             pp_rgbx_load_save_nv12_gen9,
318             sizeof(pp_rgbx_load_save_nv12_gen9),
319             NULL,
320         },
321
322         gen8_pp_plx_avs_initialize,
323     },
324
325     {
326         {
327             "NV12_RGBX module",
328             PP_NV12_LOAD_SAVE_RGBX,
329             pp_nv12_load_save_rgbx_gen9,
330             sizeof(pp_nv12_load_save_rgbx_gen9),
331             NULL,
332         },
333
334         gen8_pp_plx_avs_initialize,
335     },
336 };
337
338 static const AVSConfig gen9_avs_config = {
339     .coeff_frac_bits = 6,
340     .coeff_epsilon = 1.0f / (1U << 6),
341     .num_phases = 31,
342     .num_luma_coeffs = 8,
343     .num_chroma_coeffs = 4,
344
345     .coeff_range = {
346         .lower_bound = {
347             .y_k_h = { -2, -2, -2, -2, -2, -2, -2, -2 },
348             .y_k_v = { -2, -2, -2, -2, -2, -2, -2, -2 },
349             .uv_k_h = { -2, -2, -2, -2 },
350             .uv_k_v = { -2, -2, -2, -2 },
351         },
352         .upper_bound = {
353             .y_k_h = { 2, 2, 2, 2, 2, 2, 2, 2 },
354             .y_k_v = { 2, 2, 2, 2, 2, 2, 2, 2 },
355             .uv_k_h = { 2, 2, 2, 2 },
356             .uv_k_v = { 2, 2, 2, 2 },
357         },
358     },
359 };
360
361 static void
362 gen9_pp_pipeline_select(VADriverContextP ctx,
363                         struct i965_post_processing_context *pp_context)
364 {
365     struct intel_batchbuffer *batch = pp_context->batch;
366
367     BEGIN_BATCH(batch, 1);
368     OUT_BATCH(batch,
369               CMD_PIPELINE_SELECT |
370               PIPELINE_SELECT_MEDIA |
371               GEN9_FORCE_MEDIA_AWAKE_ON |
372               GEN9_MEDIA_DOP_GATE_OFF |
373               GEN9_PIPELINE_SELECTION_MASK |
374               GEN9_MEDIA_DOP_GATE_MASK |
375               GEN9_FORCE_MEDIA_AWAKE_MASK);
376     ADVANCE_BATCH(batch);
377 }
378
379 static void
380 gen9_pp_state_base_address(VADriverContextP ctx,
381                            struct i965_post_processing_context *pp_context)
382 {
383     struct intel_batchbuffer *batch = pp_context->batch;
384
385     BEGIN_BATCH(batch, 19);
386     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (19 - 2));
387     /* DW1 Generate state address */
388     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
389     OUT_BATCH(batch, 0);
390     OUT_BATCH(batch, 0);
391     /* DW4-5 Surface state address */
392     OUT_RELOC64(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
393     /* DW6-7 Dynamic state address */
394     OUT_RELOC64(batch, pp_context->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
395               0, 0 | BASE_ADDRESS_MODIFY);
396
397     /* DW8. Indirect object address */
398     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
399     OUT_BATCH(batch, 0);
400
401     /* DW10-11 Instruction base address */
402     OUT_RELOC64(batch, pp_context->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
403
404     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
405     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
406     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
407     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
408
409     /* Bindless surface state base address */
410     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
411     OUT_BATCH(batch, 0);
412     OUT_BATCH(batch, 0xfffff000);
413
414     ADVANCE_BATCH(batch);
415 }
416
417 static void
418 gen9_pp_end_pipeline(VADriverContextP ctx,
419                      struct i965_post_processing_context *pp_context)
420 {
421     struct intel_batchbuffer *batch = pp_context->batch;
422
423     BEGIN_BATCH(batch, 1);
424     OUT_BATCH(batch,
425               CMD_PIPELINE_SELECT |
426               PIPELINE_SELECT_MEDIA |
427               GEN9_FORCE_MEDIA_AWAKE_OFF |
428               GEN9_MEDIA_DOP_GATE_ON |
429               GEN9_PIPELINE_SELECTION_MASK |
430               GEN9_MEDIA_DOP_GATE_MASK |
431               GEN9_FORCE_MEDIA_AWAKE_MASK);
432     ADVANCE_BATCH(batch);
433 }
434
435 static void
436 gen9_pp_pipeline_setup(VADriverContextP ctx,
437                        struct i965_post_processing_context *pp_context)
438 {
439     struct intel_batchbuffer *batch = pp_context->batch;
440
441     intel_batchbuffer_start_atomic(batch, 0x1000);
442     intel_batchbuffer_emit_mi_flush(batch);
443     gen9_pp_pipeline_select(ctx, pp_context);
444     gen9_pp_state_base_address(ctx, pp_context);
445     gen8_pp_vfe_state(ctx, pp_context);
446     gen8_pp_curbe_load(ctx, pp_context);
447     gen8_interface_descriptor_load(ctx, pp_context);
448     gen8_pp_object_walker(ctx, pp_context);
449     gen9_pp_end_pipeline(ctx, pp_context);
450     intel_batchbuffer_end_atomic(batch);
451 }
452
453 static VAStatus
454 gen9_post_processing(VADriverContextP ctx,
455                      struct i965_post_processing_context *pp_context,
456                      const struct i965_surface *src_surface,
457                      const VARectangle *src_rect,
458                      struct i965_surface *dst_surface,
459                      const VARectangle *dst_rect,
460                      int pp_index,
461                      void * filter_param)
462 {
463     VAStatus va_status;
464
465     va_status = gen8_pp_initialize(ctx, pp_context,
466                                    src_surface,
467                                    src_rect,
468                                    dst_surface,
469                                    dst_rect,
470                                    pp_index,
471                                    filter_param);
472
473     if (va_status == VA_STATUS_SUCCESS) {
474         gen8_pp_states_setup(ctx, pp_context);
475         gen9_pp_pipeline_setup(ctx, pp_context);
476     }
477
478     return va_status;
479 }
480
481 static void
482 gen9_p010_scaling_sample_state(VADriverContextP ctx,
483                                struct i965_gpe_context *gpe_context,
484                                VARectangle *src_rect,
485                                VARectangle *dst_rect)
486 {
487     struct gen8_sampler_state *sampler_state;
488
489     if (gpe_context == NULL || !src_rect || !dst_rect)
490         return;
491     dri_bo_map(gpe_context->dynamic_state.bo, 1);
492
493     if (gpe_context->dynamic_state.bo->virtual == NULL)
494         return;
495
496     assert(gpe_context->dynamic_state.bo->virtual);
497
498     sampler_state = (struct gen8_sampler_state *)
499        (gpe_context->dynamic_state.bo->virtual + gpe_context->sampler_offset);
500
501     memset(sampler_state, 0, sizeof(*sampler_state));
502
503     if ((src_rect->width == dst_rect->width) &&
504         (src_rect->height == dst_rect->height)) {
505         sampler_state->ss0.min_filter = I965_MAPFILTER_NEAREST;
506         sampler_state->ss0.mag_filter = I965_MAPFILTER_NEAREST;
507     } else {
508         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
509         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
510     }
511
512     sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
513     sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
514     sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
515
516     dri_bo_unmap(gpe_context->dynamic_state.bo);
517 }
518
519 void
520 gen9_post_processing_context_init(VADriverContextP ctx,
521                                   void *data,
522                                   struct intel_batchbuffer *batch)
523 {
524     struct i965_driver_data *i965 = i965_driver_data(ctx);
525     struct i965_post_processing_context *pp_context = data;
526     struct i965_gpe_context *gpe_context;
527     struct i965_kernel scaling_kernel;
528
529     gen8_post_processing_context_common_init(ctx, data, pp_modules_gen9, ARRAY_ELEMS(pp_modules_gen9), batch);
530     avs_init_state(&pp_context->pp_avs_context.state, &gen9_avs_config);
531
532     pp_context->intel_post_processing = gen9_post_processing;
533
534     gpe_context = &pp_context->scaling_10bit_context;
535     memset(&scaling_kernel, 0, sizeof(scaling_kernel));
536     scaling_kernel.bin = pp_10bit_scaling_gen9;
537     scaling_kernel.size = sizeof(pp_10bit_scaling_gen9);
538     gen8_gpe_load_kernels(ctx, gpe_context, &scaling_kernel, 1);
539     gpe_context->idrt_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
540     gpe_context->sampler_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
541     gpe_context->curbe_size = ALIGN(sizeof(struct scaling_input_parameter), 64);
542     gpe_context->curbe.length = gpe_context->curbe_size;
543
544     gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
545     gpe_context->surface_state_binding_table.binding_table_offset = 0;
546     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_SCALING_SURFACES * 4, 64);
547     gpe_context->surface_state_binding_table.length = ALIGN(MAX_SCALING_SURFACES * 4, 64) + ALIGN(MAX_SCALING_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
548
549     if (i965->intel.has_bsd2)
550         gpe_context->vfe_state.max_num_threads = 300;
551     else
552         gpe_context->vfe_state.max_num_threads = 60;
553
554     gpe_context->vfe_state.curbe_allocation_size = 37;
555     gpe_context->vfe_state.urb_entry_size = 16;
556     gpe_context->vfe_state.num_urb_entries = 127;
557     gpe_context->vfe_state.gpgpu_mode = 0;
558
559     gen8_gpe_context_init(ctx, gpe_context);
560     pp_context->scaling_context_initialized = 1;
561     return;
562 }
563
564 static void
565 gen9_add_dri_buffer_2d_gpe_surface(VADriverContextP ctx,
566                                    struct i965_gpe_context *gpe_context,
567                                    dri_bo *bo,
568                                    unsigned int bo_offset,
569                                    unsigned int width,
570                                    unsigned int height,
571                                    unsigned int pitch,
572                                    int is_media_block_rw,
573                                    unsigned int format,
574                                    int index,
575                                    int is_10bit)
576 {
577     struct i965_gpe_resource gpe_resource;
578     struct i965_gpe_surface gpe_surface;
579
580     i965_gpe_dri_object_to_2d_gpe_resource(&gpe_resource, bo, width, height, pitch);
581     memset(&gpe_surface, 0, sizeof(gpe_surface));
582     gpe_surface.gpe_resource = &gpe_resource;
583     gpe_surface.is_2d_surface = 1;
584     gpe_surface.is_media_block_rw = !!is_media_block_rw;
585     gpe_surface.cacheability_control = DEFAULT_MOCS;
586     gpe_surface.format = format;
587     gpe_surface.is_override_offset = 1;
588     gpe_surface.offset = bo_offset;
589     gpe_surface.is_16bpp = is_10bit;
590
591     gen9_gpe_context_add_surface(gpe_context, &gpe_surface, index);
592
593     i965_free_gpe_resource(&gpe_resource);
594 }
595
596 static void
597 gen9_run_kernel_media_object_walker(VADriverContextP ctx,
598                                     struct intel_batchbuffer *batch,
599                                     struct i965_gpe_context *gpe_context,
600                                     struct gpe_media_object_walker_parameter *param)
601 {
602     if (!batch || !gpe_context || !param)
603         return;
604
605     intel_batchbuffer_start_atomic(batch, 0x1000);
606
607     intel_batchbuffer_emit_mi_flush(batch);
608
609     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
610     gen9_gpe_media_object_walker(ctx, gpe_context, batch, param);
611     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
612
613     gen9_gpe_pipeline_end(ctx, gpe_context, batch);
614
615     intel_batchbuffer_end_atomic(batch);
616
617     intel_batchbuffer_flush(batch);
618     return;
619 }
620
621 static unsigned int
622 pp_get_surface_fourcc(VADriverContextP ctx, struct i965_surface *surface)
623 {
624     unsigned int fourcc;
625
626     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
627         struct object_image *obj_image = (struct object_image *)surface->base;
628         fourcc = obj_image->image.format.fourcc;
629     } else {
630         struct object_surface *obj_surface = (struct object_surface *)surface->base;
631         fourcc = obj_surface->fourcc;
632     }
633
634     return fourcc;
635 }
636
637 static void
638 gen9_gpe_context_p010_scaling_curbe(VADriverContextP ctx,
639                                struct i965_gpe_context *gpe_context,
640                                VARectangle *src_rect,
641                                struct i965_surface *src_surface,
642                                VARectangle *dst_rect,
643                                struct i965_surface *dst_surface)
644 {
645     struct scaling_input_parameter *scaling_curbe;
646     float src_width, src_height;
647     float coeff;
648     unsigned int fourcc;
649
650     if ((gpe_context == NULL) ||
651         (src_rect == NULL) || (src_surface == NULL) ||
652         (dst_rect == NULL) || (dst_surface == NULL))
653         return;
654
655     scaling_curbe = gen8p_gpe_context_map_curbe(gpe_context);
656
657     if (!scaling_curbe)
658         return;
659
660     memset(scaling_curbe, 0, sizeof(struct scaling_input_parameter));
661
662     scaling_curbe->bti_input = BTI_SCALING_INPUT_Y;
663     scaling_curbe->bti_output = BTI_SCALING_OUTPUT_Y;
664
665     /* As the src_rect/dst_rect is already checked, it is skipped.*/
666     scaling_curbe->x_dst     = dst_rect->x;
667     scaling_curbe->y_dst     = dst_rect->y;
668
669     src_width = src_rect->x + src_rect->width;
670     src_height = src_rect->y + src_rect->height;
671
672     scaling_curbe->inv_width = 1 / src_width;
673     scaling_curbe->inv_height = 1 / src_height;
674
675     coeff = (float) (src_rect->width) / dst_rect->width;
676     scaling_curbe->x_factor = coeff / src_width;
677     scaling_curbe->x_orig = (float)(src_rect->x) / src_width;
678
679     coeff = (float) (src_rect->height) / dst_rect->height;
680     scaling_curbe->y_factor = coeff / src_height;
681     scaling_curbe->y_orig = (float)(src_rect->y) / src_height;
682
683     fourcc = pp_get_surface_fourcc(ctx, src_surface);
684     if (fourcc == VA_FOURCC_P010) {
685         scaling_curbe->dw7.src_packed = 1;
686         scaling_curbe->dw7.src_msb = 1;
687     }
688     /* I010 will use LSB */
689
690     fourcc = pp_get_surface_fourcc(ctx, dst_surface);
691
692     if (fourcc == VA_FOURCC_P010) {
693         scaling_curbe->dw7.dst_packed = 1;
694         scaling_curbe->dw7.dst_msb = 1;
695     }
696     /* I010 will use LSB */
697
698     gen8p_gpe_context_unmap_curbe(gpe_context);
699 }
700
701 static bool
702 gen9_pp_context_get_surface_conf(VADriverContextP ctx,
703                                  struct i965_surface *surface,
704                                  VARectangle *rect,
705                                  int *width,
706                                  int *height,
707                                  int *pitch,
708                                  int *bo_offset)
709 {
710     unsigned int fourcc;
711     if (!rect || !surface || !width || !height || !pitch || !bo_offset)
712         return false;
713
714     if (surface->base == NULL)
715         return false;
716
717     fourcc = pp_get_surface_fourcc(ctx, surface);
718     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
719         struct object_surface *obj_surface;
720
721         obj_surface = (struct object_surface *)surface->base;
722         width[0] = MIN(rect->x + rect->width, obj_surface->orig_width);
723         height[0] = MIN(rect->y + rect->height, obj_surface->orig_height);
724         pitch[0] = obj_surface->width;
725         bo_offset[0] = 0;
726
727         if (fourcc == VA_FOURCC_P010) {
728             width[1] = width[0] / 2;
729             height[1] = height[0] / 2;
730             pitch[1] = obj_surface->cb_cr_pitch;
731             bo_offset[1] = obj_surface->width * obj_surface->y_cb_offset;
732         } else {
733             /* I010 format */
734             width[1] = width[0] / 2;
735             height[1] = height[0] / 2;
736             pitch[1] = obj_surface->cb_cr_pitch;
737             bo_offset[1] = obj_surface->width * obj_surface->y_cb_offset;
738             width[2] = width[0] / 2;
739             height[2] = height[0] / 2;
740             pitch[2] = obj_surface->cb_cr_pitch;
741             bo_offset[2] = obj_surface->width * obj_surface->y_cr_offset;
742         }
743
744     } else {
745         struct object_image *obj_image;
746
747         obj_image = (struct object_image *)surface->base;
748
749         width[0] = MIN(rect->x + rect->width, obj_image->image.width);
750         height[0] = MIN(rect->y + rect->height, obj_image->image.height);
751         pitch[0] = obj_image->image.pitches[0];
752         bo_offset[0] = obj_image->image.offsets[0];
753
754         if (fourcc == VA_FOURCC_P010) {
755             width[1] = width[0] / 2;
756             height[1] = height[0] / 2;
757             pitch[1] = obj_image->image.pitches[1];
758             bo_offset[1] = obj_image->image.offsets[1];
759         } else {
760             /* I010 format */
761             width[1] = width[0] / 2;
762             height[1] = height[0] / 2;
763             pitch[1] = obj_image->image.pitches[1];
764             bo_offset[1] = obj_image->image.offsets[1];
765             width[2] = width[0] / 2;
766             height[2] = height[0] / 2;
767             pitch[2] = obj_image->image.pitches[2];
768             bo_offset[2] = obj_image->image.offsets[2];
769         }
770
771     }
772
773     return true;
774 }
775
776 static void
777 gen9_gpe_context_p010_scaling_surfaces(VADriverContextP ctx,
778                                struct i965_gpe_context *gpe_context,
779                                VARectangle *src_rect,
780                                struct i965_surface *src_surface,
781                                VARectangle *dst_rect,
782                                struct i965_surface *dst_surface)
783 {
784     unsigned int fourcc;
785     int width[3], height[3], pitch[3], bo_offset[3];
786     dri_bo *bo;
787     struct object_surface *obj_surface;
788     struct object_image *obj_image;
789     int bti;
790
791     if ((gpe_context == NULL) ||
792         (src_rect == NULL) || (src_surface == NULL) ||
793         (dst_rect == NULL) || (dst_surface == NULL))
794         return;
795
796     if (src_surface->base == NULL || dst_surface->base == NULL)
797         return;
798
799     fourcc = pp_get_surface_fourcc(ctx, src_surface);
800
801     if (src_surface->type == I965_SURFACE_TYPE_SURFACE) {
802         obj_surface = (struct object_surface *)src_surface->base;
803         bo = obj_surface->bo;
804     } else {
805         obj_image = (struct object_image *)src_surface->base;
806         bo = obj_image->bo;
807     }
808
809     bti = 0;
810     if (gen9_pp_context_get_surface_conf(ctx, src_surface, src_rect,
811                                          width, height, pitch,
812                                          bo_offset)) {
813         bti = BTI_SCALING_INPUT_Y;
814         /* Input surface */
815         gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
816                                            bo_offset[0],
817                                            width[0], height[0],
818                                            pitch[0], 0,
819                                            I965_SURFACEFORMAT_R16_UNORM,
820                                            bti, 1);
821         if (fourcc == VA_FOURCC_P010) {
822             gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
823                                            bo_offset[1],
824                                            width[1], height[1],
825                                            pitch[1], 0,
826                                            I965_SURFACEFORMAT_R16G16_UNORM,
827                                            bti + 1, 1);
828         } else {
829             gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
830                                            bo_offset[1],
831                                            width[1], height[1],
832                                            pitch[1], 0,
833                                            I965_SURFACEFORMAT_R16_UNORM,
834                                            bti + 1, 1);
835
836             gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
837                                            bo_offset[2],
838                                            width[2], height[2],
839                                            pitch[2], 0,
840                                            I965_SURFACEFORMAT_R16_UNORM,
841                                            bti + 2, 1);
842         }
843     }
844
845     fourcc = pp_get_surface_fourcc(ctx, dst_surface);
846
847     if (dst_surface->type == I965_SURFACE_TYPE_SURFACE) {
848         obj_surface = (struct object_surface *)dst_surface->base;
849         bo = obj_surface->bo;
850     } else {
851         obj_image = (struct object_image *)dst_surface->base;
852         bo = obj_image->bo;
853     }
854
855     if (gen9_pp_context_get_surface_conf(ctx, dst_surface, dst_rect,
856                                          width, height, pitch,
857                                          bo_offset)) {
858         bti = BTI_SCALING_OUTPUT_Y;
859         /* Input surface */
860         gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
861                                            bo_offset[0],
862                                            width[0], height[0],
863                                            pitch[0], 1,
864                                            I965_SURFACEFORMAT_R16_UINT,
865                                            bti, 1);
866         if (fourcc == VA_FOURCC_P010) {
867             gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
868                                            bo_offset[1],
869                                            width[1] * 2, height[1],
870                                            pitch[1], 1,
871                                            I965_SURFACEFORMAT_R16_UINT,
872                                            bti + 1, 1);
873         } else {
874             gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
875                                            bo_offset[1],
876                                            width[1], height[1],
877                                            pitch[1], 1,
878                                            I965_SURFACEFORMAT_R16_UINT,
879                                            bti + 1, 1);
880
881             gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
882                                            bo_offset[2],
883                                            width[2], height[2],
884                                            pitch[2], 1,
885                                            I965_SURFACEFORMAT_R16_UINT,
886                                            bti + 2, 1);
887         }
888     }
889
890     return;
891 }
892
893 VAStatus
894 gen9_p010_scaling_post_processing(
895     VADriverContextP   ctx,
896     struct i965_post_processing_context *pp_context,
897     struct i965_surface *src_surface,
898     VARectangle *src_rect,
899     struct i965_surface *dst_surface,
900     VARectangle *dst_rect)
901 {
902     struct i965_gpe_context *gpe_context;
903     struct gpe_media_object_walker_parameter media_object_walker_param;
904     struct intel_vpp_kernel_walker_parameter kernel_walker_param;
905
906     if (!pp_context || !src_surface || !src_rect || !dst_surface || !dst_rect)
907         return VA_STATUS_ERROR_INVALID_PARAMETER;
908
909     if (!pp_context->scaling_context_initialized)
910         return VA_STATUS_ERROR_UNIMPLEMENTED;
911
912     gpe_context = &pp_context->scaling_10bit_context;
913
914     gen8_gpe_context_init(ctx, gpe_context);
915     gen9_p010_scaling_sample_state(ctx, gpe_context, src_rect, dst_rect);
916     gen9_gpe_reset_binding_table(ctx, gpe_context);
917     gen9_gpe_context_p010_scaling_curbe(ctx, gpe_context,
918                                         src_rect, src_surface,
919                                         dst_rect, dst_surface);
920
921     gen9_gpe_context_p010_scaling_surfaces(ctx, gpe_context,
922                                         src_rect, src_surface,
923                                         dst_rect, dst_surface);
924
925     gen8_gpe_setup_interface_data(ctx, gpe_context);
926
927     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
928     kernel_walker_param.resolution_x = ALIGN(dst_rect->width, 16) >> 4;
929     kernel_walker_param.resolution_y = ALIGN(dst_rect->height, 16) >> 4;
930     kernel_walker_param.no_dependency = 1;
931
932     intel_vpp_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
933
934     gen9_run_kernel_media_object_walker(ctx, pp_context->batch,
935                                         gpe_context,
936                                         &media_object_walker_param);
937
938     return VA_STATUS_SUCCESS;
939 }