OSDN Git Service

intel-vaapi-driver 1.8.1.pre1
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_post_processing.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  */
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <assert.h>
30
31 #include "intel_batchbuffer.h"
32 #include "intel_driver.h"
33 #include "i965_defines.h"
34 #include "i965_structs.h"
35 #include "i965_drv_video.h"
36 #include "i965_post_processing.h"
37 #include "i965_render.h"
38 #include "intel_media.h"
39
40 #include "gen8_post_processing.h"
41 #include "gen75_picture_process.h"
42 #include "intel_gen_vppapi.h"
43 #include "intel_common_vpp_internal.h"
44
/*
 * Kernel binaries for the gen9 post-processing modules.
 *
 * Every table is an array of rows of four 32-bit words. The rows are
 * pulled in from the .g9b files named in the #include lines; a table
 * with an empty initializer carries no kernel code at all.
 */

/* Empty: no kernel. */
static const uint32_t pp_null_gen9[][4] = {
};

/* NV12 -> NV12 */
static const uint32_t pp_nv12_load_save_nv12_gen9[][4] = {
#include "shaders/post_processing/gen9/pl2_to_pl2.g9b"
};

/* NV12 -> 3-plane output */
static const uint32_t pp_nv12_load_save_pl3_gen9[][4] = {
#include "shaders/post_processing/gen9/pl2_to_pl3.g9b"
};

/* 3-plane input -> NV12 */
static const uint32_t pp_pl3_load_save_nv12_gen9[][4] = {
#include "shaders/post_processing/gen9/pl3_to_pl2.g9b"
};

/* 3-plane input -> 3-plane output */
static const uint32_t pp_pl3_load_save_pl3_gen9[][4] = {
#include "shaders/post_processing/gen9/pl3_to_pl3.g9b"
};

/* Same binary (pl2_to_pl2.g9b) as pp_nv12_load_save_nv12_gen9. */
static const uint32_t pp_nv12_scaling_gen9[][4] = {
#include "shaders/post_processing/gen9/pl2_to_pl2.g9b"
};

/* Same binary (pl2_to_pl2.g9b) as pp_nv12_load_save_nv12_gen9. */
static const uint32_t pp_nv12_avs_gen9[][4] = {
#include "shaders/post_processing/gen9/pl2_to_pl2.g9b"
};

/* Empty: no kernel. */
static const uint32_t pp_nv12_dndi_gen9[][4] = {
};

/* Empty: no kernel. */
static const uint32_t pp_nv12_dn_gen9[][4] = {
};

/* NV12 -> packed ("pa") output */
static const uint32_t pp_nv12_load_save_pa_gen9[][4] = {
#include "shaders/post_processing/gen9/pl2_to_pa.g9b"
};

/* 3-plane input -> packed output */
static const uint32_t pp_pl3_load_save_pa_gen9[][4] = {
#include "shaders/post_processing/gen9/pl3_to_pa.g9b"
};

/* Packed input -> NV12 */
static const uint32_t pp_pa_load_save_nv12_gen9[][4] = {
#include "shaders/post_processing/gen9/pa_to_pl2.g9b"
};

/* Packed input -> 3-plane output */
static const uint32_t pp_pa_load_save_pl3_gen9[][4] = {
#include "shaders/post_processing/gen9/pa_to_pl3.g9b"
};

/* Packed input -> packed output */
static const uint32_t pp_pa_load_save_pa_gen9[][4] = {
#include "shaders/post_processing/gen9/pa_to_pa.g9b"
};

/* RGBX -> NV12 */
static const uint32_t pp_rgbx_load_save_nv12_gen9[][4] = {
#include "shaders/post_processing/gen9/rgbx_to_nv12.g9b"
};

/* NV12 -> RGBX */
static const uint32_t pp_nv12_load_save_rgbx_gen9[][4] = {
#include "shaders/post_processing/gen9/pl2_to_rgbx.g9b"
};

/* Empty: no kernel. Not referenced by the pp_modules_gen9 table. */
static const uint32_t pp_nv12_blending_gen9[][4] = {
};
108
/* Number of binding-table / surface-state slots reserved per scaling GPE context. */
#define MAX_SCALING_SURFACES    16

/* Value written to cacheability_control of the GPE surfaces created in this file. */
#define DEFAULT_MOCS    0x02

/* Kernel loaded into the 10-bit scaling GPE context (conv_p010.g9b). */
static const uint32_t pp_10bit_scaling_gen9[][4] = {
#include "shaders/post_processing/gen9/conv_p010.g9b"
};

/* Kernel loaded into the YUV420 8-bit scaling GPE context (conv_nv12.g9b). */
static const uint32_t pp_yuv420p8_scaling_gen9[][4] = {
#include "shaders/post_processing/gen9/conv_nv12.g9b"
};
120
/*
 * Table of gen9 post-processing modules, handed to
 * gen8_post_processing_context_common_init() together with its element
 * count.
 *
 * Each entry is { kernel descriptor, initialize callback }. The kernel
 * descriptor lists, in order: a human-readable name, the PP_* identifier,
 * the kernel binary, sizeof() of that binary and a trailing NULL
 * (presumably the not-yet-allocated kernel buffer object; confirm against
 * the struct definition).
 *
 * Entries whose binary table is empty use pp_null_initialize; every other
 * entry uses gen8_pp_plx_avs_initialize.
 */
static struct pp_module pp_modules_gen9[] = {
    {
        {
            "NULL module (for testing)",
            PP_NULL,
            pp_null_gen9,
            sizeof(pp_null_gen9),
            NULL,
        },

        pp_null_initialize,
    },

    {
        {
            "NV12_NV12",
            PP_NV12_LOAD_SAVE_N12,
            pp_nv12_load_save_nv12_gen9,
            sizeof(pp_nv12_load_save_nv12_gen9),
            NULL,
        },

        gen8_pp_plx_avs_initialize,
    },

    {
        {
            "NV12_PL3",
            PP_NV12_LOAD_SAVE_PL3,
            pp_nv12_load_save_pl3_gen9,
            sizeof(pp_nv12_load_save_pl3_gen9),
            NULL,
        },
        gen8_pp_plx_avs_initialize,
    },

    {
        {
            "PL3_NV12",
            PP_PL3_LOAD_SAVE_N12,
            pp_pl3_load_save_nv12_gen9,
            sizeof(pp_pl3_load_save_nv12_gen9),
            NULL,
        },

        gen8_pp_plx_avs_initialize,
    },

    {
        {
            "PL3_PL3",
            PP_PL3_LOAD_SAVE_PL3,
            pp_pl3_load_save_pl3_gen9,
            sizeof(pp_pl3_load_save_pl3_gen9),
            NULL,
        },

        gen8_pp_plx_avs_initialize,
    },

    {
        {
            "NV12 Scaling module",
            PP_NV12_SCALING,
            pp_nv12_scaling_gen9,
            sizeof(pp_nv12_scaling_gen9),
            NULL,
        },

        gen8_pp_plx_avs_initialize,
    },

    {
        {
            "NV12 AVS module",
            PP_NV12_AVS,
            pp_nv12_avs_gen9,
            sizeof(pp_nv12_avs_gen9),
            NULL,
        },

        gen8_pp_plx_avs_initialize,
    },

    /* DNDI and DN have empty kernel tables and the null initializer. */
    {
        {
            "NV12 DNDI module",
            PP_NV12_DNDI,
            pp_nv12_dndi_gen9,
            sizeof(pp_nv12_dndi_gen9),
            NULL,
        },

        pp_null_initialize,
    },

    {
        {
            "NV12 DN module",
            PP_NV12_DN,
            pp_nv12_dn_gen9,
            sizeof(pp_nv12_dn_gen9),
            NULL,
        },

        pp_null_initialize,
    },
    {
        {
            "NV12_PA module",
            PP_NV12_LOAD_SAVE_PA,
            pp_nv12_load_save_pa_gen9,
            sizeof(pp_nv12_load_save_pa_gen9),
            NULL,
        },

        gen8_pp_plx_avs_initialize,
    },

    {
        {
            "PL3_PA module",
            PP_PL3_LOAD_SAVE_PA,
            pp_pl3_load_save_pa_gen9,
            sizeof(pp_pl3_load_save_pa_gen9),
            NULL,
        },

        gen8_pp_plx_avs_initialize,
    },

    {
        {
            "PA_NV12 module",
            PP_PA_LOAD_SAVE_NV12,
            pp_pa_load_save_nv12_gen9,
            sizeof(pp_pa_load_save_nv12_gen9),
            NULL,
        },

        gen8_pp_plx_avs_initialize,
    },

    {
        {
            "PA_PL3 module",
            PP_PA_LOAD_SAVE_PL3,
            pp_pa_load_save_pl3_gen9,
            sizeof(pp_pa_load_save_pl3_gen9),
            NULL,
        },

        gen8_pp_plx_avs_initialize,
    },

    {
        {
            "PA_PA module",
            PP_PA_LOAD_SAVE_PA,
            pp_pa_load_save_pa_gen9,
            sizeof(pp_pa_load_save_pa_gen9),
            NULL,
        },

        gen8_pp_plx_avs_initialize,
    },

    {
        {
            "RGBX_NV12 module",
            PP_RGBX_LOAD_SAVE_NV12,
            pp_rgbx_load_save_nv12_gen9,
            sizeof(pp_rgbx_load_save_nv12_gen9),
            NULL,
        },

        gen8_pp_plx_avs_initialize,
    },

    {
        {
            "NV12_RGBX module",
            PP_NV12_LOAD_SAVE_RGBX,
            pp_nv12_load_save_rgbx_gen9,
            sizeof(pp_nv12_load_save_rgbx_gen9),
            NULL,
        },

        gen8_pp_plx_avs_initialize,
    },
};
312
/*
 * AVS filter configuration passed to avs_init_state() when the gen9
 * post-processing context is initialized.
 */
static const AVSConfig gen9_avs_config = {
    .coeff_frac_bits = 6,
    /* 1/64, i.e. 2^-coeff_frac_bits */
    .coeff_epsilon = 1.0f / (1U << 6),
    .num_phases = 31,
    .num_luma_coeffs = 8,
    .num_chroma_coeffs = 4,

    /* Every luma (8 taps) and chroma (4 taps) coefficient is bounded to [-2, 2]. */
    .coeff_range = {
        .lower_bound = {
            .y_k_h = { -2, -2, -2, -2, -2, -2, -2, -2 },
            .y_k_v = { -2, -2, -2, -2, -2, -2, -2, -2 },
            .uv_k_h = { -2, -2, -2, -2 },
            .uv_k_v = { -2, -2, -2, -2 },
        },
        .upper_bound = {
            .y_k_h = { 2, 2, 2, 2, 2, 2, 2, 2 },
            .y_k_v = { 2, 2, 2, 2, 2, 2, 2, 2 },
            .uv_k_h = { 2, 2, 2, 2 },
            .uv_k_v = { 2, 2, 2, 2 },
        },
    },
};
335
336 static void
337 gen9_pp_pipeline_select(VADriverContextP ctx,
338                         struct i965_post_processing_context *pp_context)
339 {
340     struct intel_batchbuffer *batch = pp_context->batch;
341
342     BEGIN_BATCH(batch, 1);
343     OUT_BATCH(batch,
344               CMD_PIPELINE_SELECT |
345               PIPELINE_SELECT_MEDIA |
346               GEN9_FORCE_MEDIA_AWAKE_ON |
347               GEN9_MEDIA_DOP_GATE_OFF |
348               GEN9_PIPELINE_SELECTION_MASK |
349               GEN9_MEDIA_DOP_GATE_MASK |
350               GEN9_FORCE_MEDIA_AWAKE_MASK);
351     ADVANCE_BATCH(batch);
352 }
353
354 static void
355 gen9_pp_state_base_address(VADriverContextP ctx,
356                            struct i965_post_processing_context *pp_context)
357 {
358     struct intel_batchbuffer *batch = pp_context->batch;
359
360     BEGIN_BATCH(batch, 19);
361     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (19 - 2));
362     /* DW1 Generate state address */
363     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
364     OUT_BATCH(batch, 0);
365     OUT_BATCH(batch, 0);
366     /* DW4-5 Surface state address */
367     OUT_RELOC64(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
368     /* DW6-7 Dynamic state address */
369     OUT_RELOC64(batch, pp_context->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
370               0, 0 | BASE_ADDRESS_MODIFY);
371
372     /* DW8. Indirect object address */
373     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
374     OUT_BATCH(batch, 0);
375
376     /* DW10-11 Instruction base address */
377     OUT_RELOC64(batch, pp_context->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
378
379     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
380     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
381     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
382     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
383
384     /* Bindless surface state base address */
385     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
386     OUT_BATCH(batch, 0);
387     OUT_BATCH(batch, 0xfffff000);
388
389     ADVANCE_BATCH(batch);
390 }
391
392 static void
393 gen9_pp_end_pipeline(VADriverContextP ctx,
394                      struct i965_post_processing_context *pp_context)
395 {
396     struct intel_batchbuffer *batch = pp_context->batch;
397
398     BEGIN_BATCH(batch, 1);
399     OUT_BATCH(batch,
400               CMD_PIPELINE_SELECT |
401               PIPELINE_SELECT_MEDIA |
402               GEN9_FORCE_MEDIA_AWAKE_OFF |
403               GEN9_MEDIA_DOP_GATE_ON |
404               GEN9_PIPELINE_SELECTION_MASK |
405               GEN9_MEDIA_DOP_GATE_MASK |
406               GEN9_FORCE_MEDIA_AWAKE_MASK);
407     ADVANCE_BATCH(batch);
408 }
409
410 static void
411 gen9_pp_pipeline_setup(VADriverContextP ctx,
412                        struct i965_post_processing_context *pp_context)
413 {
414     struct intel_batchbuffer *batch = pp_context->batch;
415
416     intel_batchbuffer_start_atomic(batch, 0x1000);
417     intel_batchbuffer_emit_mi_flush(batch);
418     gen9_pp_pipeline_select(ctx, pp_context);
419     gen9_pp_state_base_address(ctx, pp_context);
420     gen8_pp_vfe_state(ctx, pp_context);
421     gen8_pp_curbe_load(ctx, pp_context);
422     gen8_interface_descriptor_load(ctx, pp_context);
423     gen8_pp_object_walker(ctx, pp_context);
424     gen9_pp_end_pipeline(ctx, pp_context);
425     intel_batchbuffer_end_atomic(batch);
426 }
427
428 static VAStatus
429 gen9_post_processing(VADriverContextP ctx,
430                      struct i965_post_processing_context *pp_context,
431                      const struct i965_surface *src_surface,
432                      const VARectangle *src_rect,
433                      struct i965_surface *dst_surface,
434                      const VARectangle *dst_rect,
435                      int pp_index,
436                      void * filter_param)
437 {
438     VAStatus va_status;
439
440     va_status = gen8_pp_initialize(ctx, pp_context,
441                                    src_surface,
442                                    src_rect,
443                                    dst_surface,
444                                    dst_rect,
445                                    pp_index,
446                                    filter_param);
447
448     if (va_status == VA_STATUS_SUCCESS) {
449         gen8_pp_states_setup(ctx, pp_context);
450         gen9_pp_pipeline_setup(ctx, pp_context);
451     }
452
453     return va_status;
454 }
455
456 static void
457 gen9_vpp_scaling_sample_state(VADriverContextP ctx,
458                                struct i965_gpe_context *gpe_context,
459                                VARectangle *src_rect,
460                                VARectangle *dst_rect)
461 {
462     struct gen8_sampler_state *sampler_state;
463
464     if (gpe_context == NULL || !src_rect || !dst_rect)
465         return;
466     dri_bo_map(gpe_context->sampler.bo, 1);
467
468     if (gpe_context->sampler.bo->virtual == NULL)
469         return;
470
471     assert(gpe_context->sampler.bo->virtual);
472
473     sampler_state = (struct gen8_sampler_state *)
474        (gpe_context->sampler.bo->virtual + gpe_context->sampler.offset);
475
476     memset(sampler_state, 0, sizeof(*sampler_state));
477
478     if ((src_rect->width == dst_rect->width) &&
479         (src_rect->height == dst_rect->height)) {
480         sampler_state->ss0.min_filter = I965_MAPFILTER_NEAREST;
481         sampler_state->ss0.mag_filter = I965_MAPFILTER_NEAREST;
482     } else {
483         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
484         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
485     }
486
487     sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
488     sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
489     sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
490
491     dri_bo_unmap(gpe_context->sampler.bo);
492 }
493
494 void
495 gen9_post_processing_context_init(VADriverContextP ctx,
496                                   void *data,
497                                   struct intel_batchbuffer *batch)
498 {
499     struct i965_driver_data *i965 = i965_driver_data(ctx);
500     struct i965_post_processing_context *pp_context = data;
501     struct i965_gpe_context *gpe_context;
502     struct i965_kernel scaling_kernel;
503
504     gen8_post_processing_context_common_init(ctx, data, pp_modules_gen9, ARRAY_ELEMS(pp_modules_gen9), batch);
505     avs_init_state(&pp_context->pp_avs_context.state, &gen9_avs_config);
506
507     pp_context->intel_post_processing = gen9_post_processing;
508
509     gpe_context = &pp_context->scaling_10bit_context;
510     memset(&scaling_kernel, 0, sizeof(scaling_kernel));
511     scaling_kernel.bin = pp_10bit_scaling_gen9;
512     scaling_kernel.size = sizeof(pp_10bit_scaling_gen9);
513     gen8_gpe_load_kernels(ctx, gpe_context, &scaling_kernel, 1);
514     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
515     gpe_context->idrt.max_entries = 1;
516     gpe_context->sampler.entry_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
517     gpe_context->sampler.max_entries = 1;
518     gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 64);
519
520     gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
521     gpe_context->surface_state_binding_table.binding_table_offset = 0;
522     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_SCALING_SURFACES * 4, 64);
523     gpe_context->surface_state_binding_table.length = ALIGN(MAX_SCALING_SURFACES * 4, 64) + ALIGN(MAX_SCALING_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
524
525     if (i965->intel.eu_total > 0) {
526         gpe_context->vfe_state.max_num_threads = i965->intel.eu_total * 6;
527     } else {
528         if (i965->intel.has_bsd2)
529             gpe_context->vfe_state.max_num_threads = 300;
530         else
531             gpe_context->vfe_state.max_num_threads = 60;
532     }
533
534     gpe_context->vfe_state.curbe_allocation_size = 37;
535     gpe_context->vfe_state.urb_entry_size = 16;
536     gpe_context->vfe_state.num_urb_entries = 127;
537     gpe_context->vfe_state.gpgpu_mode = 0;
538
539     gen8_gpe_context_init(ctx, gpe_context);
540     pp_context->scaling_context_initialized = 1;
541
542     /* initialize the YUV420 8-Bit scaling context. The below is supported.
543      * NV12 ->NV12
544      * NV12 ->I420
545      * I420 ->I420
546      * I420 ->NV12
547      */
548     gpe_context = &pp_context->scaling_yuv420p8_context;
549     memset(&scaling_kernel, 0, sizeof(scaling_kernel));
550     scaling_kernel.bin = pp_yuv420p8_scaling_gen9;
551     scaling_kernel.size = sizeof(pp_yuv420p8_scaling_gen9);
552     gen8_gpe_load_kernels(ctx, gpe_context, &scaling_kernel, 1);
553     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
554     gpe_context->idrt.max_entries = 1;
555     gpe_context->sampler.entry_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
556     gpe_context->sampler.max_entries = 1;
557     gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 32);
558
559     gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
560     gpe_context->surface_state_binding_table.binding_table_offset = 0;
561     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_SCALING_SURFACES * 4, 64);
562     gpe_context->surface_state_binding_table.length = ALIGN(MAX_SCALING_SURFACES * 4, 64) + ALIGN(MAX_SCALING_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
563
564     if (i965->intel.eu_total > 0) {
565         gpe_context->vfe_state.max_num_threads = i965->intel.eu_total * 6;
566     } else {
567         if (i965->intel.has_bsd2)
568             gpe_context->vfe_state.max_num_threads = 300;
569         else
570             gpe_context->vfe_state.max_num_threads = 60;
571     }
572
573     gpe_context->vfe_state.curbe_allocation_size = 37;
574     gpe_context->vfe_state.urb_entry_size = 16;
575     gpe_context->vfe_state.num_urb_entries = 127;
576     gpe_context->vfe_state.gpgpu_mode = 0;
577
578     gen8_gpe_context_init(ctx, gpe_context);
579     pp_context->scaling_8bit_initialized = VPPGPE_8BIT_420;
580     return;
581 }
582
583 static void
584 gen9_add_dri_buffer_2d_gpe_surface(VADriverContextP ctx,
585                                    struct i965_gpe_context *gpe_context,
586                                    dri_bo *bo,
587                                    unsigned int bo_offset,
588                                    unsigned int width,
589                                    unsigned int height,
590                                    unsigned int pitch,
591                                    int is_media_block_rw,
592                                    unsigned int format,
593                                    int index,
594                                    int is_10bit)
595 {
596     struct i965_gpe_resource gpe_resource;
597     struct i965_gpe_surface gpe_surface;
598
599     i965_dri_object_to_2d_gpe_resource(&gpe_resource, bo, width, height, pitch);
600     memset(&gpe_surface, 0, sizeof(gpe_surface));
601     gpe_surface.gpe_resource = &gpe_resource;
602     gpe_surface.is_2d_surface = 1;
603     gpe_surface.is_media_block_rw = !!is_media_block_rw;
604     gpe_surface.cacheability_control = DEFAULT_MOCS;
605     gpe_surface.format = format;
606     gpe_surface.is_override_offset = 1;
607     gpe_surface.offset = bo_offset;
608     gpe_surface.is_16bpp = is_10bit;
609
610     gen9_gpe_context_add_surface(gpe_context, &gpe_surface, index);
611
612     i965_free_gpe_resource(&gpe_resource);
613 }
614
615 static void
616 gen9_run_kernel_media_object_walker(VADriverContextP ctx,
617                                     struct intel_batchbuffer *batch,
618                                     struct i965_gpe_context *gpe_context,
619                                     struct gpe_media_object_walker_parameter *param)
620 {
621     if (!batch || !gpe_context || !param)
622         return;
623
624     intel_batchbuffer_start_atomic(batch, 0x1000);
625
626     intel_batchbuffer_emit_mi_flush(batch);
627
628     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
629     gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
630     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
631
632     gen9_gpe_pipeline_end(ctx, gpe_context, batch);
633
634     intel_batchbuffer_end_atomic(batch);
635
636     intel_batchbuffer_flush(batch);
637     return;
638 }
639
640 static unsigned int
641 pp_get_surface_fourcc(VADriverContextP ctx, struct i965_surface *surface)
642 {
643     unsigned int fourcc;
644
645     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
646         struct object_image *obj_image = (struct object_image *)surface->base;
647         fourcc = obj_image->image.format.fourcc;
648     } else {
649         struct object_surface *obj_surface = (struct object_surface *)surface->base;
650         fourcc = obj_surface->fourcc;
651     }
652
653     return fourcc;
654 }
655
656 static void
657 gen9_gpe_context_p010_scaling_curbe(VADriverContextP ctx,
658                                struct i965_gpe_context *gpe_context,
659                                VARectangle *src_rect,
660                                struct i965_surface *src_surface,
661                                VARectangle *dst_rect,
662                                struct i965_surface *dst_surface)
663 {
664     struct scaling_input_parameter *scaling_curbe;
665     float src_width, src_height;
666     float coeff;
667     unsigned int fourcc;
668
669     if ((gpe_context == NULL) ||
670         (src_rect == NULL) || (src_surface == NULL) ||
671         (dst_rect == NULL) || (dst_surface == NULL))
672         return;
673
674     scaling_curbe = i965_gpe_context_map_curbe(gpe_context);
675
676     if (!scaling_curbe)
677         return;
678
679     memset(scaling_curbe, 0, sizeof(struct scaling_input_parameter));
680
681     scaling_curbe->bti_input = BTI_SCALING_INPUT_Y;
682     scaling_curbe->bti_output = BTI_SCALING_OUTPUT_Y;
683
684     /* As the src_rect/dst_rect is already checked, it is skipped.*/
685     scaling_curbe->x_dst     = dst_rect->x;
686     scaling_curbe->y_dst     = dst_rect->y;
687
688     src_width = src_rect->x + src_rect->width;
689     src_height = src_rect->y + src_rect->height;
690
691     scaling_curbe->inv_width = 1 / src_width;
692     scaling_curbe->inv_height = 1 / src_height;
693
694     coeff = (float) (src_rect->width) / dst_rect->width;
695     scaling_curbe->x_factor = coeff / src_width;
696     scaling_curbe->x_orig = (float)(src_rect->x) / src_width;
697
698     coeff = (float) (src_rect->height) / dst_rect->height;
699     scaling_curbe->y_factor = coeff / src_height;
700     scaling_curbe->y_orig = (float)(src_rect->y) / src_height;
701
702     fourcc = pp_get_surface_fourcc(ctx, src_surface);
703     if (fourcc == VA_FOURCC_P010) {
704         scaling_curbe->dw7.src_packed = 1;
705         scaling_curbe->dw7.src_msb = 1;
706     }
707     /* I010 will use LSB */
708
709     fourcc = pp_get_surface_fourcc(ctx, dst_surface);
710
711     if (fourcc == VA_FOURCC_P010) {
712         scaling_curbe->dw7.dst_packed = 1;
713         scaling_curbe->dw7.dst_msb = 1;
714     }
715     /* I010 will use LSB */
716
717     i965_gpe_context_unmap_curbe(gpe_context);
718 }
719
720 static bool
721 gen9_pp_context_get_surface_conf(VADriverContextP ctx,
722                                  struct i965_surface *surface,
723                                  VARectangle *rect,
724                                  int *width,
725                                  int *height,
726                                  int *pitch,
727                                  int *bo_offset)
728 {
729     unsigned int fourcc;
730     if (!rect || !surface || !width || !height || !pitch || !bo_offset)
731         return false;
732
733     if (surface->base == NULL)
734         return false;
735
736     fourcc = pp_get_surface_fourcc(ctx, surface);
737     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
738         struct object_surface *obj_surface;
739
740         obj_surface = (struct object_surface *)surface->base;
741         width[0] = MIN(rect->x + rect->width, obj_surface->orig_width);
742         height[0] = MIN(rect->y + rect->height, obj_surface->orig_height);
743         pitch[0] = obj_surface->width;
744         bo_offset[0] = 0;
745
746         if (fourcc == VA_FOURCC_P010 || fourcc == VA_FOURCC_NV12) {
747             width[1] = width[0] / 2;
748             height[1] = height[0] / 2;
749             pitch[1] = obj_surface->cb_cr_pitch;
750             bo_offset[1] = obj_surface->width * obj_surface->y_cb_offset;
751         } else {
752             /* I010/I420 format */
753             width[1] = width[0] / 2;
754             height[1] = height[0] / 2;
755             pitch[1] = obj_surface->cb_cr_pitch;
756             bo_offset[1] = obj_surface->width * obj_surface->y_cb_offset;
757             width[2] = width[0] / 2;
758             height[2] = height[0] / 2;
759             pitch[2] = obj_surface->cb_cr_pitch;
760             bo_offset[2] = obj_surface->width * obj_surface->y_cr_offset;
761         }
762
763     } else {
764         struct object_image *obj_image;
765
766         obj_image = (struct object_image *)surface->base;
767
768         width[0] = MIN(rect->x + rect->width, obj_image->image.width);
769         height[0] = MIN(rect->y + rect->height, obj_image->image.height);
770         pitch[0] = obj_image->image.pitches[0];
771         bo_offset[0] = obj_image->image.offsets[0];
772
773         if (fourcc == VA_FOURCC_P010 || fourcc == VA_FOURCC_NV12) {
774             width[1] = width[0] / 2;
775             height[1] = height[0] / 2;
776             pitch[1] = obj_image->image.pitches[1];
777             bo_offset[1] = obj_image->image.offsets[1];
778         } else {
779             /* I010/I420 format */
780             width[1] = width[0] / 2;
781             height[1] = height[0] / 2;
782             pitch[1] = obj_image->image.pitches[1];
783             bo_offset[1] = obj_image->image.offsets[1];
784             width[2] = width[0] / 2;
785             height[2] = height[0] / 2;
786             pitch[2] = obj_image->image.pitches[2];
787             bo_offset[2] = obj_image->image.offsets[2];
788         }
789
790     }
791
792     return true;
793 }
794
795 static void
796 gen9_gpe_context_p010_scaling_surfaces(VADriverContextP ctx,
797                                struct i965_gpe_context *gpe_context,
798                                VARectangle *src_rect,
799                                struct i965_surface *src_surface,
800                                VARectangle *dst_rect,
801                                struct i965_surface *dst_surface)
802 {
803     unsigned int fourcc;
804     int width[3], height[3], pitch[3], bo_offset[3];
805     dri_bo *bo;
806     struct object_surface *obj_surface;
807     struct object_image *obj_image;
808     int bti;
809
810     if ((gpe_context == NULL) ||
811         (src_rect == NULL) || (src_surface == NULL) ||
812         (dst_rect == NULL) || (dst_surface == NULL))
813         return;
814
815     if (src_surface->base == NULL || dst_surface->base == NULL)
816         return;
817
818     fourcc = pp_get_surface_fourcc(ctx, src_surface);
819
820     if (src_surface->type == I965_SURFACE_TYPE_SURFACE) {
821         obj_surface = (struct object_surface *)src_surface->base;
822         bo = obj_surface->bo;
823     } else {
824         obj_image = (struct object_image *)src_surface->base;
825         bo = obj_image->bo;
826     }
827
828     bti = 0;
829     if (gen9_pp_context_get_surface_conf(ctx, src_surface, src_rect,
830                                          width, height, pitch,
831                                          bo_offset)) {
832         bti = BTI_SCALING_INPUT_Y;
833         /* Input surface */
834         gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
835                                            bo_offset[0],
836                                            width[0], height[0],
837                                            pitch[0], 0,
838                                            I965_SURFACEFORMAT_R16_UNORM,
839                                            bti, 1);
840         if (fourcc == VA_FOURCC_P010) {
841             gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
842                                            bo_offset[1],
843                                            width[1], height[1],
844                                            pitch[1], 0,
845                                            I965_SURFACEFORMAT_R16G16_UNORM,
846                                            bti + 1, 1);
847         } else {
848             gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
849                                            bo_offset[1],
850                                            width[1], height[1],
851                                            pitch[1], 0,
852                                            I965_SURFACEFORMAT_R16_UNORM,
853                                            bti + 1, 1);
854
855             gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
856                                            bo_offset[2],
857                                            width[2], height[2],
858                                            pitch[2], 0,
859                                            I965_SURFACEFORMAT_R16_UNORM,
860                                            bti + 2, 1);
861         }
862     }
863
864     fourcc = pp_get_surface_fourcc(ctx, dst_surface);
865
866     if (dst_surface->type == I965_SURFACE_TYPE_SURFACE) {
867         obj_surface = (struct object_surface *)dst_surface->base;
868         bo = obj_surface->bo;
869     } else {
870         obj_image = (struct object_image *)dst_surface->base;
871         bo = obj_image->bo;
872     }
873
874     if (gen9_pp_context_get_surface_conf(ctx, dst_surface, dst_rect,
875                                          width, height, pitch,
876                                          bo_offset)) {
877         bti = BTI_SCALING_OUTPUT_Y;
878         /* Input surface */
879         gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
880                                            bo_offset[0],
881                                            width[0], height[0],
882                                            pitch[0], 1,
883                                            I965_SURFACEFORMAT_R16_UINT,
884                                            bti, 1);
885         if (fourcc == VA_FOURCC_P010) {
886             gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
887                                            bo_offset[1],
888                                            width[1] * 2, height[1],
889                                            pitch[1], 1,
890                                            I965_SURFACEFORMAT_R16_UINT,
891                                            bti + 1, 1);
892         } else {
893             gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
894                                            bo_offset[1],
895                                            width[1], height[1],
896                                            pitch[1], 1,
897                                            I965_SURFACEFORMAT_R16_UINT,
898                                            bti + 1, 1);
899
900             gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
901                                            bo_offset[2],
902                                            width[2], height[2],
903                                            pitch[2], 1,
904                                            I965_SURFACEFORMAT_R16_UINT,
905                                            bti + 2, 1);
906         }
907     }
908
909     return;
910 }
911
912 VAStatus
913 gen9_p010_scaling_post_processing(
914     VADriverContextP   ctx,
915     struct i965_post_processing_context *pp_context,
916     struct i965_surface *src_surface,
917     VARectangle *src_rect,
918     struct i965_surface *dst_surface,
919     VARectangle *dst_rect)
920 {
921     struct i965_gpe_context *gpe_context;
922     struct gpe_media_object_walker_parameter media_object_walker_param;
923     struct intel_vpp_kernel_walker_parameter kernel_walker_param;
924
925     if (!pp_context || !src_surface || !src_rect || !dst_surface || !dst_rect)
926         return VA_STATUS_ERROR_INVALID_PARAMETER;
927
928     if (!pp_context->scaling_context_initialized)
929         return VA_STATUS_ERROR_UNIMPLEMENTED;
930
931     gpe_context = &pp_context->scaling_10bit_context;
932
933     gen8_gpe_context_init(ctx, gpe_context);
934     gen9_vpp_scaling_sample_state(ctx, gpe_context, src_rect, dst_rect);
935     gen9_gpe_reset_binding_table(ctx, gpe_context);
936     gen9_gpe_context_p010_scaling_curbe(ctx, gpe_context,
937                                         src_rect, src_surface,
938                                         dst_rect, dst_surface);
939
940     gen9_gpe_context_p010_scaling_surfaces(ctx, gpe_context,
941                                         src_rect, src_surface,
942                                         dst_rect, dst_surface);
943
944     gen8_gpe_setup_interface_data(ctx, gpe_context);
945
946     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
947     kernel_walker_param.resolution_x = ALIGN(dst_rect->width, 16) >> 4;
948     kernel_walker_param.resolution_y = ALIGN(dst_rect->height, 16) >> 4;
949     kernel_walker_param.no_dependency = 1;
950
951     intel_vpp_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
952
953     gen9_run_kernel_media_object_walker(ctx, pp_context->batch,
954                                         gpe_context,
955                                         &media_object_walker_param);
956
957     return VA_STATUS_SUCCESS;
958 }
959
960 static void
961 gen9_gpe_context_yuv420p8_scaling_curbe(VADriverContextP ctx,
962                                struct i965_gpe_context *gpe_context,
963                                VARectangle *src_rect,
964                                struct i965_surface *src_surface,
965                                VARectangle *dst_rect,
966                                struct i965_surface *dst_surface)
967 {
968     struct scaling_input_parameter *scaling_curbe;
969     float src_width, src_height;
970     float coeff;
971     unsigned int fourcc;
972
973     if ((gpe_context == NULL) ||
974         (src_rect == NULL) || (src_surface == NULL) ||
975         (dst_rect == NULL) || (dst_surface == NULL))
976         return;
977
978     scaling_curbe = i965_gpe_context_map_curbe(gpe_context);
979
980     if (!scaling_curbe)
981         return;
982
983     memset(scaling_curbe, 0, sizeof(struct scaling_input_parameter));
984
985     scaling_curbe->bti_input = BTI_SCALING_INPUT_Y;
986     scaling_curbe->bti_output = BTI_SCALING_OUTPUT_Y;
987
988     /* As the src_rect/dst_rect is already checked, it is skipped.*/
989     scaling_curbe->x_dst     = dst_rect->x;
990     scaling_curbe->y_dst     = dst_rect->y;
991
992     src_width = src_rect->x + src_rect->width;
993     src_height = src_rect->y + src_rect->height;
994
995     scaling_curbe->inv_width = 1 / src_width;
996     scaling_curbe->inv_height = 1 / src_height;
997
998     coeff = (float) (src_rect->width) / dst_rect->width;
999     scaling_curbe->x_factor = coeff / src_width;
1000     scaling_curbe->x_orig = (float)(src_rect->x) / src_width;
1001
1002     coeff = (float) (src_rect->height) / dst_rect->height;
1003     scaling_curbe->y_factor = coeff / src_height;
1004     scaling_curbe->y_orig = (float)(src_rect->y) / src_height;
1005
1006     fourcc = pp_get_surface_fourcc(ctx, src_surface);
1007     if (fourcc == VA_FOURCC_NV12) {
1008         scaling_curbe->dw7.src_packed = 1;
1009     }
1010
1011     fourcc = pp_get_surface_fourcc(ctx, dst_surface);
1012
1013     if (fourcc == VA_FOURCC_NV12) {
1014         scaling_curbe->dw7.dst_packed = 1;
1015     }
1016
1017     i965_gpe_context_unmap_curbe(gpe_context);
1018 }
1019
1020 static void
1021 gen9_gpe_context_yuv420p8_scaling_surfaces(VADriverContextP ctx,
1022                                struct i965_gpe_context *gpe_context,
1023                                VARectangle *src_rect,
1024                                struct i965_surface *src_surface,
1025                                VARectangle *dst_rect,
1026                                struct i965_surface *dst_surface)
1027 {
1028     unsigned int fourcc;
1029     int width[3], height[3], pitch[3], bo_offset[3];
1030     dri_bo *bo;
1031     struct object_surface *obj_surface;
1032     struct object_image *obj_image;
1033     int bti;
1034
1035     if ((gpe_context == NULL) ||
1036         (src_rect == NULL) || (src_surface == NULL) ||
1037         (dst_rect == NULL) || (dst_surface == NULL))
1038         return;
1039
1040     if (src_surface->base == NULL || dst_surface->base == NULL)
1041         return;
1042
1043     fourcc = pp_get_surface_fourcc(ctx, src_surface);
1044
1045     if (src_surface->type == I965_SURFACE_TYPE_SURFACE) {
1046         obj_surface = (struct object_surface *)src_surface->base;
1047         bo = obj_surface->bo;
1048     } else {
1049         obj_image = (struct object_image *)src_surface->base;
1050         bo = obj_image->bo;
1051     }
1052
1053     bti = 0;
1054     if (gen9_pp_context_get_surface_conf(ctx, src_surface, src_rect,
1055                                          width, height, pitch,
1056                                          bo_offset)) {
1057         bti = BTI_SCALING_INPUT_Y;
1058         /* Input surface */
1059         gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
1060                                            bo_offset[0],
1061                                            width[0], height[0],
1062                                            pitch[0], 0,
1063                                            I965_SURFACEFORMAT_R8_UNORM,
1064                                            bti, 0);
1065         if (fourcc == VA_FOURCC_NV12) {
1066             gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
1067                                            bo_offset[1],
1068                                            width[1], height[1],
1069                                            pitch[1], 0,
1070                                            I965_SURFACEFORMAT_R8G8_UNORM,
1071                                            bti + 1, 0);
1072         } else {
1073             gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
1074                                            bo_offset[1],
1075                                            width[1], height[1],
1076                                            pitch[1], 0,
1077                                            I965_SURFACEFORMAT_R8_UNORM,
1078                                            bti + 1, 0);
1079
1080             gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
1081                                            bo_offset[2],
1082                                            width[2], height[2],
1083                                            pitch[2], 0,
1084                                            I965_SURFACEFORMAT_R8_UNORM,
1085                                            bti + 2, 0);
1086         }
1087     }
1088
1089     fourcc = pp_get_surface_fourcc(ctx, dst_surface);
1090
1091     if (dst_surface->type == I965_SURFACE_TYPE_SURFACE) {
1092         obj_surface = (struct object_surface *)dst_surface->base;
1093         bo = obj_surface->bo;
1094     } else {
1095         obj_image = (struct object_image *)dst_surface->base;
1096         bo = obj_image->bo;
1097     }
1098
1099     if (gen9_pp_context_get_surface_conf(ctx, dst_surface, dst_rect,
1100                                          width, height, pitch,
1101                                          bo_offset)) {
1102         bti = BTI_SCALING_OUTPUT_Y;
1103         /* Input surface */
1104         gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
1105                                            bo_offset[0],
1106                                            width[0], height[0],
1107                                            pitch[0], 1,
1108                                            I965_SURFACEFORMAT_R8_UINT,
1109                                            bti, 0);
1110         if (fourcc == VA_FOURCC_NV12) {
1111             gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
1112                                            bo_offset[1],
1113                                            width[1] * 2, height[1],
1114                                            pitch[1], 1,
1115                                            I965_SURFACEFORMAT_R16_UINT,
1116                                            bti + 1, 0);
1117         } else {
1118             gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
1119                                            bo_offset[1],
1120                                            width[1], height[1],
1121                                            pitch[1], 1,
1122                                            I965_SURFACEFORMAT_R8_UINT,
1123                                            bti + 1, 0);
1124
1125             gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
1126                                            bo_offset[2],
1127                                            width[2], height[2],
1128                                            pitch[2], 1,
1129                                            I965_SURFACEFORMAT_R8_UINT,
1130                                            bti + 2, 0);
1131         }
1132     }
1133
1134     return;
1135 }
1136
1137 VAStatus
1138 gen9_yuv420p8_scaling_post_processing(
1139     VADriverContextP   ctx,
1140     struct i965_post_processing_context *pp_context,
1141     struct i965_surface *src_surface,
1142     VARectangle *src_rect,
1143     struct i965_surface *dst_surface,
1144     VARectangle *dst_rect)
1145 {
1146     struct i965_gpe_context *gpe_context;
1147     struct gpe_media_object_walker_parameter media_object_walker_param;
1148     struct intel_vpp_kernel_walker_parameter kernel_walker_param;
1149
1150     if (!pp_context || !src_surface || !src_rect || !dst_surface || !dst_rect)
1151         return VA_STATUS_ERROR_INVALID_PARAMETER;
1152
1153     if (!(pp_context->scaling_8bit_initialized & VPPGPE_8BIT_420))
1154         return VA_STATUS_ERROR_UNIMPLEMENTED;
1155
1156     gpe_context = &pp_context->scaling_yuv420p8_context;
1157
1158     gen8_gpe_context_init(ctx, gpe_context);
1159     gen9_vpp_scaling_sample_state(ctx, gpe_context, src_rect, dst_rect);
1160     gen9_gpe_reset_binding_table(ctx, gpe_context);
1161     gen9_gpe_context_yuv420p8_scaling_curbe(ctx, gpe_context,
1162                                         src_rect, src_surface,
1163                                         dst_rect, dst_surface);
1164
1165     gen9_gpe_context_yuv420p8_scaling_surfaces(ctx, gpe_context,
1166                                         src_rect, src_surface,
1167                                         dst_rect, dst_surface);
1168
1169     gen8_gpe_setup_interface_data(ctx, gpe_context);
1170
1171     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1172     kernel_walker_param.resolution_x = ALIGN(dst_rect->width, 16) >> 4;
1173     kernel_walker_param.resolution_y = ALIGN(dst_rect->height, 16) >> 4;
1174     kernel_walker_param.no_dependency = 1;
1175
1176     intel_vpp_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
1177
1178     gen9_run_kernel_media_object_walker(ctx, pp_context->batch,
1179                                         gpe_context,
1180                                         &media_object_walker_param);
1181
1182     return VA_STATUS_SUCCESS;
1183 }