/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Li Xiaowei <xiaowei.a.li@intel.com>
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_structs.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "gen75_vpp_gpe.h"

#define MAX_INTERFACE_DESC_GEN6      MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6      34

#define SURFACE_STATE_OFFSET_GEN7(index)   (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
#define BINDING_TABLE_OFFSET_GEN7(index)   (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define SURFACE_STATE_OFFSET_GEN8(index)   (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
#define BINDING_TABLE_OFFSET_GEN8(index)   (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define CURBE_ALLOCATION_SIZE   37
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
#define CURBE_URB_ENTRY_LENGTH  4

/* Shader information for sharpening */
static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
#include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
#include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
#include "shaders/post_processing/gen75/sharpening_unmask.g75b"
};
static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_h_blur,
        sizeof(gen75_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_v_blur,
        sizeof(gen75_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_unmask,
        sizeof(gen75_gpe_sharpening_unmask),
        NULL
    },
};

/* sharpening kernels for Broadwell */
static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
#include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
#include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
#include "shaders/post_processing/gen8/sharpening_unmask.g8b"
};

static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_h_blur,
        sizeof(gen8_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_v_blur,
        sizeof(gen8_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_unmask,
        sizeof(gen8_gpe_sharpening_unmask),
        NULL
    },
};

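/*
 * Binding table layout shared by the VPP GPE kernels (derived from the
 * offsets used below): two entries (luma, then chroma) per input NV12
 * surface, followed by two entries for the output surface and one entry
 * for the kernel return buffer.
 */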
static VAStatus
gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                       vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Binding input NV12 surfaces (Luma + Chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
        obj_surface = vpp_gpe_ctx->surface_input_object[i / 2];
        assert(obj_surface);
        gen7_gpe_media_rw_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN7(i),
                                        SURFACE_STATE_OFFSET_GEN7(i),
                                        0);

        gen75_gpe_media_chroma_surface_setup(ctx,
                                             &vpp_gpe_ctx->gpe_ctx,
                                             obj_surface,
                                             BINDING_TABLE_OFFSET_GEN7(i + 1),
                                             SURFACE_STATE_OFFSET_GEN7(i + 1),
                                             0);
    }

    /* Binding output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen7_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum),
                                    1);
    gen75_gpe_media_chroma_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                         obj_surface,
                                         BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
                                         SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1),
                                         1);
    /* Bind kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                 &vpp_gpe_ctx->gpe_ctx,
                                 &vpp_gpe_ctx->vpp_kernel_return,
                                 BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 2),
                                 SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_interface_setup(VADriverContextP ctx,
                                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 6; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 0;

        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

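/*
 * Fill the secondary (vpp) batch buffer with one MEDIA_OBJECT command per
 * thread: the opcode/length DWord (its length field covers the six fixed
 * DWords plus the inline data), the interface descriptor index that selects
 * the sub-shader, four zeroed DWords, and the per-thread parameters copied
 * in as inline data.
 */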
static VAStatus
gen75_gpe_process_parameters_fill(VADriverContextP ctx,
                                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        *command_ptr++ = (CMD_MEDIA_OBJECT | (size / sizeof(int) + 6 - 2));
        *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;

        /* copy thread inline data */
        position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
        memcpy(command_ptr, position, size);
        command_ptr += size / sizeof(int);
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_init(VADriverContextP ctx,
                       struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                                  (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
                                      * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                      batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                      kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_prepare(VADriverContextP ctx,
                          struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_run(VADriverContextP ctx,
                      struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
                                struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                       vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Binding input NV12 surfaces (Luma + Chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
        obj_surface = vpp_gpe_ctx->surface_input_object[i / 2];
        assert(obj_surface);
        gen8_gpe_media_rw_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN8(i),
                                        SURFACE_STATE_OFFSET_GEN8(i),
                                        0);

        gen8_gpe_media_chroma_surface_setup(ctx,
                                            &vpp_gpe_ctx->gpe_ctx,
                                            obj_surface,
                                            BINDING_TABLE_OFFSET_GEN8(i + 1),
                                            SURFACE_STATE_OFFSET_GEN8(i + 1),
                                            0);
    }

    /* Binding output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen8_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum),
                                    1);
    gen8_gpe_media_chroma_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
                                        SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1),
                                        1);
    /* Bind kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                 &vpp_gpe_ctx->gpe_ctx,
                                 &vpp_gpe_ctx->vpp_kernel_return,
                                 BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 2),
                                 SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_interface_setup(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen8_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = (struct gen8_interface_descriptor_data *)(bo->virtual
                                                     + vpp_gpe_ctx->gpe_ctx.idrt.offset);

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel;
        kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0; /* FIXME: */
        desc->desc3.sampler_state_pointer = 0;
        desc->desc4.binding_table_entry_count = 6; /* FIXME: */
        desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = 0;

        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_parameters_fill(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        *command_ptr++ = (CMD_MEDIA_OBJECT | (size / sizeof(int) + 6 - 2));
        *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;

        /* copy thread inline data */
        position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
        memcpy(command_ptr, position, size);
        command_ptr += size / sizeof(int);

        *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
        *command_ptr++ = 0;
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
                                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(vpp_gpe_ctx->batch, 0);

    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_init(VADriverContextP ctx,
                      struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                                  (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;

    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
                                      * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                      batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                      kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_prepare(VADriverContextP ctx,
                         struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_run(VADriverContextP ctx,
                     struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process(VADriverContextP ctx,
                 struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

static VAStatus
vpp_gpe_process(VADriverContextP ctx,
                struct vpp_gpe_context * vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    if (IS_HASWELL(i965->intel.device_info))
        return gen75_gpe_process(ctx, vpp_gpe_ctx);
    else if (IS_GEN8(i965->intel.device_info) ||
             IS_GEN9(i965->intel.device_info) ||
             IS_GEN10(i965->intel.device_info))
        return gen8_gpe_process(ctx, vpp_gpe_ctx);

    return VA_STATUS_ERROR_UNIMPLEMENTED;
}

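/*
 * Sharpening is run as an unsharp-mask filter in three GPE passes:
 *   1. horizontal blur of the input into the output surface,
 *   2. vertical blur of that result into a temporary surface,
 *   3. "unmask" pass that combines the blurred temporary with the
 *      original input to produce the sharpened output.
 * Each pass is split into per-strip threads (16 rows or columns for the
 * blur passes, 4 rows for the unmask pass).
 */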
static VAStatus
vpp_gpe_process_sharpening(VADriverContextP ctx,
                           struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
    struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;

    VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
    VABufferID *filter_ids = (VABufferID*)pipe->filters;
    struct object_buffer *obj_buf = BUFFER((*(filter_ids + 0)));

    assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

    if (!obj_buf ||
        !obj_buf->buffer_store ||
        !obj_buf->buffer_store->buffer)
        goto error;

    VAProcFilterParameterBuffer* filter =
        (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
    float sharpening_intensity = filter->value;

    ThreadParameterSharpening thr_param;
    unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
    unsigned int i;
    unsigned char * pos;

    if (vpp_gpe_ctx->is_first_frame) {
        vpp_gpe_ctx->sub_shader_sum = 3;
        struct i965_kernel * vpp_kernels;
        if (IS_HASWELL(i965->intel.device_info))
            vpp_kernels = gen75_vpp_sharpening_kernels;
        else if (IS_GEN8(i965->intel.device_info) ||
                 IS_GEN9(i965->intel.device_info) ||
                 IS_GEN10(i965->intel.device_info))
            vpp_kernels = gen8_vpp_sharpening_kernels;
        else
            return VA_STATUS_ERROR_UNIMPLEMENTED;

        vpp_gpe_ctx->gpe_load_kernels(ctx,
                                      &vpp_gpe_ctx->gpe_ctx,
                                      vpp_kernels,
                                      vpp_gpe_ctx->sub_shader_sum);
    }

    if (vpp_gpe_ctx->surface_tmp == VA_INVALID_ID) {
        va_status = i965_CreateSurfaces(ctx,
                                        vpp_gpe_ctx->in_frame_w,
                                        vpp_gpe_ctx->in_frame_h,
                                        VA_RT_FORMAT_YUV420,
                                        1,
                                        &vpp_gpe_ctx->surface_tmp);
        assert(va_status == VA_STATUS_SUCCESS);

        struct object_surface * obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
        assert(obj_surf);

        if (obj_surf) {
            i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
                                        SUBSAMPLE_YUV420);
            vpp_gpe_ctx->surface_tmp_object = obj_surf;
        }
    }

    assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
    thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
    thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);

    thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
    thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;

    /* Step 1: horizontal blur process */
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h / 16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 16 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 0;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 2: vertical blur process */
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
    vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w / 16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 0;
        thr_param.base.h_pos = 16 * i;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 1;
    vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 3: apply the blur to the original surface */
    vpp_gpe_ctx->surface_input_object[0]  = origin_in_obj_surface;
    vpp_gpe_ctx->surface_input_object[1]  = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->surface_output_object    = origin_out_obj_surface;
    vpp_gpe_ctx->forward_surf_sum  = 1;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h / 4;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 4 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 2;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

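/*
 * Entry point from the post-processing pipeline: look up the sharpening
 * filter parameter buffer, gather the input surface plus any forward and
 * backward reference surfaces, and dispatch the GPE sharpening passes.
 * Only VAProcFilterSharpening is handled here.
 */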
VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
                                 struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
    VAProcFilterParameterBuffer* filter = NULL;
    unsigned int i;
    struct object_surface *obj_surface = NULL;

    if (pipe->num_filters && !pipe->filters)
        goto error;

    for (i = 0; i < pipe->num_filters; i++) {
        struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);

        assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

        if (!obj_buf ||
            !obj_buf->buffer_store ||
            !obj_buf->buffer_store->buffer)
            goto error;

        filter = (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
        if (filter->type == VAProcFilterSharpening) {
            break;
        }
    }

    assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;

    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    for (i = 0; i < pipe->num_forward_references; i++) {
        obj_surface = SURFACE(pipe->forward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
        vpp_gpe_ctx->forward_surf_sum++;
    }

    for (i = 0; i < pipe->num_backward_references; i++) {
        obj_surface = SURFACE(pipe->backward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i] = obj_surface;
        vpp_gpe_ctx->backward_surf_sum++;
    }

    obj_surface = vpp_gpe_ctx->surface_input_object[0];
    vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
    vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;

    if (filter && filter->type == VAProcFilterSharpening) {
        va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
    } else {
        va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
    }

    vpp_gpe_ctx->is_first_frame = 0;

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

void
vpp_gpe_context_destroy(VADriverContextP ctx,
                        struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    vpp_gpe_ctx->vpp_kernel_return.bo = NULL;

    vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);

    if (vpp_gpe_ctx->surface_tmp != VA_INVALID_ID) {
        assert(vpp_gpe_ctx->surface_tmp_object != NULL);
        i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
        vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
        vpp_gpe_ctx->surface_tmp_object = NULL;
    }

    if (vpp_gpe_ctx->batch)
        intel_batchbuffer_free(vpp_gpe_ctx->batch);

    free(vpp_gpe_ctx);
}

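/*
 * Create the VPP GPE context and hook up the generation-specific GPE
 * callbacks (context init/destroy, kernel loading) together with the
 * matching surface-state and interface-descriptor sizes for Gen7.5
 * versus Gen8 and later.
 */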
struct vpp_gpe_context *
vpp_gpe_context_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct vpp_gpe_context *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
    assert(vpp_gpe_ctx);
    struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);

    assert(IS_HASWELL(i965->intel.device_info) ||
           IS_GEN8(i965->intel.device_info) ||
           IS_GEN9(i965->intel.device_info) ||
           IS_GEN10(i965->intel.device_info));

    vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
    vpp_gpe_ctx->surface_tmp_object = NULL;
    vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
    vpp_gpe_ctx->is_first_frame = 1;

    gpe_ctx->vfe_state.max_num_threads = 60 - 1;
    gpe_ctx->vfe_state.num_urb_entries = 16;
    gpe_ctx->vfe_state.gpgpu_mode = 0;
    gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
    gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;

    if (IS_HASWELL(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = i965_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = i965_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = i965_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
            (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
        gpe_ctx->idrt.entry_size = ALIGN(sizeof(struct gen6_interface_descriptor_data), 64);

    } else if (IS_GEN8(i965->intel.device_info) ||
               IS_GEN9(i965->intel.device_info) ||
               IS_GEN10(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = gen8_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = gen8_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = gen8_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
            (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
    }

    return vpp_gpe_ctx;
}