src/gen75_vpp_gpe.c
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Li Xiaowei <xiaowei.a.li@intel.com>
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_structs.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "gen75_vpp_gpe.h"

#define MAX_INTERFACE_DESC_GEN6      MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6      34

#define SURFACE_STATE_OFFSET_GEN7(index)   (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
#define BINDING_TABLE_OFFSET_GEN7(index)   (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define SURFACE_STATE_OFFSET_GEN8(index)   (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
#define BINDING_TABLE_OFFSET_GEN8(index)   (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
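
/*
 * The two macro pairs above describe one shared layout for the
 * surface-state/binding-table buffer: the padded surface-state entries for
 * up to MAX_MEDIA_SURFACES_GEN6 surfaces come first, and the binding table
 * (an array of unsigned int offsets, one per surface) follows immediately
 * after them. A rough sketch of the resulting buffer, under that reading:
 *
 *   offset 0                                   : surface state 0
 *   offset SURFACE_STATE_PADDED_SIZE * 1       : surface state 1
 *   ...
 *   offset SURFACE_STATE_PADDED_SIZE * 34      : binding table entry 0, 1, ...
 */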

#define CURBE_ALLOCATION_SIZE   37
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
#define CURBE_URB_ENTRY_LENGTH  4

/* Shader binaries for sharpening */
static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
   #include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
   #include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
   #include "shaders/post_processing/gen75/sharpening_unmask.g75b"
};
static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_h_blur,
        sizeof(gen75_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_v_blur,
        sizeof(gen75_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_unmask,
        sizeof(gen75_gpe_sharpening_unmask),
        NULL
    },
};
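
/*
 * These three kernels are run back to back by vpp_gpe_process_sharpening():
 * a horizontal blur pass, a vertical blur pass, and an "unmask" pass that
 * combines the blurred result with the original surface (unsharp masking).
 */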

/* sharpening kernels for Broadwell */
static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
   #include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
   #include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
   #include "shaders/post_processing/gen8/sharpening_unmask.g8b"
};

static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_h_blur,
        sizeof(gen8_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_v_blur,
        sizeof(gen8_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_unmask,
        sizeof(gen8_gpe_sharpening_unmask),
        NULL
    },
};

static VAStatus
gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind input NV12 surfaces (Luma + Chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         gen7_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN7(i),
                                          SURFACE_STATE_OFFSET_GEN7(i));

         gen75_gpe_media_chroma_surface_setup(ctx,
                                          &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN7(i + 1),
                                          SURFACE_STATE_OFFSET_GEN7(i + 1));
    }

    /* Bind output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen7_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum));
    gen75_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1));
    /* Bind kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                  &vpp_gpe_ctx->gpe_ctx,
                                  &vpp_gpe_ctx->vpp_kernel_return,
                                  BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 2),
                                  SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}
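
/*
 * After the setup above, the binding table laid out by this function looks
 * roughly like this (N = number of input surfaces, i.e. current frame plus
 * forward/backward references, so input_surface_sum = 2N):
 *
 *   slot 0 .. 2N-1 : input luma/chroma pairs (one pair per NV12 surface)
 *   slot 2N, 2N+1  : output luma/chroma
 *   slot 2N+2      : kernel return buffer
 */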

static VAStatus
gen75_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    /* Set up the interface descriptor table, one entry per kernel */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 6; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 0;

        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++)
    {
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}
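
/*
 * Each iteration above emits one MEDIA_OBJECT command: the DWord-length
 * field in the header is (total DWords - 2), i.e. six fixed DWords plus
 * size/sizeof(int) DWords of inline data, minus two. DWord 1 selects the
 * interface descriptor (sub_shader_index), DWords 2-5 are left zero, and
 * the per-thread parameters follow as inline data. The second-level batch
 * is then terminated with an MI_NOOP and MI_BATCH_BUFFER_END.
 */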

static VAStatus
gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}
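
/*
 * The MI_BATCH_BUFFER_START above chains from the main batch into the
 * separately built vpp_batchbuffer holding the MEDIA_OBJECT commands.
 * Bit 8 appears to be the usual "non-secure" (PPGTT address space) flag
 * used elsewhere in this driver for chained batches.
 */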

static VAStatus
gen75_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind input NV12 surfaces (Luma + Chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         gen8_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN8(i),
                                          SURFACE_STATE_OFFSET_GEN8(i));

         gen8_gpe_media_chroma_surface_setup(ctx,
                                          &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN8(i + 1),
                                          SURFACE_STATE_OFFSET_GEN8(i + 1));
    }

    /* Bind output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen8_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum));
    gen8_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1));
    /* Bind kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                  &vpp_gpe_ctx->gpe_ctx,
                                  &vpp_gpe_ctx->vpp_kernel_return,
                                  BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 2),
                                  SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen8_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = (struct gen8_interface_descriptor_data *)(bo->virtual
                               + vpp_gpe_ctx->gpe_ctx.idrt_offset);

    /* Set up the interface descriptor table, one entry per kernel */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel;
        kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0; /* FIXME: */
        desc->desc3.sampler_state_pointer = 0;
        desc->desc4.binding_table_entry_count = 6; /* FIXME: */
        desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = 0;

        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}
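
/*
 * Unlike the Gen7.5 path, which points each descriptor at a separate kernel
 * BO and emits a relocation for it, the Gen8+ path writes the descriptors
 * into the dynamic state buffer at idrt_offset and uses kernel_offset,
 * suggesting the kernel code already lives in the same dynamic-state heap,
 * so no relocation is needed.
 */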

static VAStatus
gen8_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++)
    {
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);

         /* Gen8+ only: emit a MEDIA_STATE_FLUSH after each MEDIA_OBJECT */
         *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
         *command_ptr++ = 0;
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(vpp_gpe_ctx->batch, 0);

    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}
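
/*
 * Same chaining as the Gen7.5 variant; on Gen8+ the MI_BATCH_BUFFER_START
 * command appears to be one DWord longer (hence the extra length bit in the
 * header and the trailing OUT_BATCH(0)) because batch addresses are 48-bit
 * on these platforms.
 */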

static VAStatus
gen8_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    /* Per thread: 6 DWords of MEDIA_OBJECT header, the inline data and
     * 2 DWords of MEDIA_STATE_FLUSH; 16 bytes of slack covers the batch end. */
    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 8 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;

    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

static VAStatus
vpp_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    if (IS_HASWELL(i965->intel.device_info))
       return gen75_gpe_process(ctx, vpp_gpe_ctx);
    else if (IS_GEN8(i965->intel.device_info) ||
             IS_GEN9(i965->intel.device_info))
       return gen8_gpe_process(ctx, vpp_gpe_ctx);

    return VA_STATUS_ERROR_UNIMPLEMENTED;
}

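/*
 * Sharpening is implemented as unsharp masking in three GPE passes, each
 * dispatched through vpp_gpe_process() with a different kernel:
 *
 *   1. horizontal blur : original input  -> output surface (used as scratch)
 *   2. vertical blur   : h-blurred data  -> temporary surface
 *   3. unmask          : original input + blurred temp -> output surface
 *
 * Each pass splits the frame into strips (16 rows/columns for the blur
 * passes, 4 rows for unmask) and passes one ThreadParameterSharpening per
 * strip as MEDIA_OBJECT inline data.
 */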
static VAStatus
vpp_gpe_process_sharpening(VADriverContextP ctx,
                             struct vpp_gpe_context * vpp_gpe_ctx)
{
     VAStatus va_status = VA_STATUS_SUCCESS;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
     struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;

     VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
     VABufferID *filter_ids = (VABufferID*)pipe->filters;
     struct object_buffer *obj_buf = BUFFER(*(filter_ids + 0));

     assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

     if (!obj_buf ||
         !obj_buf->buffer_store ||
         !obj_buf->buffer_store->buffer)
         goto error;

     VAProcFilterParameterBuffer* filter =
                  (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
     float sharpening_intensity = filter->value;

     ThreadParameterSharpening thr_param;
     unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
     unsigned int i;
     unsigned char * pos;

     if (vpp_gpe_ctx->is_first_frame) {
         vpp_gpe_ctx->sub_shader_sum = 3;
         struct i965_kernel * vpp_kernels;
         if (IS_HASWELL(i965->intel.device_info))
             vpp_kernels = gen75_vpp_sharpening_kernels;
         else if (IS_GEN8(i965->intel.device_info) ||
                  IS_GEN9(i965->intel.device_info)) // TODO: build the sharpening kernel for GEN9
             vpp_kernels = gen8_vpp_sharpening_kernels;
         else
             return VA_STATUS_ERROR_UNIMPLEMENTED;

         vpp_gpe_ctx->gpe_load_kernels(ctx,
                               &vpp_gpe_ctx->gpe_ctx,
                               vpp_kernels,
                               vpp_gpe_ctx->sub_shader_sum);
     }

     if (vpp_gpe_ctx->surface_tmp == VA_INVALID_ID) {
        va_status = i965_CreateSurfaces(ctx,
                                       vpp_gpe_ctx->in_frame_w,
                                       vpp_gpe_ctx->in_frame_h,
                                       VA_RT_FORMAT_YUV420,
                                       1,
                                       &vpp_gpe_ctx->surface_tmp);
        assert(va_status == VA_STATUS_SUCCESS);

        struct object_surface * obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
        assert(obj_surf);

        if (obj_surf) {
            i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
                                        SUBSAMPLE_YUV420);
            vpp_gpe_ctx->surface_tmp_object = obj_surf;
        }
    }

    assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
    thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
    thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);

    thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
    thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;

    /* Step 1: horizontal blur process */
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       *vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 16 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 0;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 2: vertical blur process */
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
    vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w/16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       *vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 0;
        thr_param.base.h_pos = 16 * i;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 1;
    vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 3: apply the blur to the original surface */
    vpp_gpe_ctx->surface_input_object[0]  = origin_in_obj_surface;
    vpp_gpe_ctx->surface_input_object[1]  = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->surface_output_object    = origin_out_obj_surface;
    vpp_gpe_ctx->forward_surf_sum  = 1;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/4;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       *vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 4 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 2;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
                    struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
    VAProcFilterParameterBuffer* filter = NULL;
    unsigned int i;
    struct object_surface *obj_surface = NULL;

    if (pipe->num_filters && !pipe->filters)
        goto error;

    /* Find the first sharpening filter in the pipeline's filter list */
    for (i = 0; i < pipe->num_filters; i++) {
        struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);

        assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

        if (!obj_buf ||
            !obj_buf->buffer_store ||
            !obj_buf->buffer_store->buffer)
            goto error;

        filter = (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
        if (filter->type == VAProcFilterSharpening) {
           break;
        }
    }

    assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;

    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    for (i = 0; i < pipe->num_forward_references; i++)
    {
        obj_surface = SURFACE(pipe->forward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
        vpp_gpe_ctx->forward_surf_sum++;
    }

    for (i = 0; i < pipe->num_backward_references; i++)
    {
        obj_surface = SURFACE(pipe->backward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i] = obj_surface;
        vpp_gpe_ctx->backward_surf_sum++;
    }

    obj_surface = vpp_gpe_ctx->surface_input_object[0];
    vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
    vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;

    if (filter && filter->type == VAProcFilterSharpening) {
       va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
    } else {
       va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
    }

    vpp_gpe_ctx->is_first_frame = 0;

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}
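
/*
 * For reference, a minimal client-side sketch of how this entry point is
 * reached through the VA-API video-processing interface (illustrative only;
 * va_dpy, vpp_ctx, src_surface and dst_surface are placeholders supplied by
 * the application):
 *
 *   VAProcFilterParameterBuffer sharpen = {
 *       .type  = VAProcFilterSharpening,
 *       .value = 0.5f,   // asserted to be in [0.0, 1.0] by vpp_gpe_process_sharpening()
 *   };
 *   VABufferID filter_buf;
 *   vaCreateBuffer(va_dpy, vpp_ctx, VAProcFilterParameterBufferType,
 *                  sizeof(sharpen), 1, &sharpen, &filter_buf);
 *
 *   VAProcPipelineParameterBuffer pipeline = {
 *       .surface     = src_surface,
 *       .filters     = &filter_buf,
 *       .num_filters = 1,
 *   };
 *   VABufferID pipeline_buf;
 *   vaCreateBuffer(va_dpy, vpp_ctx, VAProcPipelineParameterBufferType,
 *                  sizeof(pipeline), 1, &pipeline, &pipeline_buf);
 *
 *   vaBeginPicture(va_dpy, vpp_ctx, dst_surface);
 *   vaRenderPicture(va_dpy, vpp_ctx, &pipeline_buf, 1);
 *   vaEndPicture(va_dpy, vpp_ctx);
 */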

void
vpp_gpe_context_destroy(VADriverContextP ctx,
                               struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    vpp_gpe_ctx->vpp_kernel_return.bo = NULL;

    vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);

    if (vpp_gpe_ctx->surface_tmp != VA_INVALID_ID) {
        assert(vpp_gpe_ctx->surface_tmp_object != NULL);
        i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
        vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
        vpp_gpe_ctx->surface_tmp_object = NULL;
    }

    if (vpp_gpe_ctx->batch)
        intel_batchbuffer_free(vpp_gpe_ctx->batch);

    free(vpp_gpe_ctx);
}

struct vpp_gpe_context *
vpp_gpe_context_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct vpp_gpe_context  *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
    assert(vpp_gpe_ctx);
    struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);

    assert(IS_HASWELL(i965->intel.device_info) ||
           IS_GEN8(i965->intel.device_info) ||
           IS_GEN9(i965->intel.device_info));

    vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
    vpp_gpe_ctx->surface_tmp_object = NULL;
    vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
    vpp_gpe_ctx->is_first_frame = 1;

    gpe_ctx->vfe_state.max_num_threads = 60 - 1;
    gpe_ctx->vfe_state.num_urb_entries = 16;
    gpe_ctx->vfe_state.gpgpu_mode = 0;
    gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
    gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;

    if (IS_HASWELL(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = i965_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = i965_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = i965_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
        gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);

    } else if (IS_GEN8(i965->intel.device_info) ||
               IS_GEN9(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = gen8_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = gen8_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = gen8_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt_size  = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;

    }

    return vpp_gpe_ctx;
}