src/gen75_vpp_gpe.c (android-x86/hardware-intel-common-vaapi.git)
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Li Xiaowei <xiaowei.a.li@intel.com>
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_structs.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "gen75_vpp_gpe.h"

#define MAX_INTERFACE_DESC_GEN6      MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6      34

#define SURFACE_STATE_OFFSET_GEN7(index)   (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
#define BINDING_TABLE_OFFSET_GEN7(index)   (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define SURFACE_STATE_OFFSET_GEN8(index)   (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
#define BINDING_TABLE_OFFSET_GEN8(index)   (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
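
/*
 * The offset macros above encode the layout of the surface state buffer:
 * padded surface state entries for up to MAX_MEDIA_SURFACES_GEN6 surfaces
 * come first, followed by the binding table, one 32-bit entry per surface.
 */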

#define CURBE_ALLOCATION_SIZE   37
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
#define CURBE_URB_ENTRY_LENGTH  4

/* Sharpening kernels for Haswell: the .g75b files are precompiled shader
 * binaries included as arrays of instruction DWORDs at build time. */
static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
   #include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
   #include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
   #include "shaders/post_processing/gen75/sharpening_unmask.g75b"
};
static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_h_blur,
        sizeof(gen75_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_v_blur,
        sizeof(gen75_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_unmask,
        sizeof(gen75_gpe_sharpening_unmask),
        NULL
    },
};

/* Sharpening kernels for Broadwell */
static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
   #include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
   #include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
   #include "shaders/post_processing/gen8/sharpening_unmask.g8b"
};

static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_h_blur,
        sizeof(gen8_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_v_blur,
        sizeof(gen8_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_unmask,
        sizeof(gen8_gpe_sharpening_unmask),
        NULL
    },
};

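/*
 * Bind the memory objects used by the kernels on Haswell: the Y and UV
 * planes of the current input surface and of any forward/backward reference
 * surfaces, the Y and UV planes of the output surface, and finally the
 * kernel return buffer.
 */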
static VAStatus
gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    /* Two binding table entries (Y plane + UV plane) per NV12 surface:
     * the current input plus its forward and backward references. */
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind input NV12 surfaces (Luma + Chroma) */
    for( i = 0; i < input_surface_sum; i += 2){
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         gen7_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN7(i),
                                          SURFACE_STATE_OFFSET_GEN7(i),
                                          0);

         gen75_gpe_media_chroma_surface_setup(ctx,
                                          &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN7(i + 1),
                                          SURFACE_STATE_OFFSET_GEN7(i + 1),
                                          0);
    }

    /* Bind output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen7_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum),
                                    1);
    gen75_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1),
                                    1);
    /* Bind kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                  &vpp_gpe_ctx->gpe_ctx,
                                  &vpp_gpe_ctx->vpp_kernel_return,
                                  BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 2),
                                  SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    /* Set up the interface descriptor table, one entry per kernel */
    for(i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++){
        struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        /* Kernel start pointer is stored in 64-byte units */
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 6; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 0;

        /* Emit a relocation so desc0 picks up the final kernel BO address */
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

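/*
 * Fill the second-level batch buffer with one MEDIA_OBJECT command per
 * thread; the per-thread parameters are copied as inline data after the
 * six-DWORD command header.
 */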
static VAStatus
gen75_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for(i = 0; i < vpp_gpe_ctx->thread_num; i ++)
    {
         /* DWord length field: 6 header DWORDs plus the inline data, minus
          * the 2 DWORDs not counted by the command length encoding */
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    /* Chain to the batch buffer that holds the MEDIA_OBJECT commands */
    BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

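/*
 * Per-frame initialization: (re)allocate the batch buffer that will hold the
 * MEDIA_OBJECT commands and the buffer the kernels write their return data
 * into, then let the gen-specific hook initialize the GPE context.
 */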
static VAStatus
gen75_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    /* Each thread needs a 6-DWORD MEDIA_OBJECT header plus its inline
     * parameters; the trailing 16 bytes cover the batch-buffer end */
    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

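/*
 * The gen8_* functions below mirror the Haswell path for Broadwell (and, via
 * vpp_gpe_process(), Gen9), using the GEN8 descriptor layout, surface setup
 * helpers and binding table offsets.
 */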
static VAStatus
gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    /* Two binding table entries (Y plane + UV plane) per NV12 surface */
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind input NV12 surfaces (Luma + Chroma) */
    for( i = 0; i < input_surface_sum; i += 2){
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         gen8_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN8(i),
                                          SURFACE_STATE_OFFSET_GEN8(i),
                                          0);

         gen8_gpe_media_chroma_surface_setup(ctx,
                                          &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN8(i + 1),
                                          SURFACE_STATE_OFFSET_GEN8(i + 1),
                                          0);
    }

    /* Bind output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen8_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum),
                                    1);
    gen8_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1),
                                    1);
    /* Bind kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                  &vpp_gpe_ctx->gpe_ctx,
                                  &vpp_gpe_ctx->vpp_kernel_return,
                                  BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 2),
                                  SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen8_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = (struct gen8_interface_descriptor_data *)(bo->virtual
                               + vpp_gpe_ctx->gpe_ctx.idrt.offset);

    /* Set up the interface descriptor table, one entry per kernel */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++){
        struct i965_kernel *kernel;
        kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        /* kernel_offset is an offset into the loaded kernel pool, stored
         * in 64-byte units */
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0; /* FIXME: */
        desc->desc3.sampler_state_pointer = 0;
        desc->desc4.binding_table_entry_count = 6; /* FIXME: */
        desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = 0;

        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

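/*
 * Same layout as the Haswell version, except that on Gen8+ every
 * MEDIA_OBJECT is followed by a MEDIA_STATE_FLUSH (two extra DWORDs per
 * thread).
 */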
static VAStatus
gen8_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for(i = 0; i < vpp_gpe_ctx->thread_num; i ++)
    {
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);

         *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
         *command_ptr++ = 0;
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    /* Chain to the batch buffer that holds the MEDIA_OBJECT commands */
    BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(vpp_gpe_ctx->batch, 0);

    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    /* Each thread needs a 6-DWORD MEDIA_OBJECT header, its inline parameters
     * and a 2-DWORD MEDIA_STATE_FLUSH; the trailing 16 bytes cover the
     * batch-buffer end */
    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 8 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;

    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

static VAStatus
vpp_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    if (IS_HASWELL(i965->intel.device_info))
        return gen75_gpe_process(ctx, vpp_gpe_ctx);
    else if (IS_GEN8(i965->intel.device_info) ||
             IS_GEN9(i965->intel.device_info))
        return gen8_gpe_process(ctx, vpp_gpe_ctx);

    return VA_STATUS_ERROR_UNIMPLEMENTED;
}

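/*
 * Sharpening is implemented as three GPE passes over the frame:
 *   1. horizontal blur of the input into the output surface,
 *   2. vertical blur of that result into a temporary surface,
 *   3. unsharp masking that combines the blurred copy with the original
 *      input and writes the sharpened frame to the output surface.
 */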
static VAStatus
vpp_gpe_process_sharpening(VADriverContextP ctx,
                             struct vpp_gpe_context * vpp_gpe_ctx)
{
     VAStatus va_status = VA_STATUS_SUCCESS;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
     struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;

     VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
     VABufferID *filter_ids = (VABufferID*)pipe->filters;
     struct object_buffer *obj_buf = BUFFER((*(filter_ids + 0)));

     assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

     if (!obj_buf ||
         !obj_buf->buffer_store ||
         !obj_buf->buffer_store->buffer)
         goto error;

     VAProcFilterParameterBuffer* filter =
                  (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
     float sharpening_intensity = filter->value;

     ThreadParameterSharpening thr_param;
     unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
     unsigned int i;
     unsigned char * pos;

     if(vpp_gpe_ctx->is_first_frame){
         vpp_gpe_ctx->sub_shader_sum = 3;
         struct i965_kernel * vpp_kernels;
         if (IS_HASWELL(i965->intel.device_info))
             vpp_kernels = gen75_vpp_sharpening_kernels;
         else if (IS_GEN8(i965->intel.device_info) ||
                  IS_GEN9(i965->intel.device_info)) // TODO: build the sharpening kernel for GEN9
             vpp_kernels = gen8_vpp_sharpening_kernels;
         else
             return VA_STATUS_ERROR_UNIMPLEMENTED;

         vpp_gpe_ctx->gpe_load_kernels(ctx,
                               &vpp_gpe_ctx->gpe_ctx,
                               vpp_kernels,
                               vpp_gpe_ctx->sub_shader_sum);
     }

     /* Allocate a temporary NV12 surface for the intermediate blur result */
     if(vpp_gpe_ctx->surface_tmp == VA_INVALID_ID){
        va_status = i965_CreateSurfaces(ctx,
                                       vpp_gpe_ctx->in_frame_w,
                                       vpp_gpe_ctx->in_frame_h,
                                       VA_RT_FORMAT_YUV420,
                                       1,
                                       &vpp_gpe_ctx->surface_tmp);
        assert(va_status == VA_STATUS_SUCCESS);

        struct object_surface * obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
        assert(obj_surf);

        if (obj_surf) {
            i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
                                        SUBSAMPLE_YUV420);
            vpp_gpe_ctx->surface_tmp_object = obj_surf;
        }
    }

    assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
    thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
    thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);

    thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
    thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;

    /* Step 1: horizontal blur process */
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    /* One thread per 16-line slice */
    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       *vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
        thr_param.base.v_pos = 16 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 0;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 2: vertical blur process */
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
    vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    /* One thread per 16-column slice */
    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w/16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       *vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
        thr_param.base.v_pos = 0;
        thr_param.base.h_pos = 16 * i;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 1;
    vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 3: apply the blur to the original surface */
    vpp_gpe_ctx->surface_input_object[0]  = origin_in_obj_surface;
    vpp_gpe_ctx->surface_input_object[1]  = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->surface_output_object    = origin_out_obj_surface;
    vpp_gpe_ctx->forward_surf_sum  = 1;
    vpp_gpe_ctx->backward_surf_sum = 0;

    /* One thread per 4-line slice */
    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/4;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       *vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
        thr_param.base.v_pos = 4 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 2;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

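/*
 * Entry point called per frame: validates the filter parameter buffers,
 * gathers the input, reference and output surfaces, and dispatches the
 * sharpening path (the only VPP GPE filter implemented here).
 */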
VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
                    struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
    VAProcFilterParameterBuffer* filter = NULL;
    unsigned int i;
    struct object_surface *obj_surface = NULL;

    if (pipe->num_filters && !pipe->filters)
        goto error;

    /* Find the first sharpening filter among the pipeline filters */
    for(i = 0; i < pipe->num_filters; i++){
        struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);

        assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

        if (!obj_buf ||
            !obj_buf->buffer_store ||
            !obj_buf->buffer_store->buffer)
            goto error;

        filter = (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
        if(filter->type == VAProcFilterSharpening){
           break;
        }
    }

    assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;

    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    for(i = 0; i < pipe->num_forward_references; i ++)
    {
        obj_surface = SURFACE(pipe->forward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
        vpp_gpe_ctx->forward_surf_sum++;
    }

    for(i = 0; i < pipe->num_backward_references; i ++)
    {
        obj_surface = SURFACE(pipe->backward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i] = obj_surface;
        vpp_gpe_ctx->backward_surf_sum++;
    }

    obj_surface = vpp_gpe_ctx->surface_input_object[0];
    vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
    vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;

    if(filter && filter->type == VAProcFilterSharpening) {
       va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
    } else {
       va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
    }

    vpp_gpe_ctx->is_first_frame = 0;

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

void
vpp_gpe_context_destroy(VADriverContextP ctx,
                               struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    vpp_gpe_ctx->vpp_kernel_return.bo = NULL;

    vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);

    if(vpp_gpe_ctx->surface_tmp != VA_INVALID_ID){
        assert(vpp_gpe_ctx->surface_tmp_object != NULL);
        i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
        vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
        vpp_gpe_ctx->surface_tmp_object = NULL;
    }

    if (vpp_gpe_ctx->batch)
        intel_batchbuffer_free(vpp_gpe_ctx->batch);

    free(vpp_gpe_ctx);
}

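/*
 * Allocate and initialize a VPP GPE context: choose the Haswell or Gen8+
 * GPE hooks, size the surface state/binding table and interface descriptor
 * areas, and create the batch buffer used to submit the media pipeline.
 */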
struct vpp_gpe_context *
vpp_gpe_context_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct vpp_gpe_context  *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
    assert(vpp_gpe_ctx);
    struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);

    assert(IS_HASWELL(i965->intel.device_info) ||
           IS_GEN8(i965->intel.device_info) ||
           IS_GEN9(i965->intel.device_info));

    vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
    vpp_gpe_ctx->surface_tmp_object = NULL;
    vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
    vpp_gpe_ctx->is_first_frame = 1;

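    /* MEDIA_VFE_STATE parameters; the thread count, URB entry size and CURBE
     * allocation fields are programmed with the hardware's N - 1 encoding,
     * hence the explicit "- 1" terms below. */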
    gpe_ctx->vfe_state.max_num_threads = 60 - 1;
    gpe_ctx->vfe_state.num_urb_entries = 16;
    gpe_ctx->vfe_state.gpgpu_mode = 0;
    gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
    gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;

    if (IS_HASWELL(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = i965_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = i965_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = i965_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
        gpe_ctx->idrt.entry_size = ALIGN(sizeof(struct gen6_interface_descriptor_data), 64);

    } else if (IS_GEN8(i965->intel.device_info) ||
               IS_GEN9(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = gen8_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = gen8_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = gen8_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
    }

    return vpp_gpe_ctx;
}