src/gen75_vpp_gpe.c
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Li Xiaowei <xiaowei.a.li@intel.com>
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_structs.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "gen75_vpp_gpe.h"

#define MAX_INTERFACE_DESC_GEN6      MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6      34

#define SURFACE_STATE_OFFSET_GEN7(index)   (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
#define BINDING_TABLE_OFFSET_GEN7(index)   (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define SURFACE_STATE_OFFSET_GEN8(index)   (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
#define BINDING_TABLE_OFFSET_GEN8(index)   (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
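
/*
 * The two macro pairs above describe one shared layout for the
 * surface-state/binding-table buffer: the padded surface-state entries for
 * up to MAX_MEDIA_SURFACES_GEN6 surfaces come first, and the binding table
 * (an array of unsigned int offsets, one per surface) follows immediately
 * after them. A rough sketch of the resulting buffer, under that reading:
 *
 *   offset 0                                   : surface state 0
 *   offset SURFACE_STATE_PADDED_SIZE * 1       : surface state 1
 *   ...
 *   offset SURFACE_STATE_PADDED_SIZE * 34      : binding table entry 0, 1, ...
 */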

#define CURBE_ALLOCATION_SIZE   37
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
#define CURBE_URB_ENTRY_LENGTH  4

/* Shader binaries for sharpening */
static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
   #include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
   #include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
   #include "shaders/post_processing/gen75/sharpening_unmask.g75b"
};
static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_h_blur,
        sizeof(gen75_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_v_blur,
        sizeof(gen75_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_unmask,
        sizeof(gen75_gpe_sharpening_unmask),
        NULL
    },
};
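
/*
 * These three kernels are run back to back by vpp_gpe_process_sharpening():
 * a horizontal blur pass, a vertical blur pass, and an "unmask" pass that
 * combines the blurred result with the original surface (unsharp masking).
 */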

/* sharpening kernels for Broadwell */
static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
   #include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
   #include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
   #include "shaders/post_processing/gen8/sharpening_unmask.g8b"
};

static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_h_blur,
        sizeof(gen8_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_v_blur,
        sizeof(gen8_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_unmask,
        sizeof(gen8_gpe_sharpening_unmask),
        NULL
    },
};

static VAStatus
gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind input NV12 surfaces (Luma + Chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         gen7_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN7(i),
                                          SURFACE_STATE_OFFSET_GEN7(i));

         gen75_gpe_media_chroma_surface_setup(ctx,
                                          &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN7(i + 1),
                                          SURFACE_STATE_OFFSET_GEN7(i + 1));
    }

    /* Bind output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen7_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum));
    gen75_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1));
    /* Bind kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                  &vpp_gpe_ctx->gpe_ctx,
                                  &vpp_gpe_ctx->vpp_kernel_return,
                                  BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 2),
                                  SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}
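
/*
 * After the setup above, the binding table laid out by this function looks
 * roughly like this (N = number of input surfaces, i.e. current frame plus
 * forward/backward references, so input_surface_sum = 2N):
 *
 *   slot 0 .. 2N-1 : input luma/chroma pairs (one pair per NV12 surface)
 *   slot 2N, 2N+1  : output luma/chroma
 *   slot 2N+2      : kernel return buffer
 */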

static VAStatus
gen75_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    /* Set up the interface descriptor table, one entry per kernel */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 6; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 0;

        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++)
    {
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}
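
/*
 * Each iteration above emits one MEDIA_OBJECT command: the DWord-length
 * field in the header is (total DWords - 2), i.e. six fixed DWords plus
 * size/sizeof(int) DWords of inline data, minus two. DWord 1 selects the
 * interface descriptor (sub_shader_index), DWords 2-5 are left zero, and
 * the per-thread parameters follow as inline data. The second-level batch
 * is then terminated with an MI_NOOP and MI_BATCH_BUFFER_END.
 */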

static VAStatus
gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}
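
/*
 * The MI_BATCH_BUFFER_START above chains from the main batch into the
 * separately built vpp_batchbuffer holding the MEDIA_OBJECT commands.
 * Bit 8 appears to be the usual "non-secure" (PPGTT address space) flag
 * used elsewhere in this driver for chained batches.
 */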

static VAStatus
gen75_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind input NV12 surfaces (Luma + Chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         gen8_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN8(i),
                                          SURFACE_STATE_OFFSET_GEN8(i));

         gen8_gpe_media_chroma_surface_setup(ctx,
                                          &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN8(i + 1),
                                          SURFACE_STATE_OFFSET_GEN8(i + 1));
    }

    /* Bind output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen8_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum));
    gen8_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1));
    /* Bind kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                  &vpp_gpe_ctx->gpe_ctx,
                                  &vpp_gpe_ctx->vpp_kernel_return,
                                  BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 2),
                                  SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen8_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = (struct gen8_interface_descriptor_data *)(bo->virtual
                               + vpp_gpe_ctx->gpe_ctx.idrt_offset);

    /* Set up the interface descriptor table, one entry per kernel */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel;
        kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0; /* FIXME: */
        desc->desc3.sampler_state_pointer = 0;
        desc->desc4.binding_table_entry_count = 6; /* FIXME: */
        desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = 0;

        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}
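
/*
 * Unlike the Gen7.5 path, which points each descriptor at a separate kernel
 * BO and emits a relocation for it, the Gen8+ path writes the descriptors
 * into the dynamic state buffer at idrt_offset and uses kernel_offset,
 * suggesting the kernel code already lives in the same dynamic-state heap,
 * so no relocation is needed.
 */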

static VAStatus
gen8_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++)
    {
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);

         /* Gen8+ only: emit a MEDIA_STATE_FLUSH after each MEDIA_OBJECT */
         *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
         *command_ptr++ = 0;
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(vpp_gpe_ctx->batch, 0);

    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}
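
/*
 * Same chaining as the Gen7.5 variant; on Gen8+ the MI_BATCH_BUFFER_START
 * command appears to be one DWord longer (hence the extra length bit in the
 * header and the trailing OUT_BATCH(0)) because batch addresses are 48-bit
 * on these platforms.
 */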

static VAStatus
gen8_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    /* Per thread: 6 DWords of MEDIA_OBJECT header, the inline data and
     * 2 DWords of MEDIA_STATE_FLUSH; 16 bytes of slack covers the batch end. */
    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 8 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;

    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

static VAStatus
vpp_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    if (IS_HASWELL(i965->intel.device_info))
       return gen75_gpe_process(ctx, vpp_gpe_ctx);
    else if (IS_GEN8(i965->intel.device_info) ||
             IS_GEN9(i965->intel.device_info))
       return gen8_gpe_process(ctx, vpp_gpe_ctx);

    return VA_STATUS_ERROR_UNIMPLEMENTED;
}

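/*
 * Sharpening is implemented as unsharp masking in three GPE passes, each
 * dispatched through vpp_gpe_process() with a different kernel:
 *
 *   1. horizontal blur : original input  -> output surface (used as scratch)
 *   2. vertical blur   : h-blurred data  -> temporary surface
 *   3. unmask          : original input + blurred temp -> output surface
 *
 * Each pass splits the frame into strips (16 rows/columns for the blur
 * passes, 4 rows for unmask) and passes one ThreadParameterSharpening per
 * strip as MEDIA_OBJECT inline data.
 */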
static VAStatus
vpp_gpe_process_sharpening(VADriverContextP ctx,
                             struct vpp_gpe_context * vpp_gpe_ctx)
{
     VAStatus va_status = VA_STATUS_SUCCESS;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
     struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;

     VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
     VABufferID *filter_ids = (VABufferID*)pipe->filters;
     struct object_buffer *obj_buf = BUFFER(*(filter_ids + 0));

     assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

     if (!obj_buf ||
         !obj_buf->buffer_store ||
         !obj_buf->buffer_store->buffer)
         goto error;

     VAProcFilterParameterBuffer* filter =
                  (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
     float sharpening_intensity = filter->value;

     ThreadParameterSharpening thr_param;
     unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
     unsigned int i;
     unsigned char * pos;

     if (vpp_gpe_ctx->is_first_frame) {
         vpp_gpe_ctx->sub_shader_sum = 3;
         struct i965_kernel * vpp_kernels;
         if (IS_HASWELL(i965->intel.device_info))
             vpp_kernels = gen75_vpp_sharpening_kernels;
         else if (IS_GEN8(i965->intel.device_info) ||
                  IS_GEN9(i965->intel.device_info)) // TODO: build the sharpening kernel for GEN9
             vpp_kernels = gen8_vpp_sharpening_kernels;
         else
             return VA_STATUS_ERROR_UNIMPLEMENTED;

         vpp_gpe_ctx->gpe_load_kernels(ctx,
                               &vpp_gpe_ctx->gpe_ctx,
                               vpp_kernels,
                               vpp_gpe_ctx->sub_shader_sum);
     }

     if (vpp_gpe_ctx->surface_tmp == VA_INVALID_ID) {
        va_status = i965_CreateSurfaces(ctx,
                                       vpp_gpe_ctx->in_frame_w,
                                       vpp_gpe_ctx->in_frame_h,
                                       VA_RT_FORMAT_YUV420,
                                       1,
                                       &vpp_gpe_ctx->surface_tmp);
        assert(va_status == VA_STATUS_SUCCESS);

        struct object_surface * obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
        assert(obj_surf);

        if (obj_surf) {
            i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
                                        SUBSAMPLE_YUV420);
            vpp_gpe_ctx->surface_tmp_object = obj_surf;
        }
    }

    assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
    thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
    thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);

    thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
    thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;

    /* Step 1: horizontal blur process */
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       *vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 16 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 0;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 2: vertical blur process */
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
    vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w/16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       *vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 0;
        thr_param.base.h_pos = 16 * i;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 1;
    vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 3: apply the blur to the original surface */
    vpp_gpe_ctx->surface_input_object[0]  = origin_in_obj_surface;
    vpp_gpe_ctx->surface_input_object[1]  = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->surface_output_object    = origin_out_obj_surface;
    vpp_gpe_ctx->forward_surf_sum  = 1;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/4;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       *vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 4 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 2;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
                    struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
    VAProcFilterParameterBuffer* filter = NULL;
    unsigned int i;
    struct object_surface *obj_surface = NULL;

    if (pipe->num_filters && !pipe->filters)
        goto error;

    /* Find the first sharpening filter in the pipeline's filter list */
    for (i = 0; i < pipe->num_filters; i++) {
        struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);

        assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

        if (!obj_buf ||
            !obj_buf->buffer_store ||
            !obj_buf->buffer_store->buffer)
            goto error;

        filter = (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
        if (filter->type == VAProcFilterSharpening) {
           break;
        }
    }

    assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;

    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    for (i = 0; i < pipe->num_forward_references; i++)
    {
        obj_surface = SURFACE(pipe->forward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
        vpp_gpe_ctx->forward_surf_sum++;
    }

    for (i = 0; i < pipe->num_backward_references; i++)
    {
        obj_surface = SURFACE(pipe->backward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i] = obj_surface;
        vpp_gpe_ctx->backward_surf_sum++;
    }

    obj_surface = vpp_gpe_ctx->surface_input_object[0];
    vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
    vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;

    if (filter && filter->type == VAProcFilterSharpening) {
       va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
    } else {
       va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
    }

    vpp_gpe_ctx->is_first_frame = 0;

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}
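
/*
 * For reference, a minimal client-side sketch of how this entry point is
 * reached through the VA-API video-processing interface (illustrative only;
 * va_dpy, vpp_ctx, src_surface and dst_surface are placeholders supplied by
 * the application):
 *
 *   VAProcFilterParameterBuffer sharpen = {
 *       .type  = VAProcFilterSharpening,
 *       .value = 0.5f,   // asserted to be in [0.0, 1.0] by vpp_gpe_process_sharpening()
 *   };
 *   VABufferID filter_buf;
 *   vaCreateBuffer(va_dpy, vpp_ctx, VAProcFilterParameterBufferType,
 *                  sizeof(sharpen), 1, &sharpen, &filter_buf);
 *
 *   VAProcPipelineParameterBuffer pipeline = {
 *       .surface     = src_surface,
 *       .filters     = &filter_buf,
 *       .num_filters = 1,
 *   };
 *   VABufferID pipeline_buf;
 *   vaCreateBuffer(va_dpy, vpp_ctx, VAProcPipelineParameterBufferType,
 *                  sizeof(pipeline), 1, &pipeline, &pipeline_buf);
 *
 *   vaBeginPicture(va_dpy, vpp_ctx, dst_surface);
 *   vaRenderPicture(va_dpy, vpp_ctx, &pipeline_buf, 1);
 *   vaEndPicture(va_dpy, vpp_ctx);
 */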

void
vpp_gpe_context_destroy(VADriverContextP ctx,
                               struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    vpp_gpe_ctx->vpp_kernel_return.bo = NULL;

    vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);

    if (vpp_gpe_ctx->surface_tmp != VA_INVALID_ID) {
        assert(vpp_gpe_ctx->surface_tmp_object != NULL);
        i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
        vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
        vpp_gpe_ctx->surface_tmp_object = NULL;
    }

    if (vpp_gpe_ctx->batch)
        intel_batchbuffer_free(vpp_gpe_ctx->batch);

    free(vpp_gpe_ctx);
}

struct vpp_gpe_context *
vpp_gpe_context_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct vpp_gpe_context  *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
    assert(vpp_gpe_ctx);
    struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);

    assert(IS_HASWELL(i965->intel.device_info) ||
           IS_GEN8(i965->intel.device_info) ||
           IS_GEN9(i965->intel.device_info));

    vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
    vpp_gpe_ctx->surface_tmp_object = NULL;
    vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
    vpp_gpe_ctx->is_first_frame = 1;

    gpe_ctx->vfe_state.max_num_threads = 60 - 1;
    gpe_ctx->vfe_state.num_urb_entries = 16;
    gpe_ctx->vfe_state.gpgpu_mode = 0;
    gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
    gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;

    if (IS_HASWELL(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = i965_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = i965_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = i965_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
        gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);

    } else if (IS_GEN8(i965->intel.device_info) ||
               IS_GEN9(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = gen8_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = gen8_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = gen8_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt_size  = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;

    }

    return vpp_gpe_ctx;
}