/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Li Xiaowei <xiaowei.a.li@intel.com>
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_structs.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "gen75_vpp_gpe.h"

#define MAX_INTERFACE_DESC_GEN6      MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6      34

#define SURFACE_STATE_OFFSET_GEN7(index)   (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
#define BINDING_TABLE_OFFSET_GEN7(index)   (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define SURFACE_STATE_OFFSET_GEN8(index)   (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
#define BINDING_TABLE_OFFSET_GEN8(index)   (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define CURBE_ALLOCATION_SIZE   37
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
#define CURBE_URB_ENTRY_LENGTH  4

/* Shader information for sharpening */
static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
#include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
#include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
#include "shaders/post_processing/gen75/sharpening_unmask.g75b"
};
static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_h_blur,
        sizeof(gen75_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_v_blur,
        sizeof(gen75_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_unmask,
        sizeof(gen75_gpe_sharpening_unmask),
        NULL
    },
};

/* sharpening kernels for Broadwell */
static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
#include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
#include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
#include "shaders/post_processing/gen8/sharpening_unmask.g8b"
};

static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_h_blur,
        sizeof(gen8_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_v_blur,
        sizeof(gen8_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_unmask,
        sizeof(gen8_gpe_sharpening_unmask),
        NULL
    },
};

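/*
 * Binding table layout shared by the VPP GPE kernels (derived from the
 * offsets used below): two entries (luma, then chroma) per input NV12
 * surface, followed by two entries for the output surface and one entry
 * for the kernel return buffer.
 */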
static VAStatus
gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                       vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Binding input NV12 surfaces (Luma + Chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
        obj_surface = vpp_gpe_ctx->surface_input_object[i / 2];
        assert(obj_surface);
        gen7_gpe_media_rw_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN7(i),
                                        SURFACE_STATE_OFFSET_GEN7(i),
                                        0);

        gen75_gpe_media_chroma_surface_setup(ctx,
                                             &vpp_gpe_ctx->gpe_ctx,
                                             obj_surface,
                                             BINDING_TABLE_OFFSET_GEN7(i + 1),
                                             SURFACE_STATE_OFFSET_GEN7(i + 1),
                                             0);
    }

    /* Binding output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen7_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum),
                                    1);
    gen75_gpe_media_chroma_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                         obj_surface,
                                         BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
                                         SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1),
                                         1);
    /* Bind kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                 &vpp_gpe_ctx->gpe_ctx,
                                 &vpp_gpe_ctx->vpp_kernel_return,
                                 BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 2),
                                 SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_interface_setup(VADriverContextP ctx,
                                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 6; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 0;

        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

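/*
 * Fill the secondary (vpp) batch buffer with one MEDIA_OBJECT command per
 * thread: the opcode/length DWord (its length field covers the six fixed
 * DWords plus the inline data), the interface descriptor index that selects
 * the sub-shader, four zeroed DWords, and the per-thread parameters copied
 * in as inline data.
 */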
static VAStatus
gen75_gpe_process_parameters_fill(VADriverContextP ctx,
                                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        *command_ptr++ = (CMD_MEDIA_OBJECT | (size / sizeof(int) + 6 - 2));
        *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;

        /* copy thread inline data */
        position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
        memcpy(command_ptr, position, size);
        command_ptr += size / sizeof(int);
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_init(VADriverContextP ctx,
                       struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                                  (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
                                      * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                      batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                      kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_prepare(VADriverContextP ctx,
                          struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_run(VADriverContextP ctx,
                      struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
                                struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                       vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Binding input NV12 surfaces (Luma + Chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
        obj_surface = vpp_gpe_ctx->surface_input_object[i / 2];
        assert(obj_surface);
        gen8_gpe_media_rw_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN8(i),
                                        SURFACE_STATE_OFFSET_GEN8(i),
                                        0);

        gen8_gpe_media_chroma_surface_setup(ctx,
                                            &vpp_gpe_ctx->gpe_ctx,
                                            obj_surface,
                                            BINDING_TABLE_OFFSET_GEN8(i + 1),
                                            SURFACE_STATE_OFFSET_GEN8(i + 1),
                                            0);
    }

    /* Binding output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen8_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum),
                                    1);
    gen8_gpe_media_chroma_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
                                        SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1),
                                        1);
    /* Bind kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                 &vpp_gpe_ctx->gpe_ctx,
                                 &vpp_gpe_ctx->vpp_kernel_return,
                                 BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 2),
                                 SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_interface_setup(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen8_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = (struct gen8_interface_descriptor_data *)(bo->virtual
                                                     + vpp_gpe_ctx->gpe_ctx.idrt.offset);

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel;
        kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0; /* FIXME: */
        desc->desc3.sampler_state_pointer = 0;
        desc->desc4.binding_table_entry_count = 6; /* FIXME: */
        desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = 0;

        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_parameters_fill(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        *command_ptr++ = (CMD_MEDIA_OBJECT | (size / sizeof(int) + 6 - 2));
        *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;

        /* copy thread inline data */
        position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
        memcpy(command_ptr, position, size);
        command_ptr += size / sizeof(int);

        *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
        *command_ptr++ = 0;
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
                                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(vpp_gpe_ctx->batch, 0);

    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_init(VADriverContextP ctx,
                      struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                                  (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;

    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
                                      * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                      batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                      kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_prepare(VADriverContextP ctx,
                         struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_run(VADriverContextP ctx,
                     struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process(VADriverContextP ctx,
                 struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

static VAStatus
vpp_gpe_process(VADriverContextP ctx,
                struct vpp_gpe_context * vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    if (IS_HASWELL(i965->intel.device_info))
        return gen75_gpe_process(ctx, vpp_gpe_ctx);
    else if (IS_GEN8(i965->intel.device_info) ||
             IS_GEN9(i965->intel.device_info) ||
             IS_GEN10(i965->intel.device_info))
        return gen8_gpe_process(ctx, vpp_gpe_ctx);

    return VA_STATUS_ERROR_UNIMPLEMENTED;
}

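/*
 * Sharpening is run as an unsharp-mask filter in three GPE passes:
 *   1. horizontal blur of the input into the output surface,
 *   2. vertical blur of that result into a temporary surface,
 *   3. "unmask" pass that combines the blurred temporary with the
 *      original input to produce the sharpened output.
 * Each pass is split into per-strip threads (16 rows or columns for the
 * blur passes, 4 rows for the unmask pass).
 */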
static VAStatus
vpp_gpe_process_sharpening(VADriverContextP ctx,
                           struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
    struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;

    VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
    VABufferID *filter_ids = (VABufferID*)pipe->filters;
    struct object_buffer *obj_buf = BUFFER((*(filter_ids + 0)));

    assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

    if (!obj_buf ||
        !obj_buf->buffer_store ||
        !obj_buf->buffer_store->buffer)
        goto error;

    VAProcFilterParameterBuffer* filter =
        (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
    float sharpening_intensity = filter->value;

    ThreadParameterSharpening thr_param;
    unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
    unsigned int i;
    unsigned char * pos;

    if (vpp_gpe_ctx->is_first_frame) {
        vpp_gpe_ctx->sub_shader_sum = 3;
        struct i965_kernel * vpp_kernels;
        if (IS_HASWELL(i965->intel.device_info))
            vpp_kernels = gen75_vpp_sharpening_kernels;
        else if (IS_GEN8(i965->intel.device_info) ||
                 IS_GEN9(i965->intel.device_info) ||
                 IS_GEN10(i965->intel.device_info))
            vpp_kernels = gen8_vpp_sharpening_kernels;
        else
            return VA_STATUS_ERROR_UNIMPLEMENTED;

        vpp_gpe_ctx->gpe_load_kernels(ctx,
                                      &vpp_gpe_ctx->gpe_ctx,
                                      vpp_kernels,
                                      vpp_gpe_ctx->sub_shader_sum);
    }

    if (vpp_gpe_ctx->surface_tmp == VA_INVALID_ID) {
        va_status = i965_CreateSurfaces(ctx,
                                        vpp_gpe_ctx->in_frame_w,
                                        vpp_gpe_ctx->in_frame_h,
                                        VA_RT_FORMAT_YUV420,
                                        1,
                                        &vpp_gpe_ctx->surface_tmp);
        assert(va_status == VA_STATUS_SUCCESS);

        struct object_surface * obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
        assert(obj_surf);

        if (obj_surf) {
            i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
                                        SUBSAMPLE_YUV420);
            vpp_gpe_ctx->surface_tmp_object = obj_surf;
        }
    }

    assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
    thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
    thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);

    thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
    thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;

    /* Step 1: horizontal blur process */
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h / 16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 16 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 0;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 2: vertical blur process */
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
    vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w / 16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 0;
        thr_param.base.h_pos = 16 * i;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 1;
    vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 3: apply the blur to the original surface */
    vpp_gpe_ctx->surface_input_object[0]  = origin_in_obj_surface;
    vpp_gpe_ctx->surface_input_object[1]  = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->surface_output_object    = origin_out_obj_surface;
    vpp_gpe_ctx->forward_surf_sum  = 1;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h / 4;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 4 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 2;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

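/*
 * Entry point from the post-processing pipeline: look up the sharpening
 * filter parameter buffer, gather the input surface plus any forward and
 * backward reference surfaces, and dispatch the GPE sharpening passes.
 * Only VAProcFilterSharpening is handled here.
 */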
VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
                                 struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
    VAProcFilterParameterBuffer* filter = NULL;
    unsigned int i;
    struct object_surface *obj_surface = NULL;

    if (pipe->num_filters && !pipe->filters)
        goto error;

    for (i = 0; i < pipe->num_filters; i++) {
        struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);

        assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

        if (!obj_buf ||
            !obj_buf->buffer_store ||
            !obj_buf->buffer_store->buffer)
            goto error;

        filter = (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
        if (filter->type == VAProcFilterSharpening) {
            break;
        }
    }

    assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;

    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    for (i = 0; i < pipe->num_forward_references; i++) {
        obj_surface = SURFACE(pipe->forward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
        vpp_gpe_ctx->forward_surf_sum++;
    }

    for (i = 0; i < pipe->num_backward_references; i++) {
        obj_surface = SURFACE(pipe->backward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i] = obj_surface;
        vpp_gpe_ctx->backward_surf_sum++;
    }

    obj_surface = vpp_gpe_ctx->surface_input_object[0];
    vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
    vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;

    if (filter && filter->type == VAProcFilterSharpening) {
        va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
    } else {
        va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
    }

    vpp_gpe_ctx->is_first_frame = 0;

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

void
vpp_gpe_context_destroy(VADriverContextP ctx,
                        struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    vpp_gpe_ctx->vpp_kernel_return.bo = NULL;

    vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);

    if (vpp_gpe_ctx->surface_tmp != VA_INVALID_ID) {
        assert(vpp_gpe_ctx->surface_tmp_object != NULL);
        i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
        vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
        vpp_gpe_ctx->surface_tmp_object = NULL;
    }

    if (vpp_gpe_ctx->batch)
        intel_batchbuffer_free(vpp_gpe_ctx->batch);

    free(vpp_gpe_ctx);
}

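/*
 * Create the VPP GPE context and hook up the generation-specific GPE
 * callbacks (context init/destroy, kernel loading) together with the
 * matching surface-state and interface-descriptor sizes for Gen7.5
 * versus Gen8 and later.
 */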
struct vpp_gpe_context *
vpp_gpe_context_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct vpp_gpe_context *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
    assert(vpp_gpe_ctx);
    struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);

    assert(IS_HASWELL(i965->intel.device_info) ||
           IS_GEN8(i965->intel.device_info) ||
           IS_GEN9(i965->intel.device_info) ||
           IS_GEN10(i965->intel.device_info));

    vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
    vpp_gpe_ctx->surface_tmp_object = NULL;
    vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
    vpp_gpe_ctx->is_first_frame = 1;

    gpe_ctx->vfe_state.max_num_threads = 60 - 1;
    gpe_ctx->vfe_state.num_urb_entries = 16;
    gpe_ctx->vfe_state.gpgpu_mode = 0;
    gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
    gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;

    if (IS_HASWELL(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = i965_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = i965_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = i965_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
            (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
        gpe_ctx->idrt.entry_size = ALIGN(sizeof(struct gen6_interface_descriptor_data), 64);

    } else if (IS_GEN8(i965->intel.device_info) ||
               IS_GEN9(i965->intel.device_info) ||
               IS_GEN10(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = gen8_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = gen8_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = gen8_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
            (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
    }

    return vpp_gpe_ctx;
}