OSDN Git Service

android: avoid compile warnings
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_vme.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "intel_driver.h"
37
38 #include "i965_defines.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "gen6_vme.h"
42 #include "gen6_mfc.h"
43 #include "gen9_mfc.h"
44 #include "intel_media.h"
45 #include "gen9_vp9_encapi.h"
46 #include "i965_post_processing.h"
47 #include "i965_encoder_api.h"
48
49 #ifdef SURFACE_STATE_PADDED_SIZE
50 #undef SURFACE_STATE_PADDED_SIZE
51 #endif
52
53 #define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
54 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
55 #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
56
57 #define VME_INTRA_SHADER        0
58 #define VME_INTER_SHADER        1
59 #define VME_BINTER_SHADER       2
60
61 #define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
62 #define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
63 #define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
64
65 #define VME_MSG_LENGTH          32
66
/* AVC VME kernel binaries; the .g9b files are uint32 quadword dumps
 * produced by the shader build from shaders/vme/*.asm. */
static const uint32_t gen9_vme_intra_frame[][4] = {
#include "shaders/vme/intra_frame_gen9.g9b"
};

static const uint32_t gen9_vme_inter_frame[][4] = {
#include "shaders/vme/inter_frame_gen9.g9b"
};

static const uint32_t gen9_vme_inter_bframe[][4] = {
#include "shaders/vme/inter_bframe_gen9.g9b"
};

/* AVC kernel table; entry order must match the VME_*_SHADER indices. */
static struct i965_kernel gen9_vme_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen9_vme_intra_frame,
        sizeof(gen9_vme_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen9_vme_inter_frame,
        sizeof(gen9_vme_inter_frame),
        NULL
    },
    {
        "VME inter BFrame",
        VME_BINTER_SHADER,
        gen9_vme_inter_bframe,
        sizeof(gen9_vme_inter_bframe),
        NULL
    }
};
102
/* MPEG-2 VME kernels: intra reuses the AVC intra binary, inter uses a
 * dedicated MPEG-2 search kernel.  No B-frame kernel for MPEG-2. */
static const uint32_t gen9_vme_mpeg2_intra_frame[][4] = {
#include "shaders/vme/intra_frame_gen9.g9b"
};

static const uint32_t gen9_vme_mpeg2_inter_frame[][4] = {
#include "shaders/vme/mpeg2_inter_gen9.g9b"
};

/* MPEG-2 kernel table; entry order must match the VME_*_SHADER indices. */
static struct i965_kernel gen9_vme_mpeg2_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen9_vme_mpeg2_intra_frame,
        sizeof(gen9_vme_mpeg2_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen9_vme_mpeg2_inter_frame,
        sizeof(gen9_vme_mpeg2_inter_frame),
        NULL
    },
};
127
/* VP8 VME kernels (dedicated intra/inter binaries, no B-frame kernel). */
static const uint32_t gen9_vme_vp8_intra_frame[][4] = {
#include "shaders/vme/vp8_intra_frame_gen9.g9b"
};

static const uint32_t gen9_vme_vp8_inter_frame[][4] = {
#include "shaders/vme/vp8_inter_frame_gen9.g9b"
};

/* VP8 kernel table; entry order must match the VME_*_SHADER indices. */
static struct i965_kernel gen9_vme_vp8_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen9_vme_vp8_intra_frame,
        sizeof(gen9_vme_vp8_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen9_vme_vp8_inter_frame,
        sizeof(gen9_vme_vp8_inter_frame),
        NULL
    },
};
152
/* HEVC */

/* HEVC VME kernels: HEVC reuses the AVC shader binaries for motion
 * estimation (the results are translated to CTU terms elsewhere). */
static const uint32_t gen9_vme_hevc_intra_frame[][4] = {
#include "shaders/vme/intra_frame_gen9.g9b"
};

static const uint32_t gen9_vme_hevc_inter_frame[][4] = {
#include "shaders/vme/inter_frame_gen9.g9b"
};

static const uint32_t gen9_vme_hevc_inter_bframe[][4] = {
#include "shaders/vme/inter_bframe_gen9.g9b"
};

/* HEVC kernel table; entry order must match the VME_*_SHADER indices. */
static struct i965_kernel gen9_vme_hevc_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER, /*index*/
        gen9_vme_hevc_intra_frame,
        sizeof(gen9_vme_hevc_intra_frame),
        NULL
    },
    {
        "VME inter Frame",
        VME_INTER_SHADER,
        gen9_vme_hevc_inter_frame,
        sizeof(gen9_vme_hevc_inter_frame),
        NULL
    },
    {
        "VME inter BFrame",
        VME_BINTER_SHADER,
        gen9_vme_hevc_inter_bframe,
        sizeof(gen9_vme_hevc_inter_bframe),
        NULL
    }
};
190 /* only used for VME source surface state */
191 static void
192 gen9_vme_source_surface_state(VADriverContextP ctx,
193                               int index,
194                               struct object_surface *obj_surface,
195                               struct intel_encoder_context *encoder_context)
196 {
197     struct gen6_vme_context *vme_context = encoder_context->vme_context;
198
199     vme_context->vme_surface2_setup(ctx,
200                                     &vme_context->gpe_context,
201                                     obj_surface,
202                                     BINDING_TABLE_OFFSET(index),
203                                     SURFACE_STATE_OFFSET(index));
204 }
205
206 static void
207 gen9_vme_media_source_surface_state(VADriverContextP ctx,
208                                     int index,
209                                     struct object_surface *obj_surface,
210                                     struct intel_encoder_context *encoder_context)
211 {
212     struct gen6_vme_context *vme_context = encoder_context->vme_context;
213
214     vme_context->vme_media_rw_surface_setup(ctx,
215                                             &vme_context->gpe_context,
216                                             obj_surface,
217                                             BINDING_TABLE_OFFSET(index),
218                                             SURFACE_STATE_OFFSET(index),
219                                             0);
220 }
221
222 static void
223 gen9_vme_media_chroma_source_surface_state(VADriverContextP ctx,
224                                            int index,
225                                            struct object_surface *obj_surface,
226                                            struct intel_encoder_context *encoder_context)
227 {
228     struct gen6_vme_context *vme_context = encoder_context->vme_context;
229
230     vme_context->vme_media_chroma_surface_setup(ctx,
231                                                 &vme_context->gpe_context,
232                                                 obj_surface,
233                                                 BINDING_TABLE_OFFSET(index),
234                                                 SURFACE_STATE_OFFSET(index),
235                                                 0);
236 }
237
238 static void
239 gen9_vme_output_buffer_setup(VADriverContextP ctx,
240                              struct encode_state *encode_state,
241                              int index,
242                              struct intel_encoder_context *encoder_context,
243                              int is_intra,
244                              int width_in_mbs,
245                              int height_in_mbs)
246
247 {
248     struct i965_driver_data *i965 = i965_driver_data(ctx);
249     struct gen6_vme_context *vme_context = encoder_context->vme_context;
250
251     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
252     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
253
254     if (is_intra)
255         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
256     else
257         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
258     /*
259      * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
260      * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
261      * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
262      */
263
264     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
265                                               "VME output buffer",
266                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
267                                               0x1000);
268     assert(vme_context->vme_output.bo);
269     vme_context->vme_buffer_suface_setup(ctx,
270                                          &vme_context->gpe_context,
271                                          &vme_context->vme_output,
272                                          BINDING_TABLE_OFFSET(index),
273                                          SURFACE_STATE_OFFSET(index));
274 }
275
276 static void
277 gen9_vme_avc_output_buffer_setup(VADriverContextP ctx,
278                                  struct encode_state *encode_state,
279                                  int index,
280                                  struct intel_encoder_context *encoder_context)
281 {
282     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
283     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
284     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
285     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
286     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
287
288     gen9_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
289
290 }
291
292 static void
293 gen9_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
294                                       struct encode_state *encode_state,
295                                       int index,
296                                       struct intel_encoder_context *encoder_context,
297                                       int width_in_mbs,
298                                       int height_in_mbs)
299 {
300     struct i965_driver_data *i965 = i965_driver_data(ctx);
301     struct gen6_vme_context *vme_context = encoder_context->vme_context;
302
303     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
304     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
305     vme_context->vme_batchbuffer.pitch = 16;
306     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
307                                                    "VME batchbuffer",
308                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
309                                                    0x1000);
310     vme_context->vme_buffer_suface_setup(ctx,
311                                          &vme_context->gpe_context,
312                                          &vme_context->vme_batchbuffer,
313                                          BINDING_TABLE_OFFSET(index),
314                                          SURFACE_STATE_OFFSET(index));
315 }
316
317 static void
318 gen9_vme_avc_output_vme_batchbuffer_setup(VADriverContextP ctx,
319                                           struct encode_state *encode_state,
320                                           int index,
321                                           struct intel_encoder_context *encoder_context)
322 {
323     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
324     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
325     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
326
327     gen9_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
328 }
329
330
331 static VAStatus
332 gen9_vme_surface_setup(VADriverContextP ctx,
333                        struct encode_state *encode_state,
334                        int is_intra,
335                        struct intel_encoder_context *encoder_context)
336 {
337     struct object_surface *obj_surface;
338
339     /*Setup surfaces state*/
340     /* current picture for encoding */
341     obj_surface = encode_state->input_yuv_object;
342     assert(obj_surface);
343     gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
344     gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
345     gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
346
347     if (!is_intra) {
348         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
349         int slice_type;
350
351         slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
352         assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
353
354         intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);
355
356         if (slice_type == SLICE_TYPE_B)
357             intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
358     }
359
360     /* VME output */
361     gen9_vme_avc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
362     gen9_vme_avc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
363     intel_h264_setup_cost_surface(ctx, encode_state, encoder_context,
364                                   BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET),
365                                   SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET));
366
367     return VA_STATUS_SUCCESS;
368 }
369
370 static VAStatus gen9_vme_interface_setup(VADriverContextP ctx,
371                                          struct encode_state *encode_state,
372                                          struct intel_encoder_context *encoder_context)
373 {
374     struct gen6_vme_context *vme_context = encoder_context->vme_context;
375     struct gen8_interface_descriptor_data *desc;
376     int i;
377     dri_bo *bo;
378     unsigned char *desc_ptr;
379
380     bo = vme_context->gpe_context.idrt.bo;
381     dri_bo_map(bo, 1);
382     assert(bo->virtual);
383     desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt.offset;
384
385     desc = (struct gen8_interface_descriptor_data *)desc_ptr;
386
387     for (i = 0; i < vme_context->vme_kernel_sum; i++) {
388         struct i965_kernel *kernel;
389         kernel = &vme_context->gpe_context.kernels[i];
390         assert(sizeof(*desc) == 32);
391         /*Setup the descritor table*/
392         memset(desc, 0, sizeof(*desc));
393         desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
394         desc->desc3.sampler_count = 0; /* FIXME: */
395         desc->desc3.sampler_state_pointer = 0;
396         desc->desc4.binding_table_entry_count = 1; /* FIXME: */
397         desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
398         desc->desc5.constant_urb_entry_read_offset = 0;
399         desc->desc5.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
400
401         desc++;
402     }
403
404     dri_bo_unmap(bo);
405
406     return VA_STATUS_SUCCESS;
407 }
408
409 static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,
410                                         struct encode_state *encode_state,
411                                         struct intel_encoder_context *encoder_context,
412                                         int denom)
413 {
414     struct gen6_vme_context *vme_context = encoder_context->vme_context;
415     unsigned char *constant_buffer;
416     unsigned int *vme_state_message;
417     int mv_num = 32;
418
419     vme_state_message = (unsigned int *)vme_context->vme_state_message;
420
421     if (encoder_context->codec == CODEC_H264 ||
422         encoder_context->codec == CODEC_H264_MVC) {
423         if (vme_context->h264_level >= 30) {
424             mv_num = 16 / denom;
425
426             if (vme_context->h264_level >= 31)
427                 mv_num = 8 / denom;
428         }
429     } else if (encoder_context->codec == CODEC_MPEG2) {
430         mv_num = 2 / denom;
431     } else if (encoder_context->codec == CODEC_HEVC) {
432         if (vme_context->hevc_level >= 30 * 3) {
433             mv_num = 16;
434
435             if (vme_context->hevc_level >= 31 * 3)
436                 mv_num = 8;
437         }/* use the avc level setting */
438     }
439
440     vme_state_message[31] = mv_num;
441
442     dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
443     assert(vme_context->gpe_context.curbe.bo->virtual);
444     constant_buffer = (unsigned char *)vme_context->gpe_context.curbe.bo->virtual +
445                       vme_context->gpe_context.curbe.offset;
446
447     /* VME MV/Mb cost table is passed by using const buffer */
448     /* Now it uses the fixed search path. So it is constructed directly
449      * in the GPU shader.
450      */
451     memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
452
453     dri_bo_unmap(vme_context->gpe_context.curbe.bo);
454
455     return VA_STATUS_SUCCESS;
456 }
457
458 #define     MB_SCOREBOARD_A     (1 << 0)
459 #define     MB_SCOREBOARD_B     (1 << 1)
460 #define     MB_SCOREBOARD_C     (1 << 2)
461
/* check whether the mb of (x_index, y_index) is out of bound */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    /*
     * Returns 0 when the macroblock at (x_index, y_index) lies inside the
     * picture AND inside the slice window, -1 otherwise.
     *
     * NOTE(review): the upper bound uses '>', so the MB at linear index
     * exactly first_mb + num_mb is still treated as in-bounds.  This looks
     * like an off-by-one but is preserved as-is because the walker loops
     * depend on this exact behavior.
     */
    if (x_index < 0 || x_index >= mb_width ||
        y_index < 0 || y_index >= mb_height)
        return -1;

    int linear = y_index * mb_width + x_index;
    return (linear < first_mb || linear > first_mb + num_mb) ? -1 : 0;
}
476
/*
 * Fill the second-level VME batchbuffer with one MEDIA_OBJECT per MB,
 * emitted in a diagonal (x -= 2, y += 1) walking order with HW scoreboard
 * dependencies enabled, so the GPU can run MB threads concurrently while
 * still honoring the intra-prediction neighbour dependencies (left = A,
 * top = B, top-right = C).
 *
 * The walk is done per slice in two phases: phase 1 starts each diagonal
 * from the slice's first row while x_outer < mb_width - 2; phase 2
 * restarts from column mb_width - 2 and wraps x_outer back after reaching
 * the right edge, advancing y_outer, until the slice window is exhausted.
 */
static void
gen9wa_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                                       struct encode_state *encode_state,
                                       int mb_width, int mb_height,
                                       int kernel,
                                       int transform_8x8_mode_flag,
                                       struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_row;
    int s;
    unsigned int *command_ptr;

#define     USE_SCOREBOARD      (1 << 21)

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    /* one independent walk per slice */
    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
        int first_mb = pSliceParameter->macroblock_address;
        int num_mb = pSliceParameter->num_macroblocks;
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;

        x_outer = first_mb % mb_width;
        y_outer = first_mb / mb_width;
        mb_row = y_outer;   /* first row of this slice; MBs on it have no top neighbour within the slice */

        /* phase 1: diagonals starting on the slice's first row */
        for (; x_outer < (mb_width - 2) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
            x_inner = x_outer;
            y_inner = y_outer;
            /* walk one diagonal down-left until we leave the slice window */
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* compute intra neighbour availability and the matching
                 * scoreboard dependency mask for this MB */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                    if (x_inner != (mb_width - 1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                /* 8-dword MEDIA_OBJECT with scoreboard enabled */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;

                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* phase 2: remaining diagonals, starting from column mb_width - 2
         * and wrapping back to it each time the right edge is reached */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = first_mb / mb_width;
        for (; !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* same availability / dependency computation as phase 1 */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width - 1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer++;
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    /* terminate the second-level batch */
    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
605
/*
 * Fill the second-level VME batchbuffer with one MEDIA_OBJECT per MB in
 * plain raster order (no HW scoreboard).  Each command carries 3 inline
 * dwords: MB position, per-MB control flags (quality level, transform
 * flag, intra neighbour availability), and the per-MB QP.
 */
static void
gen9_vme_fill_vme_batchbuffer(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              int mb_width, int mb_height,
                              int kernel,
                              int transform_8x8_mode_flag,
                              struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;
    int i, s;
    unsigned int *command_ptr;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    int qp;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    int qp_mb, qp_index;

    /* frame-level QP: explicit for CQP, otherwise taken from the BRC state
     * for the current temporal layer and slice type */
    if (encoder_context->rate_control_mode == VA_RC_CQP)
        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    else
        qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
        int slice_mb_begin = pSliceParameter->macroblock_address;
        int slice_mb_number = pSliceParameter->num_macroblocks;
        unsigned int mb_intra_ub;
        /* non-zero when the slice does not start at a row boundary */
        int slice_mb_x = pSliceParameter->macroblock_address % mb_width;
        for (i = 0; i < slice_mb_number;) {
            int mb_count = i + slice_mb_begin;
            mb_x = mb_count % mb_width;
            mb_y = mb_count / mb_width;
            mb_intra_ub = 0;
            /* picture-level neighbour availability (A/E = left, B = top,
             * C = top-right, D = top-left) */
            if (mb_x != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
            }
            if (mb_y != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                if (mb_x != 0)
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                if (mb_x != (mb_width - 1))
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
            }
            /* within the first mb_width MBs of the slice, neighbours that
             * belong to a previous slice are not available */
            if (i < mb_width) {
                if (i == 0)
                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
                if ((i == (mb_width - 1)) && slice_mb_x) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                }
            }

            if ((i == mb_width) && slice_mb_x) {
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
            }
            /* 9-dword MEDIA_OBJECT, no scoreboard */
            *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
            *command_ptr++ = kernel;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;

            /*inline data */
            *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
            *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
            /* qp occupies one byte */
            if (vme_context->roi_enabled) {
                /* ROI mode: per-MB QP precomputed in qp_per_mb */
                qp_index = mb_y * mb_width + mb_x;
                qp_mb = *(vme_context->qp_per_mb + qp_index);
            } else
                qp_mb = qp;
            *command_ptr++ = qp_mb;

            *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
            *command_ptr++ = 0;
            i += 1;
        }
    }

    /* terminate the second-level batch */
    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
695
696 static void gen9_vme_media_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
697 {
698     struct gen6_vme_context *vme_context = encoder_context->vme_context;
699
700     gen8_gpe_context_init(ctx, &vme_context->gpe_context);
701
702     /* VME output buffer */
703     dri_bo_unreference(vme_context->vme_output.bo);
704     vme_context->vme_output.bo = NULL;
705
706     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
707     vme_context->vme_batchbuffer.bo = NULL;
708
709     /* VME state */
710     dri_bo_unreference(vme_context->vme_state.bo);
711     vme_context->vme_state.bo = NULL;
712 }
713
/*
 * Program the VME pipeline for one frame: pick the kernel for the slice
 * type, build the second-level batchbuffer (scoreboard walker when HW
 * scoring is usable, raster order otherwise), then chain to it from the
 * main batch via MI_BATCH_BUFFER_START.
 */
static void gen9_vme_pipeline_programing(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    int kernel_shader;
    bool allow_hwscore = true;
    int s;
    unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);

    /* HW scoreboard walking is disabled for low-quality mode and whenever
     * any slice does not start at a macroblock-row boundary */
    if (is_low_quality)
        allow_hwscore = false;
    else {
        for (s = 0; s < encode_state->num_slice_params_ext; s++) {
            pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
            if ((pSliceParameter->macroblock_address % width_in_mbs)) {
                allow_hwscore = false;
                break;
            }
        }
    }

    /* NOTE(review): the loop above reassigns pSliceParameter, so the slice
     * type checked here is that of the last slice inspected, not
     * necessarily slice 0 — presumably all slices of a frame share a type;
     * confirm against callers. */
    if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
        (pSliceParameter->slice_type == SLICE_TYPE_SI)) {
        kernel_shader = VME_INTRA_SHADER;
    } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
               (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
        kernel_shader = VME_INTER_SHADER;
    } else {
        /* B slices use the bidirectional kernel only with HW scoring */
        kernel_shader = VME_BINTER_SHADER;
        if (!allow_hwscore)
            kernel_shader = VME_INTER_SHADER;
    }
    if (allow_hwscore)
        gen9wa_vme_walker_fill_vme_batchbuffer(ctx,
                                               encode_state,
                                               width_in_mbs, height_in_mbs,
                                               kernel_shader,
                                               pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
                                               encoder_context);
    else
        gen9_vme_fill_vme_batchbuffer(ctx,
                                      encode_state,
                                      width_in_mbs, height_in_mbs,
                                      kernel_shader,
                                      pPicParameter->pic_fields.bits.transform_8x8_mode_flag,
                                      encoder_context);

    intel_batchbuffer_start_atomic(batch, 0x1000);
    gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
    /* chain into the second-level batchbuffer built above */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC64(batch,
                vme_context->vme_batchbuffer.bo,
                I915_GEM_DOMAIN_COMMAND, 0,
                0);
    ADVANCE_BATCH(batch);

    gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);
}
782
783 static VAStatus gen9_vme_prepare(VADriverContextP ctx,
784                                  struct encode_state *encode_state,
785                                  struct intel_encoder_context *encoder_context)
786 {
787     VAStatus vaStatus = VA_STATUS_SUCCESS;
788     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
789     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
790     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
791     struct gen6_vme_context *vme_context = encoder_context->vme_context;
792
793     if (!vme_context->h264_level ||
794         (vme_context->h264_level != pSequenceParameter->level_idc)) {
795         vme_context->h264_level = pSequenceParameter->level_idc;
796     }
797
798     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
799     intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context);
800     intel_h264_enc_roi_config(ctx, encode_state, encoder_context);
801
802     /*Setup all the memory object*/
803     gen9_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
804     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
805     //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
806     gen9_vme_constant_setup(ctx, encode_state, encoder_context, (pSliceParameter->slice_type == SLICE_TYPE_B) ? 2 : 1);
807
808     /*Programing media pipeline*/
809     gen9_vme_pipeline_programing(ctx, encode_state, encoder_context);
810
811     return vaStatus;
812 }
813
814 static VAStatus gen9_vme_run(VADriverContextP ctx,
815                              struct encode_state *encode_state,
816                              struct intel_encoder_context *encoder_context)
817 {
818     struct intel_batchbuffer *batch = encoder_context->base.batch;
819
820     intel_batchbuffer_flush(batch);
821
822     return VA_STATUS_SUCCESS;
823 }
824
/*
 * VME stop hook: nothing needs tearing down after the VME pass; present
 * only to complete the init/prepare/run/stop pipeline sequence.
 */
static VAStatus gen9_vme_stop(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    return VA_STATUS_SUCCESS;
}
831
832 static VAStatus
833 gen9_vme_pipeline(VADriverContextP ctx,
834                   VAProfile profile,
835                   struct encode_state *encode_state,
836                   struct intel_encoder_context *encoder_context)
837 {
838     gen9_vme_media_init(ctx, encoder_context);
839     gen9_vme_prepare(ctx, encode_state, encoder_context);
840     gen9_vme_run(ctx, encode_state, encoder_context);
841     gen9_vme_stop(ctx, encode_state, encoder_context);
842
843     return VA_STATUS_SUCCESS;
844 }
845
846 static void
847 gen9_vme_mpeg2_output_buffer_setup(VADriverContextP ctx,
848                                    struct encode_state *encode_state,
849                                    int index,
850                                    int is_intra,
851                                    struct intel_encoder_context *encoder_context)
852
853 {
854     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
855     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
856     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
857
858     gen9_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
859 }
860
861 static void
862 gen9_vme_mpeg2_output_vme_batchbuffer_setup(VADriverContextP ctx,
863                                             struct encode_state *encode_state,
864                                             int index,
865                                             struct intel_encoder_context *encoder_context)
866
867 {
868     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
869     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
870     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
871
872     gen9_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
873 }
874
875 static VAStatus
876 gen9_vme_mpeg2_surface_setup(VADriverContextP ctx,
877                              struct encode_state *encode_state,
878                              int is_intra,
879                              struct intel_encoder_context *encoder_context)
880 {
881     struct object_surface *obj_surface;
882
883     /*Setup surfaces state*/
884     /* current picture for encoding */
885     obj_surface = encode_state->input_yuv_object;
886     gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
887     gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
888     gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
889
890     if (!is_intra) {
891         /* reference 0 */
892         obj_surface = encode_state->reference_objects[0];
893
894         if (obj_surface->bo != NULL)
895             gen9_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
896
897         /* reference 1 */
898         obj_surface = encode_state->reference_objects[1];
899
900         if (obj_surface && obj_surface->bo != NULL)
901             gen9_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
902     }
903
904     /* VME output */
905     gen9_vme_mpeg2_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
906     gen9_vme_mpeg2_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
907
908     return VA_STATUS_SUCCESS;
909 }
910
911 static void
912 gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
913                                              struct encode_state *encode_state,
914                                              int mb_width, int mb_height,
915                                              int kernel,
916                                              struct intel_encoder_context *encoder_context)
917 {
918     struct gen6_vme_context *vme_context = encoder_context->vme_context;
919     unsigned int *command_ptr;
920
921 #define     MPEG2_SCOREBOARD        (1 << 21)
922
923     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
924     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
925
926     {
927         unsigned int mb_intra_ub, score_dep;
928         int x_outer, y_outer, x_inner, y_inner;
929         int xtemp_outer = 0;
930         int first_mb = 0;
931         int num_mb = mb_width * mb_height;
932
933         x_outer = 0;
934         y_outer = 0;
935
936         for (; x_outer < (mb_width - 2) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
937             x_inner = x_outer;
938             y_inner = y_outer;
939             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
940                 mb_intra_ub = 0;
941                 score_dep = 0;
942                 if (x_inner != 0) {
943                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
944                     score_dep |= MB_SCOREBOARD_A;
945                 }
946                 if (y_inner != 0) {
947                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
948                     score_dep |= MB_SCOREBOARD_B;
949
950                     if (x_inner != 0)
951                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
952
953                     if (x_inner != (mb_width - 1)) {
954                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
955                         score_dep |= MB_SCOREBOARD_C;
956                     }
957                 }
958
959                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
960                 *command_ptr++ = kernel;
961                 *command_ptr++ = MPEG2_SCOREBOARD;
962                 /* Indirect data */
963                 *command_ptr++ = 0;
964                 /* the (X, Y) term of scoreboard */
965                 *command_ptr++ = ((y_inner << 16) | x_inner);
966                 *command_ptr++ = score_dep;
967                 /*inline data */
968                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
969                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
970                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
971                 *command_ptr++ = 0;
972
973                 x_inner -= 2;
974                 y_inner += 1;
975             }
976             x_outer += 1;
977         }
978
979         xtemp_outer = mb_width - 2;
980         if (xtemp_outer < 0)
981             xtemp_outer = 0;
982         x_outer = xtemp_outer;
983         y_outer = 0;
984         for (; !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
985             y_inner = y_outer;
986             x_inner = x_outer;
987             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
988                 mb_intra_ub = 0;
989                 score_dep = 0;
990                 if (x_inner != 0) {
991                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
992                     score_dep |= MB_SCOREBOARD_A;
993                 }
994                 if (y_inner != 0) {
995                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
996                     score_dep |= MB_SCOREBOARD_B;
997
998                     if (x_inner != 0)
999                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1000
1001                     if (x_inner != (mb_width - 1)) {
1002                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1003                         score_dep |= MB_SCOREBOARD_C;
1004                     }
1005                 }
1006
1007                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1008                 *command_ptr++ = kernel;
1009                 *command_ptr++ = MPEG2_SCOREBOARD;
1010                 /* Indirect data */
1011                 *command_ptr++ = 0;
1012                 /* the (X, Y) term of scoreboard */
1013                 *command_ptr++ = ((y_inner << 16) | x_inner);
1014                 *command_ptr++ = score_dep;
1015                 /*inline data */
1016                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1017                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1018
1019                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
1020                 *command_ptr++ = 0;
1021                 x_inner -= 2;
1022                 y_inner += 1;
1023             }
1024             x_outer++;
1025             if (x_outer >= mb_width) {
1026                 y_outer += 1;
1027                 x_outer = xtemp_outer;
1028             }
1029         }
1030     }
1031
1032     *command_ptr++ = MI_BATCH_BUFFER_END;
1033     *command_ptr++ = 0;
1034
1035     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1036     return;
1037 }
1038
/*
 * Fill the VME batchbuffer with one MEDIA_OBJECT command per macroblock,
 * walking every slice in plain raster order (no scoreboard dependencies).
 * Used when the HW-scoreboard walker cannot be applied.
 *
 * mb_width/mb_height:      frame size in macroblocks
 * kernel:                  VME kernel index (intra or inter shader)
 * transform_8x8_mode_flag: forwarded to the kernel through the inline data
 */
static void
gen9_vme_mpeg2_fill_vme_batchbuffer(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    int mb_width, int mb_height,
                                    int kernel,
                                    int transform_8x8_mode_flag,
                                    struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;
    int i, s, j;
    unsigned int *command_ptr;


    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;

        for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
            int slice_mb_begin = slice_param->macroblock_address;
            int slice_mb_number = slice_param->num_macroblocks;
            unsigned int mb_intra_ub;

            for (i = 0; i < slice_mb_number;) {
                int mb_count = i + slice_mb_begin;

                mb_x = mb_count % mb_width;
                mb_y = mb_count / mb_width;
                mb_intra_ub = 0;

                /* Intra-prediction neighbour availability derived from the
                 * MB position (A/E: left, B: top, C: top-right, D: top-left) */
                if (mb_x != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                }

                if (mb_y != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;

                    if (mb_x != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (mb_x != (mb_width - 1))
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                }

                /* MEDIA_OBJECT header + 5 fixed dwords (no scoreboard,
                 * no indirect data) */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = 0;
                *command_ptr++ = 0;
                *command_ptr++ = 0;
                *command_ptr++ = 0;

                /*inline data: MB position and flags consumed by the kernel */
                *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
                *command_ptr++ = ((1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;
                i += 1;
            }

            /* each element of the buffer describes the next slice */
            slice_param++;
        }
    }

    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
1110
1111 static void
1112 gen9_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
1113                                    struct encode_state *encode_state,
1114                                    int is_intra,
1115                                    struct intel_encoder_context *encoder_context)
1116 {
1117     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1118     struct intel_batchbuffer *batch = encoder_context->base.batch;
1119     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1120     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1121     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1122     bool allow_hwscore = true;
1123     int s;
1124     int kernel_shader;
1125     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1126
1127     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1128         int j;
1129         VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
1130
1131         for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
1132             if (slice_param->macroblock_address % width_in_mbs) {
1133                 allow_hwscore = false;
1134                 break;
1135             }
1136         }
1137     }
1138
1139     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1140     if (pic_param->picture_type == VAEncPictureTypeIntra) {
1141         allow_hwscore = false;
1142         kernel_shader = VME_INTRA_SHADER;
1143     } else {
1144         kernel_shader = VME_INTER_SHADER;
1145     }
1146
1147     if (allow_hwscore)
1148         gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
1149                                                      encode_state,
1150                                                      width_in_mbs, height_in_mbs,
1151                                                      kernel_shader,
1152                                                      encoder_context);
1153     else
1154         gen9_vme_mpeg2_fill_vme_batchbuffer(ctx,
1155                                             encode_state,
1156                                             width_in_mbs, height_in_mbs,
1157                                             is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
1158                                             0,
1159                                             encoder_context);
1160
1161     intel_batchbuffer_start_atomic(batch, 0x1000);
1162     gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1163     BEGIN_BATCH(batch, 4);
1164     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1165     OUT_RELOC64(batch,
1166                 vme_context->vme_batchbuffer.bo,
1167                 I915_GEM_DOMAIN_COMMAND, 0,
1168                 0);
1169     OUT_BATCH(batch, 0);
1170     ADVANCE_BATCH(batch);
1171
1172     gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
1173
1174     intel_batchbuffer_end_atomic(batch);
1175 }
1176
1177 static VAStatus
1178 gen9_vme_mpeg2_prepare(VADriverContextP ctx,
1179                        struct encode_state *encode_state,
1180                        struct intel_encoder_context *encoder_context)
1181 {
1182     VAStatus vaStatus = VA_STATUS_SUCCESS;
1183     VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1184     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1185     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1186
1187     if ((!vme_context->mpeg2_level) ||
1188         (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
1189         vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
1190     }
1191
1192     /*Setup all the memory object*/
1193     gen9_vme_mpeg2_surface_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1194     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
1195     //gen9_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1196     intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context);
1197     gen9_vme_constant_setup(ctx, encode_state, encoder_context, 1);
1198
1199     /*Programing media pipeline*/
1200     gen9_vme_mpeg2_pipeline_programing(ctx, encode_state, slice_param->is_intra_slice, encoder_context);
1201
1202     return vaStatus;
1203 }
1204
1205 static VAStatus
1206 gen9_vme_mpeg2_pipeline(VADriverContextP ctx,
1207                         VAProfile profile,
1208                         struct encode_state *encode_state,
1209                         struct intel_encoder_context *encoder_context)
1210 {
1211     gen9_vme_media_init(ctx, encoder_context);
1212     gen9_vme_mpeg2_prepare(ctx, encode_state, encoder_context);
1213     gen9_vme_run(ctx, encode_state, encoder_context);
1214     gen9_vme_stop(ctx, encode_state, encoder_context);
1215
1216     return VA_STATUS_SUCCESS;
1217 }
1218
1219 static void
1220 gen9_vme_vp8_output_buffer_setup(VADriverContextP ctx,
1221                                  struct encode_state *encode_state,
1222                                  int index,
1223                                  int is_intra,
1224                                  struct intel_encoder_context *encoder_context)
1225 {
1226     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1227     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1228     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1229
1230     gen9_vme_output_buffer_setup(ctx, encode_state, index, encoder_context, is_intra, width_in_mbs, height_in_mbs);
1231 }
1232
1233 static void
1234 gen9_vme_vp8_output_vme_batchbuffer_setup(VADriverContextP ctx,
1235                                           struct encode_state *encode_state,
1236                                           int index,
1237                                           struct intel_encoder_context *encoder_context)
1238 {
1239     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1240     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1241     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1242
1243     gen9_vme_output_vme_batchbuffer_setup(ctx, encode_state, index, encoder_context, width_in_mbs, height_in_mbs);
1244 }
1245
1246 static VAStatus
1247 gen9_vme_vp8_surface_setup(VADriverContextP ctx,
1248                            struct encode_state *encode_state,
1249                            int is_intra,
1250                            struct intel_encoder_context *encoder_context)
1251 {
1252     struct object_surface *obj_surface;
1253
1254     /*Setup surfaces state*/
1255     /* current picture for encoding */
1256     obj_surface = encode_state->input_yuv_object;
1257     gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
1258     gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
1259     gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
1260
1261     if (!is_intra) {
1262         /* reference 0 */
1263         obj_surface = encode_state->reference_objects[0];
1264
1265         if (obj_surface->bo != NULL)
1266             gen9_vme_source_surface_state(ctx, 1, obj_surface, encoder_context);
1267
1268         /* reference 1 */
1269         obj_surface = encode_state->reference_objects[1];
1270
1271         if (obj_surface && obj_surface->bo != NULL)
1272             gen9_vme_source_surface_state(ctx, 2, obj_surface, encoder_context);
1273     }
1274
1275     /* VME output */
1276     gen9_vme_vp8_output_buffer_setup(ctx, encode_state, 3, is_intra, encoder_context);
1277     gen9_vme_vp8_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
1278
1279     return VA_STATUS_SUCCESS;
1280 }
1281
1282 static void
1283 gen9_vme_vp8_pipeline_programing(VADriverContextP ctx,
1284                                  struct encode_state *encode_state,
1285                                  int is_intra,
1286                                  struct intel_encoder_context *encoder_context)
1287 {
1288     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1289     struct intel_batchbuffer *batch = encoder_context->base.batch;
1290     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
1291     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
1292     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
1293     int kernel_shader = (is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER);
1294
1295     gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(ctx,
1296                                                  encode_state,
1297                                                  width_in_mbs, height_in_mbs,
1298                                                  kernel_shader,
1299                                                  encoder_context);
1300
1301     intel_batchbuffer_start_atomic(batch, 0x1000);
1302     gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
1303     BEGIN_BATCH(batch, 4);
1304     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1305     OUT_RELOC64(batch,
1306                 vme_context->vme_batchbuffer.bo,
1307                 I915_GEM_DOMAIN_COMMAND, 0,
1308                 0);
1309     OUT_BATCH(batch, 0);
1310     ADVANCE_BATCH(batch);
1311
1312     gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
1313
1314     intel_batchbuffer_end_atomic(batch);
1315 }
1316
1317 static VAStatus gen9_vme_vp8_prepare(VADriverContextP ctx,
1318                                      struct encode_state *encode_state,
1319                                      struct intel_encoder_context *encoder_context)
1320 {
1321     VAStatus vaStatus = VA_STATUS_SUCCESS;
1322     VAEncPictureParameterBufferVP8 *pPicParameter = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
1323     int is_intra = !pPicParameter->pic_flags.bits.frame_type;
1324
1325     /* update vp8 mbmv cost */
1326     intel_vme_vp8_update_mbmv_cost(ctx, encode_state, encoder_context);
1327
1328     /*Setup all the memory object*/
1329     gen9_vme_vp8_surface_setup(ctx, encode_state, is_intra, encoder_context);
1330     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
1331     gen9_vme_constant_setup(ctx, encode_state, encoder_context, 1);
1332
1333     /*Programing media pipeline*/
1334     gen9_vme_vp8_pipeline_programing(ctx, encode_state, is_intra, encoder_context);
1335
1336     return vaStatus;
1337 }
1338
1339 static VAStatus
1340 gen9_vme_vp8_pipeline(VADriverContextP ctx,
1341                       VAProfile profile,
1342                       struct encode_state *encode_state,
1343                       struct intel_encoder_context *encoder_context)
1344 {
1345     gen9_vme_media_init(ctx, encoder_context);
1346     gen9_vme_vp8_prepare(ctx, encode_state, encoder_context);
1347     gen9_vme_run(ctx, encode_state, encoder_context);
1348     gen9_vme_stop(ctx, encode_state, encoder_context);
1349
1350     return VA_STATUS_SUCCESS;
1351 }
1352
1353 /* HEVC */
1354
1355 static void
1356 gen9_vme_hevc_output_buffer_setup(VADriverContextP ctx,
1357                                   struct encode_state *encode_state,
1358                                   int index,
1359                                   struct intel_encoder_context *encoder_context)
1360
1361 {
1362     struct i965_driver_data *i965 = i965_driver_data(ctx);
1363     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1364     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1365     VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1366     int is_intra = pSliceParameter->slice_type == HEVC_SLICE_I;
1367     int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15) / 16;
1368     int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15) / 16;
1369
1370
1371     vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
1372     vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
1373
1374     if (is_intra)
1375         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
1376     else
1377         vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
1378     /*
1379      * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
1380      * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
1381      * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
1382      */
1383
1384     vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
1385                                               "VME output buffer",
1386                                               vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
1387                                               0x1000);
1388     assert(vme_context->vme_output.bo);
1389     vme_context->vme_buffer_suface_setup(ctx,
1390                                          &vme_context->gpe_context,
1391                                          &vme_context->vme_output,
1392                                          BINDING_TABLE_OFFSET(index),
1393                                          SURFACE_STATE_OFFSET(index));
1394 }
1395
1396 static void
1397 gen9_vme_hevc_output_vme_batchbuffer_setup(VADriverContextP ctx,
1398                                            struct encode_state *encode_state,
1399                                            int index,
1400                                            struct intel_encoder_context *encoder_context)
1401
1402 {
1403     struct i965_driver_data *i965 = i965_driver_data(ctx);
1404     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1405     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1406     int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15) / 16;
1407     int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15) / 16;
1408
1409     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
1410     vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
1411     vme_context->vme_batchbuffer.pitch = 16;
1412     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
1413                                                    "VME batchbuffer",
1414                                                    vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
1415                                                    0x1000);
1416 }
1417 static VAStatus
1418 gen9_vme_hevc_surface_setup(VADriverContextP ctx,
1419                             struct encode_state *encode_state,
1420                             int is_intra,
1421                             struct intel_encoder_context *encoder_context)
1422 {
1423     struct object_surface *obj_surface;
1424     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1425     GenHevcSurface *hevc_encoder_surface = NULL;
1426
1427     /*Setup surfaces state*/
1428     /* current picture for encoding */
1429     obj_surface = encode_state->input_yuv_object;
1430
1431     if ((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
1432         || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0)) {
1433         hevc_encoder_surface = (GenHevcSurface *)encode_state->reconstructed_object->private_data;
1434         assert(hevc_encoder_surface);
1435         obj_surface = hevc_encoder_surface->nv12_surface_obj;
1436     }
1437     gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
1438     gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
1439     gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
1440
1441     if (!is_intra) {
1442         VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1443         int slice_type;
1444
1445         slice_type = slice_param->slice_type;
1446         assert(slice_type != HEVC_SLICE_I);
1447
1448         /* to do HEVC */
1449         intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);
1450
1451         if (slice_type == HEVC_SLICE_B)
1452             intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
1453     }
1454
1455     /* VME output */
1456     gen9_vme_hevc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
1457     gen9_vme_hevc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
1458
1459     return VA_STATUS_SUCCESS;
1460 }
/*
 * Fill the second-level VME batch buffer with one MEDIA_OBJECT per 16x16
 * macroblock, walking each slice in a diagonal wavefront (each inner step
 * moves x -= 2, y += 1) so the hardware scoreboard (A/B/C neighbor
 * dependencies) can run rows concurrently.  Used only when every slice
 * starts on a CTB-row boundary (the "allow_hwscore" path).
 *
 * mb_width/mb_height are the picture dimensions in 16x16 MBs; kernel is the
 * interface-descriptor index of the shader to dispatch.
 */
static void
gen9wa_vme_hevc_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                                            struct encode_state *encode_state,
                                            int mb_width, int mb_height,
                                            int kernel,
                                            int transform_8x8_mode_flag,
                                            struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_row;
    int s;
    unsigned int *command_ptr;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
    int ctb_size = 1 << log2_ctb_size;
    /* Number of 16x16 MBs covered by one CTB (squared below for 2D). */
    int num_mb_in_ctb = (ctb_size + 15) / 16;
    num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;

#define     USE_SCOREBOARD      (1 << 21)

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    /* slice_segment_address must be picture_width_in_ctb aligned
     * (each slice starts at the beginning of a CTB row). */
    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
        /* Slice extent converted from CTB units to MB units. */
        int first_mb = pSliceParameter->slice_segment_address * num_mb_in_ctb;
        int num_mb = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb;
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;

        x_outer = first_mb % mb_width;
        y_outer = first_mb / mb_width;
        mb_row = y_outer;  /* first MB row of this slice */

        /* Phase 1: start diagonals from the slice's top row, left to right,
         * stopping two columns short of the right edge (phase 2 covers the
         * rest).  Each diagonal steps down-left (x -= 2, y += 1). */
        for (; x_outer < (mb_width - 2) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
            x_inner = x_outer;
            y_inner = y_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* Intra-neighbor availability and scoreboard dependency mask
                 * for this MB: A = left, B = above, C = above-right,
                 * D = above-left. */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                    if (x_inner != (mb_width - 1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                /* MEDIA_OBJECT: 8 dwords total (length field = 8 - 2). */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;

                /* Wavefront step: down one row, left two columns. */
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* Phase 2: start diagonals from column (mb_width - 2) and walk the
         * remaining starting points, wrapping to the next row when the
         * right edge is reached. */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = first_mb / mb_width;
        for (; !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* Same availability/dependency computation as phase 1. */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width - 1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
                *command_ptr++ = 0;
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer++;
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
1596
/*
 * Fill the second-level VME batch buffer with one MEDIA_OBJECT per 16x16
 * macroblock in plain raster order, without hardware-scoreboard
 * dependencies.  Used when a slice does not start on a CTB-row boundary
 * (the non-"allow_hwscore" path of gen9_vme_hevc_pipeline_programing).
 */
static void
gen9_vme_hevc_fill_vme_batchbuffer(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   int mb_width, int mb_height,
                                   int kernel,
                                   int transform_8x8_mode_flag,
                                   struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_x = 0, mb_y = 0;
    int i, s;
    unsigned int *command_ptr;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;

    int ctb_size = 1 << log2_ctb_size;
    /* Number of 16x16 MBs covered by one CTB (squared below for 2D). */
    int num_mb_in_ctb = (ctb_size + 15) / 16;
    num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
        /* Slice extent converted from CTB units to MB units. */
        int slice_mb_begin = pSliceParameter->slice_segment_address * num_mb_in_ctb;
        int slice_mb_number = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb;

        unsigned int mb_intra_ub;
        /* Non-zero when the slice starts mid-row. */
        int slice_mb_x = slice_mb_begin % mb_width;
        for (i = 0; i < slice_mb_number;) {
            int mb_count = i + slice_mb_begin;
            mb_x = mb_count % mb_width;
            mb_y = mb_count / mb_width;
            mb_intra_ub = 0;

            /* Picture-level intra-neighbor availability:
             * A/E = left, B = above, C = above-right, D = above-left. */
            if (mb_x != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
            }
            if (mb_y != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                if (mb_x != 0)
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                if (mb_x != (mb_width - 1))
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
            }
            /* First MB-row of the slice: neighbors above belong to another
             * slice, so strip B/C/D (and the left neighbor for the very
             * first MB).  For a mid-row start, the above-right neighbor of
             * the row's last MB is within this slice again. */
            if (i < mb_width) {
                if (i == 0)
                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
                if ((i == (mb_width - 1)) && slice_mb_x) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                }
            }

            /* Second row of a mid-row slice: above-left is outside. */
            if ((i == mb_width) && slice_mb_x) {
                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
            }

            /* MEDIA_OBJECT: 8 dwords (length field = 8 - 2); scoreboard
             * fields are zero on this path. */
            *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
            *command_ptr++ = kernel;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;
            *command_ptr++ = 0;

            /*inline data */
            *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
            *command_ptr++ = ((1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

            *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
            *command_ptr++ = 0;
            i += 1;
        }
    }

    *command_ptr++ = MI_BATCH_BUFFER_END;
    *command_ptr++ = 0;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
1678
/*
 * Program the HEVC VME media pipeline: choose the VME kernel from the
 * slice type, fill the second-level batch buffer (scoreboard wavefront
 * walker when every slice starts at a row boundary, raster order
 * otherwise), then emit the first-level batch that chains to it.
 */
static void gen9_vme_hevc_pipeline_programing(VADriverContextP ctx,
                                              struct encode_state *encode_state,
                                              struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15) / 16;
    int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15) / 16;
    int kernel_shader;
    bool allow_hwscore = true;
    int s;

    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;

    int ctb_size = 1 << log2_ctb_size;
    /* Number of 16x16 MBs covered by one CTB (squared below for 2D). */
    int num_mb_in_ctb = (ctb_size + 15) / 16;
    int transform_8x8_mode_flag = 1;
    num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;

    /* The scoreboard walker requires every slice to start at the beginning
     * of an MB row; otherwise fall back to the raster-order path. */
    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
        int slice_mb_begin = pSliceParameter->slice_segment_address * num_mb_in_ctb;
        if ((slice_mb_begin % width_in_mbs)) {
            allow_hwscore = false;
            break;
        }
    }

    /* NOTE(review): after the loop above, pSliceParameter points at the
     * last slice inspected, not slice 0 — the kernel is chosen from that
     * slice's type.  Presumably all slices of a frame share a type here;
     * confirm against callers. */
    if (pSliceParameter->slice_type == HEVC_SLICE_I) {
        kernel_shader = VME_INTRA_SHADER;
    } else if (pSliceParameter->slice_type == HEVC_SLICE_P) {
        kernel_shader = VME_INTER_SHADER;
    } else {
        kernel_shader = VME_BINTER_SHADER;
        if (!allow_hwscore)
            kernel_shader = VME_INTER_SHADER;
    }
    if (allow_hwscore)
        gen9wa_vme_hevc_walker_fill_vme_batchbuffer(ctx,
                                                    encode_state,
                                                    width_in_mbs, height_in_mbs,
                                                    kernel_shader,
                                                    transform_8x8_mode_flag,
                                                    encoder_context);
    else
        gen9_vme_hevc_fill_vme_batchbuffer(ctx,
                                           encode_state,
                                           width_in_mbs, height_in_mbs,
                                           kernel_shader,
                                           transform_8x8_mode_flag,
                                           encoder_context);

    /* First-level batch: set up the media pipeline state, then chain to
     * the second-level batch buffer filled above. */
    intel_batchbuffer_start_atomic(batch, 0x1000);
    gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC64(batch,
                vme_context->vme_batchbuffer.bo,
                I915_GEM_DOMAIN_COMMAND, 0,
                0);
    ADVANCE_BATCH(batch);

    gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);
}
1748
1749 static VAStatus gen9_intel_init_hevc_surface(VADriverContextP ctx,
1750                                              struct intel_encoder_context *encoder_context,
1751                                              struct encode_state *encode_state,
1752                                              struct object_surface *input_obj_surface,
1753                                              struct object_surface *output_obj_surface,
1754                                              int set_flag)
1755 {
1756     struct i965_driver_data *i965 = i965_driver_data(ctx);
1757     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1758     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1759     GenHevcSurface *hevc_encoder_surface;
1760     struct i965_surface src_surface, dst_surface;
1761     struct object_surface *obj_surface;
1762     VARectangle rect;
1763     VAStatus status;
1764
1765     uint32_t size;
1766
1767     obj_surface = output_obj_surface;
1768     assert(obj_surface && obj_surface->bo);
1769
1770     if (obj_surface->private_data == NULL) {
1771
1772         if (mfc_context->pic_size.ctb_size == 16)
1773             size = ((pSequenceParameter->pic_width_in_luma_samples + 63) >> 6) *
1774                    ((pSequenceParameter->pic_height_in_luma_samples + 15) >> 4);
1775         else
1776             size = ((pSequenceParameter->pic_width_in_luma_samples + 31) >> 5) *
1777                    ((pSequenceParameter->pic_height_in_luma_samples + 31) >> 5);
1778         size <<= 6; /* in unit of 64bytes */
1779
1780         hevc_encoder_surface = calloc(sizeof(GenHevcSurface), 1);
1781
1782         assert(hevc_encoder_surface);
1783         hevc_encoder_surface->motion_vector_temporal_bo =
1784             dri_bo_alloc(i965->intel.bufmgr,
1785                          "motion vector temporal buffer",
1786                          size,
1787                          0x1000);
1788         assert(hevc_encoder_surface->motion_vector_temporal_bo);
1789
1790         hevc_encoder_surface->ctx = ctx;
1791         hevc_encoder_surface->nv12_surface_obj = NULL;
1792         hevc_encoder_surface->nv12_surface_id = VA_INVALID_SURFACE;
1793         hevc_encoder_surface->has_p010_to_nv12_done = 0;
1794
1795         obj_surface->private_data = (void *)hevc_encoder_surface;
1796         obj_surface->free_private_data = (void *)gen_free_hevc_surface;
1797     }
1798
1799     hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
1800
1801     if (!hevc_encoder_surface->has_p010_to_nv12_done && obj_surface->fourcc == VA_FOURCC_P010) {
1802         // convert input
1803         rect.x = 0;
1804         rect.y = 0;
1805         rect.width = obj_surface->orig_width;
1806         rect.height = obj_surface->orig_height;
1807
1808         src_surface.base = (struct object_base *)input_obj_surface;
1809         src_surface.type = I965_SURFACE_TYPE_SURFACE;
1810         src_surface.flags = I965_SURFACE_FLAG_FRAME;
1811
1812         if (SURFACE(hevc_encoder_surface->nv12_surface_id) == NULL) {
1813             status = i965_CreateSurfaces(ctx,
1814                                          obj_surface->orig_width,
1815                                          obj_surface->orig_height,
1816                                          VA_RT_FORMAT_YUV420,
1817                                          1,
1818                                          &hevc_encoder_surface->nv12_surface_id);
1819             assert(status == VA_STATUS_SUCCESS);
1820
1821             if (status != VA_STATUS_SUCCESS)
1822                 return status;
1823         }
1824
1825         obj_surface = SURFACE(hevc_encoder_surface->nv12_surface_id);
1826         hevc_encoder_surface->nv12_surface_obj = obj_surface;
1827         assert(obj_surface);
1828         i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1829
1830         dst_surface.base = (struct object_base *)obj_surface;
1831         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
1832         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
1833
1834         status = i965_image_processing(ctx,
1835                                        &src_surface,
1836                                        &rect,
1837                                        &dst_surface,
1838                                        &rect);
1839         assert(status == VA_STATUS_SUCCESS);
1840
1841         if (set_flag)
1842             hevc_encoder_surface->has_p010_to_nv12_done = 1;
1843     }
1844
1845     return VA_STATUS_SUCCESS;
1846 }
1847
1848 static VAStatus gen9_intel_hevc_input_check(VADriverContextP ctx,
1849                                             struct encode_state *encode_state,
1850                                             struct intel_encoder_context *encoder_context)
1851 {
1852     struct i965_driver_data *i965 = i965_driver_data(ctx);
1853     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1854     struct object_surface *obj_surface;
1855     GenHevcSurface *hevc_encoder_surface = NULL;
1856     int i;
1857     int fourcc;
1858
1859     obj_surface = SURFACE(encoder_context->input_yuv_surface);
1860     assert(obj_surface && obj_surface->bo);
1861
1862     fourcc = obj_surface->fourcc;
1863     /* Setup current frame and current direct mv buffer*/
1864     obj_surface = encode_state->reconstructed_object;
1865     if (fourcc == VA_FOURCC_P010)
1866         i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_P010, SUBSAMPLE_YUV420);
1867     else
1868         i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1869     hevc_encoder_surface = NULL;
1870     hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
1871     if (hevc_encoder_surface)
1872         hevc_encoder_surface->has_p010_to_nv12_done = 0;
1873     gen9_intel_init_hevc_surface(ctx, encoder_context, encode_state, encode_state->input_yuv_object,
1874                                  obj_surface, 0);
1875
1876     /* Setup reference frames and direct mv buffers*/
1877     for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
1878         obj_surface = encode_state->reference_objects[i];
1879
1880         if (obj_surface && obj_surface->bo) {
1881             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
1882             dri_bo_reference(obj_surface->bo);
1883
1884             gen9_intel_init_hevc_surface(ctx, encoder_context, encode_state, obj_surface,
1885                                          obj_surface, 1);
1886         } else {
1887             break;
1888         }
1889     }
1890
1891     return VA_STATUS_SUCCESS;
1892 }
1893
1894 static VAStatus gen9_vme_hevc_prepare(VADriverContextP ctx,
1895                                       struct encode_state *encode_state,
1896                                       struct intel_encoder_context *encoder_context)
1897 {
1898     VAStatus vaStatus = VA_STATUS_SUCCESS;
1899     VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1900     int is_intra = pSliceParameter->slice_type == HEVC_SLICE_I;
1901     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1902     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1903
1904     /* here use the avc level for hevc vme */
1905     if (!vme_context->hevc_level ||
1906         (vme_context->hevc_level != pSequenceParameter->general_level_idc)) {
1907         vme_context->hevc_level = pSequenceParameter->general_level_idc;
1908     }
1909
1910     //internal input check for main10
1911     gen9_intel_hevc_input_check(ctx, encode_state, encoder_context);
1912
1913     intel_vme_hevc_update_mbmv_cost(ctx, encode_state, encoder_context);
1914
1915     /*Setup all the memory object*/
1916     gen9_vme_hevc_surface_setup(ctx, encode_state, is_intra, encoder_context);
1917     gen9_vme_interface_setup(ctx, encode_state, encoder_context);
1918     //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
1919     gen9_vme_constant_setup(ctx, encode_state, encoder_context, 1);
1920
1921     /*Programing media pipeline*/
1922     gen9_vme_hevc_pipeline_programing(ctx, encode_state, encoder_context);
1923
1924     return vaStatus;
1925 }
1926
1927
1928 static VAStatus
1929 gen9_vme_hevc_pipeline(VADriverContextP ctx,
1930                        VAProfile profile,
1931                        struct encode_state *encode_state,
1932                        struct intel_encoder_context *encoder_context)
1933 {
1934     gen9_vme_media_init(ctx, encoder_context);
1935     gen9_vme_hevc_prepare(ctx, encode_state, encoder_context);
1936     gen9_vme_run(ctx, encode_state, encoder_context);
1937     gen9_vme_stop(ctx, encode_state, encoder_context);
1938
1939     return VA_STATUS_SUCCESS;
1940 }
1941
1942
1943 static void
1944 gen9_vme_context_destroy(void *context)
1945 {
1946     struct gen6_vme_context *vme_context = context;
1947
1948     gen8_gpe_context_destroy(&vme_context->gpe_context);
1949
1950     dri_bo_unreference(vme_context->vme_output.bo);
1951     vme_context->vme_output.bo = NULL;
1952
1953     dri_bo_unreference(vme_context->vme_state.bo);
1954     vme_context->vme_state.bo = NULL;
1955
1956     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
1957     vme_context->vme_batchbuffer.bo = NULL;
1958
1959     free(vme_context->vme_state_message);
1960     vme_context->vme_state_message = NULL;
1961
1962     dri_bo_unreference(vme_context->i_qp_cost_table);
1963     vme_context->i_qp_cost_table = NULL;
1964
1965     dri_bo_unreference(vme_context->p_qp_cost_table);
1966     vme_context->p_qp_cost_table = NULL;
1967
1968     dri_bo_unreference(vme_context->b_qp_cost_table);
1969     vme_context->b_qp_cost_table = NULL;
1970
1971     free(vme_context->qp_per_mb);
1972     vme_context->qp_per_mb = NULL;
1973
1974     free(vme_context);
1975 }
1976
1977 extern Bool i965_encoder_vp8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
1978
1979 Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1980 {
1981     struct i965_driver_data *i965 = i965_driver_data(ctx);
1982     struct gen6_vme_context *vme_context;
1983     struct i965_kernel *vme_kernel_list = NULL;
1984     int i965_kernel_num;
1985
1986     if (encoder_context->low_power_mode || encoder_context->codec == CODEC_JPEG) {
1987         encoder_context->vme_context = NULL;
1988         encoder_context->vme_pipeline = NULL;
1989         encoder_context->vme_context_destroy = NULL;
1990
1991         return True;
1992     }
1993
1994     if (encoder_context->codec == CODEC_VP9) {
1995         return gen9_vp9_vme_context_init(ctx, encoder_context);
1996     } else if (encoder_context->codec == CODEC_VP8) {
1997         return i965_encoder_vp8_vme_context_init(ctx, encoder_context);
1998     } else if (encoder_context->codec == CODEC_H264 ||
1999                encoder_context->codec == CODEC_H264_MVC) {
2000         return gen9_avc_vme_context_init(ctx, encoder_context);
2001     } else if (encoder_context->codec == CODEC_HEVC) {
2002         if (IS_GEN10(i965->intel.device_info))
2003             return gen10_hevc_vme_context_init(ctx, encoder_context);
2004         else
2005             return gen9_hevc_vme_context_init(ctx, encoder_context);
2006     }
2007
2008     vme_context = calloc(1, sizeof(struct gen6_vme_context));
2009
2010     switch (encoder_context->codec) {
2011     case CODEC_H264:
2012     case CODEC_H264_MVC:
2013         vme_kernel_list = gen9_vme_kernels;
2014         encoder_context->vme_pipeline = gen9_vme_pipeline;
2015         i965_kernel_num = sizeof(gen9_vme_kernels) / sizeof(struct i965_kernel);
2016         break;
2017
2018     case CODEC_MPEG2:
2019         vme_kernel_list = gen9_vme_mpeg2_kernels;
2020         encoder_context->vme_pipeline = gen9_vme_mpeg2_pipeline;
2021         i965_kernel_num = sizeof(gen9_vme_mpeg2_kernels) / sizeof(struct i965_kernel);
2022         break;
2023
2024     case CODEC_VP8:
2025         vme_kernel_list = gen9_vme_vp8_kernels;
2026         encoder_context->vme_pipeline = gen9_vme_vp8_pipeline;
2027         i965_kernel_num = sizeof(gen9_vme_vp8_kernels) / sizeof(struct i965_kernel);
2028         break;
2029
2030     case CODEC_HEVC:
2031         vme_kernel_list = gen9_vme_hevc_kernels;
2032         encoder_context->vme_pipeline = gen9_vme_hevc_pipeline;
2033         i965_kernel_num = sizeof(gen9_vme_hevc_kernels) / sizeof(struct i965_kernel);
2034         break;
2035
2036     default:
2037         /* never get here */
2038         assert(0);
2039         i965_kernel_num = 0;
2040
2041         break;
2042     }
2043
2044     assert(vme_context);
2045     vme_context->vme_kernel_sum = i965_kernel_num;
2046     vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2047
2048     vme_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
2049     vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
2050     vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
2051     vme_context->gpe_context.sampler.entry_size = 0;
2052     vme_context->gpe_context.sampler.max_entries = 0;
2053
2054     if (i965->intel.eu_total > 0) {
2055         vme_context->gpe_context.vfe_state.max_num_threads = 6 *
2056                                                              i965->intel.eu_total;
2057     } else
2058         vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2059
2060     vme_context->gpe_context.vfe_state.num_urb_entries = 64;
2061     vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
2062     vme_context->gpe_context.vfe_state.urb_entry_size = 16;
2063     vme_context->gpe_context.vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
2064
2065     gen7_vme_scoreboard_init(ctx, vme_context);
2066
2067     gen8_gpe_load_kernels(ctx,
2068                           &vme_context->gpe_context,
2069                           vme_kernel_list,
2070                           i965_kernel_num);
2071     vme_context->vme_surface2_setup = gen8_gpe_surface2_setup;
2072     vme_context->vme_media_rw_surface_setup = gen8_gpe_media_rw_surface_setup;
2073     vme_context->vme_buffer_suface_setup = gen8_gpe_buffer_suface_setup;
2074     vme_context->vme_media_chroma_surface_setup = gen8_gpe_media_chroma_surface_setup;
2075
2076     encoder_context->vme_context = vme_context;
2077     encoder_context->vme_context_destroy = gen9_vme_context_destroy;
2078
2079     vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
2080
2081     return True;
2082 }