/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Xiang Haihao <haihao.xiang@intel.com>
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_drv_video.h"
#include "i965_gpe_utils.h"

static void
i965_gpe_select(VADriverContextP ctx,
                struct i965_gpe_context *gpe_context,
                struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(batch);
}

static void
gen6_gpe_state_base_address(VADriverContextP ctx,
                            struct i965_gpe_context *gpe_context,
                            struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 10);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* General State Base Address */
    OUT_RELOC(batch,
              gpe_context->surface_state_binding_table.bo,
              I915_GEM_DOMAIN_INSTRUCTION,
              0,
              BASE_ADDRESS_MODIFY);                     /* Surface State Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Dynamic State Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Indirect Object Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Instruction Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* General State Access Upper Bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Dynamic State Access Upper Bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Indirect Object Access Upper Bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Instruction Access Upper Bound */

    ADVANCE_BATCH(batch);
}

static void
gen6_gpe_vfe_state(VADriverContextP ctx,
                   struct i965_gpe_context *gpe_context,
                   struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 8);

    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
    OUT_BATCH(batch, 0);                                        /* Scratch Space Base Pointer and Space */
    OUT_BATCH(batch,
              gpe_context->vfe_state.max_num_threads << 16 |    /* Maximum Number of Threads */
              gpe_context->vfe_state.num_urb_entries << 8 |     /* Number of URB Entries */
              gpe_context->vfe_state.gpgpu_mode << 2);          /* MEDIA Mode */
    OUT_BATCH(batch, 0);                                        /* Debug: Object ID */
    OUT_BATCH(batch,
              gpe_context->vfe_state.urb_entry_size << 16 |     /* URB Entry Allocation Size */
              gpe_context->vfe_state.curbe_allocation_size);    /* CURBE Allocation Size */
    /* vfe_desc5/6/7 decide whether the scoreboard is used. */
    OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc7.dword);

    ADVANCE_BATCH(batch);
}

static void
gen6_gpe_curbe_load(VADriverContextP ctx,
                    struct i965_gpe_context *gpe_context,
                    struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->curbe.length);
    OUT_RELOC(batch, gpe_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

    ADVANCE_BATCH(batch);
}

static void
gen6_gpe_idrt(VADriverContextP ctx,
              struct i965_gpe_context *gpe_context,
              struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
    OUT_RELOC(batch, gpe_context->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

    ADVANCE_BATCH(batch);
}

void
i965_gpe_load_kernels(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct i965_kernel *kernel_list,
                      unsigned int num_kernels)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    assert(num_kernels <= MAX_GPE_KERNELS);
    memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
    gpe_context->num_kernels = num_kernels;

    for (i = 0; i < num_kernels; i++) {
        struct i965_kernel *kernel = &gpe_context->kernels[i];

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size,
                                  0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }
}

void
i965_gpe_context_destroy(struct i965_gpe_context *gpe_context)
{
    int i;

    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    gpe_context->surface_state_binding_table.bo = NULL;

    dri_bo_unreference(gpe_context->idrt.bo);
    gpe_context->idrt.bo = NULL;

    dri_bo_unreference(gpe_context->curbe.bo);
    gpe_context->curbe.bo = NULL;

    for (i = 0; i < gpe_context->num_kernels; i++) {
        struct i965_kernel *kernel = &gpe_context->kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }
}

void
i965_gpe_context_init(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      gpe_context->surface_state_binding_table.length,
                      4096);
    assert(bo);
    gpe_context->surface_state_binding_table.bo = bo;

    dri_bo_unreference(gpe_context->idrt.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "interface descriptor table",
                      gpe_context->idrt.entry_size * gpe_context->idrt.max_entries,
                      4096);
    assert(bo);
    gpe_context->idrt.bo = bo;

    dri_bo_unreference(gpe_context->curbe.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "curbe buffer",
                      gpe_context->curbe.length,
                      4096);
    assert(bo);
    gpe_context->curbe.bo = bo;
}

void
gen6_gpe_pipeline_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct intel_batchbuffer *batch)
{
    intel_batchbuffer_emit_mi_flush(batch);

    i965_gpe_select(ctx, gpe_context, batch);
    gen6_gpe_state_base_address(ctx, gpe_context, batch);
    gen6_gpe_vfe_state(ctx, gpe_context, batch);
    gen6_gpe_curbe_load(ctx, gpe_context, batch);
    gen6_gpe_idrt(ctx, gpe_context, batch);
}
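
/*
 * A minimal usage sketch of the gen6/gen7 GPE entry points above. The
 * kernel table, buffer sizes and constants below are illustrative
 * assumptions made by the caller, not definitions from this file:
 *
 *     struct i965_gpe_context gpe_context;
 *
 *     gpe_context.surface_state_binding_table.length = 0x1000;  // caller-chosen
 *     gpe_context.idrt.max_entries = MAX_INTERFACE_DESC;        // caller-chosen
 *     gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
 *     gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;       // caller-chosen
 *
 *     i965_gpe_context_init(ctx, &gpe_context);
 *     i965_gpe_load_kernels(ctx, &gpe_context, my_kernels, num_my_kernels);
 *
 *     // per frame: program the media pipeline, then emit MEDIA_OBJECT
 *     // commands that reference the loaded kernels and bound surfaces
 *     gen6_gpe_pipeline_setup(ctx, &gpe_context, batch);
 *     ...
 *     i965_gpe_context_destroy(&gpe_context);
 */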

static void
gen8_gpe_pipeline_end(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct intel_batchbuffer *batch)
{
    /* Nothing to do */
}

static void
i965_gpe_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_gpe_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen7_gpe_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen7_gpe_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen8_gpe_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen8_gpe_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_gpe_set_surface2_state(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            struct i965_surface_state2 *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    assert(obj_surface->fourcc == VA_FOURCC_NV12);

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_base_address = obj_surface->bo->offset;
    /* ss1 */
    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;
    /* ss2 */
    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;
    i965_gpe_set_surface2_tiling(ss, tiling);
    /* ss3: UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
}

void
i965_gpe_surface2_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct object_surface *obj_surface,
                        unsigned long binding_table_offset,
                        unsigned long surface_state_offset)
{
    struct i965_surface_state2 *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct i965_surface_state2 *)((char *)bo->virtual + surface_state_offset);
    i965_gpe_set_surface2_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct i965_surface_state2, ss0),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}
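
/*
 * Note on the layout shared by i965_gpe_surface2_setup() above and the
 * other *_setup() helpers below: binding table entries and surface state
 * blocks live in the single surface_state_binding_table.bo. Each helper
 * writes a surface state at surface_state_offset, stores that offset into
 * the binding-table DWORD at binding_table_offset, and emits a relocation
 * so that the kernel patches the target buffer's GPU address into the
 * state when the batch executes.
 */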

static void
i965_gpe_set_media_rw_surface_state(VADriverContextP ctx,
                                    struct object_surface *obj_surface,
                                    struct i965_surface_state *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss1 */
    ss->ss1.base_addr = obj_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = h - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    i965_gpe_set_surface_tiling(ss, tiling);
}

void
i965_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                struct i965_gpe_context *gpe_context,
                                struct object_surface *obj_surface,
                                unsigned long binding_table_offset,
                                unsigned long surface_state_offset,
                                int write_enabled)
{
    struct i965_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
    i965_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      0,
                      surface_state_offset + offsetof(struct i965_surface_state, ss1),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
i965_gpe_set_buffer_surface_state(VADriverContextP ctx,
                                  struct i965_buffer_surface *buffer_surface,
                                  struct i965_surface_state *ss)
{
    int num_entries;

    assert(buffer_surface->bo);
    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.render_cache_read_mode = 1;
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    /* ss1 */
    ss->ss1.base_addr = buffer_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x1fff);
    /* ss3 */
    ss->ss3.depth = (((num_entries - 1) >> 20) & 0x7f);
    ss->ss3.pitch = buffer_surface->pitch - 1;
}

void
i965_gpe_buffer_suface_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_buffer_surface *buffer_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset)
{
    struct i965_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
    i965_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      surface_state_offset + offsetof(struct i965_surface_state, ss1),
                      buffer_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}
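
/*
 * A buffer surface encodes (num_entries - 1) split across the width,
 * height and depth fields of the surface state. A worked example for the
 * gen6 field widths above: for num_entries = 1000, num_entries - 1 = 999
 * = 0x3e7, so
 *     width  = 0x3e7 & 0x7f          = 0x67
 *     height = (0x3e7 >> 7) & 0x1fff = 0x7
 *     depth  = (0x3e7 >> 20) & 0x7f  = 0
 * and the hardware reassembles depth << 20 | height << 7 | width = 0x3e7.
 * The gen7/gen8 variants below use a 14-bit height and a 6-bit depth
 * starting at bit 21, but follow the same pattern.
 */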

static void
gen7_gpe_set_surface2_state(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            struct gen7_surface_state2 *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    assert(obj_surface->fourcc == VA_FOURCC_NV12);

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_base_address = obj_surface->bo->offset;
    /* ss1 */
    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;
    /* ss2 */
    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;
    gen7_gpe_set_surface2_tiling(ss, tiling);
    /* ss3: UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
}

void
gen7_gpe_surface2_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct object_surface *obj_surface,
                        unsigned long binding_table_offset,
                        unsigned long surface_state_offset)
{
    struct gen7_surface_state2 *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen7_surface_state2 *)((char *)bo->virtual + surface_state_offset);
    gen7_gpe_set_surface2_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct gen7_surface_state2, ss0),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen7_gpe_set_media_rw_surface_state(VADriverContextP ctx,
                                    struct object_surface *obj_surface,
                                    struct gen7_surface_state *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss1 */
    ss->ss1.base_addr = obj_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = h - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen7_gpe_set_surface_tiling(ss, tiling);
}

static void
gen75_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
                                         struct object_surface *obj_surface,
                                         struct gen7_surface_state *ss)
{
    int w, w_pitch;
    unsigned int tiling, swizzle;
    int cbcr_offset;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    w_pitch = obj_surface->width;

    cbcr_offset = obj_surface->height * obj_surface->width;
    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss1 */
    ss->ss1.base_addr = obj_surface->bo->offset + cbcr_offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = (obj_surface->height / 2) - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen7_gpe_set_surface_tiling(ss, tiling);
}

void
gen7_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                struct i965_gpe_context *gpe_context,
                                struct object_surface *obj_surface,
                                unsigned long binding_table_offset,
                                unsigned long surface_state_offset,
                                int write_enabled)
{
    struct gen7_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen7_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      0,
                      surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

void
gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx,
                                     struct i965_gpe_context *gpe_context,
                                     struct object_surface *obj_surface,
                                     unsigned long binding_table_offset,
                                     unsigned long surface_state_offset,
                                     int write_enabled)
{
    struct gen7_surface_state *ss;
    dri_bo *bo;
    int cbcr_offset;

    assert(obj_surface->fourcc == VA_FOURCC_NV12);
    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    cbcr_offset = obj_surface->height * obj_surface->width;
    ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen75_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      cbcr_offset,
                      surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}
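
/*
 * For NV12 the interleaved CbCr plane immediately follows the Y plane, so
 * the chroma surface is the same bo with a byte offset of one full Y
 * plane: cbcr_offset = height * width, where obj_surface->width is the
 * pitch in bytes and obj_surface->height the padded height. The chroma
 * plane is half as tall, hence the (height / 2) - 1 programmed into
 * ss2.height above.
 */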

static void
gen7_gpe_set_buffer_surface_state(VADriverContextP ctx,
                                  struct i965_buffer_surface *buffer_surface,
                                  struct gen7_surface_state *ss)
{
    int num_entries;

    assert(buffer_surface->bo);
    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    /* ss1 */
    ss->ss1.base_addr = buffer_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
    /* ss3 */
    ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
    ss->ss3.pitch = buffer_surface->pitch - 1;
}

void
gen7_gpe_buffer_suface_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_buffer_surface *buffer_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset)
{
    struct gen7_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen7_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                      buffer_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen8_gpe_set_surface2_state(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            struct gen8_surface_state2 *ss)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    assert(obj_surface->fourcc == VA_FOURCC_NV12);

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss5: MOCS; ss6/ss7: 48-bit surface base address */
    if (IS_GEN9(i965->intel.device_info))
        ss->ss5.surface_object_mocs = GEN9_CACHE_PTE;

    ss->ss6.base_addr = (uint32_t)obj_surface->bo->offset64;
    ss->ss7.base_addr_high = (uint32_t)(obj_surface->bo->offset64 >> 32);
    /* ss1 */
    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;
    /* ss2 */
    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;
    gen8_gpe_set_surface2_tiling(ss, tiling);
    /* ss3: UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
}

void
gen8_gpe_surface2_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct object_surface *obj_surface,
                        unsigned long binding_table_offset,
                        unsigned long surface_state_offset)
{
    struct gen8_surface_state2 *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen8_surface_state2 *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_surface2_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct gen8_surface_state2, ss6),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen8_gpe_set_media_rw_surface_state(VADriverContextP ctx,
                                    struct object_surface *obj_surface,
                                    struct gen8_surface_state *ss)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss1: MOCS */
    if (IS_GEN9(i965->intel.device_info))
        ss->ss1.surface_mocs = GEN9_CACHE_PTE;

    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss8/ss9: 48-bit surface base address */
    ss->ss8.base_addr = (uint32_t)obj_surface->bo->offset64;
    ss->ss9.base_addr_high = (uint32_t)(obj_surface->bo->offset64 >> 32);
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = h - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen8_gpe_set_surface_tiling(ss, tiling);
}

static void
gen8_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
                                        struct object_surface *obj_surface,
                                        struct gen8_surface_state *ss)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int w, w_pitch;
    unsigned int tiling, swizzle;
    int cbcr_offset;
    uint64_t base_offset;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    w_pitch = obj_surface->width;

    cbcr_offset = obj_surface->height * obj_surface->width;
    memset(ss, 0, sizeof(*ss));
    /* ss1: MOCS */
    if (IS_GEN9(i965->intel.device_info))
        ss->ss1.surface_mocs = GEN9_CACHE_PTE;

    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss8/ss9: 48-bit surface base address */
    base_offset = obj_surface->bo->offset64 + cbcr_offset;
    ss->ss8.base_addr = (uint32_t) base_offset;
    ss->ss9.base_addr_high = (uint32_t) (base_offset >> 32);
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = (obj_surface->height / 2) - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen8_gpe_set_surface_tiling(ss, tiling);
}

void
gen8_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                struct i965_gpe_context *gpe_context,
                                struct object_surface *obj_surface,
                                unsigned long binding_table_offset,
                                unsigned long surface_state_offset,
                                int write_enabled)
{
    struct gen8_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      0,
                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

void
gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx,
                                    struct i965_gpe_context *gpe_context,
                                    struct object_surface *obj_surface,
                                    unsigned long binding_table_offset,
                                    unsigned long surface_state_offset,
                                    int write_enabled)
{
    struct gen8_surface_state *ss;
    dri_bo *bo;
    int cbcr_offset;

    assert(obj_surface->fourcc == VA_FOURCC_NV12);
    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    cbcr_offset = obj_surface->height * obj_surface->width;
    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      cbcr_offset,
                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen8_gpe_set_buffer_surface_state(VADriverContextP ctx,
                                  struct i965_buffer_surface *buffer_surface,
                                  struct gen8_surface_state *ss)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int num_entries;

    assert(buffer_surface->bo);
    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    if (IS_GEN9(i965->intel.device_info))
        ss->ss1.surface_mocs = GEN9_CACHE_PTE;

    /* ss8/ss9: 48-bit surface base address */
    ss->ss8.base_addr = (uint32_t)buffer_surface->bo->offset64;
    ss->ss9.base_addr_high = (uint32_t)(buffer_surface->bo->offset64 >> 32);
    /* ss2 */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
    /* ss3 */
    ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
    ss->ss3.pitch = buffer_surface->pitch - 1;
}

void
gen8_gpe_buffer_suface_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_buffer_surface *buffer_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset)
{
    struct gen8_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                      buffer_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen8_gpe_state_base_address(VADriverContextP ctx,
                            struct i965_gpe_context *gpe_context,
                            struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 16);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));

    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);          /* General State Base Address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    /* DW4. Surface State Base Address */
    OUT_RELOC64(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);

    /* DW6. Dynamic State Base Address */
    if (gpe_context->dynamic_state.bo)
        OUT_RELOC64(batch, gpe_context->dynamic_state.bo,
                    I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
                    0, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW8. Indirect Object Base Address */
    if (gpe_context->indirect_state.bo)
        OUT_RELOC64(batch, gpe_context->indirect_state.bo,
                    I915_GEM_DOMAIN_SAMPLER,
                    0, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW10. Instruction Base Address */
    if (gpe_context->instruction_state.bo)
        OUT_RELOC64(batch, gpe_context->instruction_state.bo,
                    I915_GEM_DOMAIN_INSTRUCTION,
                    0, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW12. Size limitations */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* General State Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Dynamic State Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Indirect Object Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Instruction Access Upper Bound */

    /*
      OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);          // LLC Coherent Base Address
      OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); // LLC Coherent Upper Bound
    */

    ADVANCE_BATCH(batch);
}

static void
gen8_gpe_vfe_state(VADriverContextP ctx,
                   struct i965_gpe_context *gpe_context,
                   struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 9);

    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2));
    /* Scratch Space Base Pointer and Space */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              gpe_context->vfe_state.max_num_threads << 16 |    /* Maximum Number of Threads */
              gpe_context->vfe_state.num_urb_entries << 8 |     /* Number of URB Entries */
              gpe_context->vfe_state.gpgpu_mode << 2);          /* MEDIA Mode */
    OUT_BATCH(batch, 0);                                        /* Debug: Object ID */
    OUT_BATCH(batch,
              gpe_context->vfe_state.urb_entry_size << 16 |     /* URB Entry Allocation Size */
              gpe_context->vfe_state.curbe_allocation_size);    /* CURBE Allocation Size */

    /* vfe_desc5/6/7 decide whether the scoreboard is used. */
    OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc7.dword);

    ADVANCE_BATCH(batch);
}

static void
gen8_gpe_curbe_load(VADriverContextP ctx,
                    struct i965_gpe_context *gpe_context,
                    struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, ALIGN(gpe_context->curbe.length, 64));
    OUT_BATCH(batch, gpe_context->curbe.offset);

    ADVANCE_BATCH(batch);
}

static void
gen8_gpe_idrt(VADriverContextP ctx,
              struct i965_gpe_context *gpe_context,
              struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 6);

    OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
    OUT_BATCH(batch, gpe_context->idrt.offset);

    ADVANCE_BATCH(batch);
}

void
gen8_gpe_pipeline_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct intel_batchbuffer *batch)
{
    intel_batchbuffer_emit_mi_flush(batch);

    i965_gpe_select(ctx, gpe_context, batch);
    gen8_gpe_state_base_address(ctx, gpe_context, batch);
    gen8_gpe_vfe_state(ctx, gpe_context, batch);
    gen8_gpe_curbe_load(ctx, gpe_context, batch);
    gen8_gpe_idrt(ctx, gpe_context, batch);
}
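
/*
 * Unlike the gen6 path, the gen8 pipeline keeps CURBE data and interface
 * descriptors in the dynamic state heap programmed by STATE_BASE_ADDRESS,
 * so MEDIA_CURBE_LOAD and the interface descriptor load above take
 * offsets relative to that heap instead of relocations to separate
 * buffers. gen8_gpe_context_init() below carves those offsets out of a
 * single dynamic_state bo.
 */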

void
gen8_gpe_context_init(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;
    int bo_size;
    unsigned int start_offset, end_offset;

    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      gpe_context->surface_state_binding_table.length,
                      4096);
    assert(bo);
    gpe_context->surface_state_binding_table.bo = bo;

    bo_size = gpe_context->idrt.max_entries * ALIGN(gpe_context->idrt.entry_size, 64) +
        ALIGN(gpe_context->curbe.length, 64) +
        gpe_context->sampler.max_entries * ALIGN(gpe_context->sampler.entry_size, 64);
    dri_bo_unreference(gpe_context->dynamic_state.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "dynamic state",
                      bo_size,
                      4096);
    assert(bo);
    gpe_context->dynamic_state.bo = bo;
    gpe_context->dynamic_state.bo_size = bo_size;

    end_offset = 0;
    gpe_context->dynamic_state.end_offset = 0;

    /* Constant buffer offset */
    start_offset = ALIGN(end_offset, 64);
    dri_bo_unreference(gpe_context->curbe.bo);
    gpe_context->curbe.bo = bo;
    dri_bo_reference(gpe_context->curbe.bo);
    gpe_context->curbe.offset = start_offset;
    end_offset = start_offset + gpe_context->curbe.length;

    /* Interface descriptor offset */
    start_offset = ALIGN(end_offset, 64);
    dri_bo_unreference(gpe_context->idrt.bo);
    gpe_context->idrt.bo = bo;
    dri_bo_reference(gpe_context->idrt.bo);
    gpe_context->idrt.offset = start_offset;
    end_offset = start_offset + ALIGN(gpe_context->idrt.entry_size, 64) * gpe_context->idrt.max_entries;

    /* Sampler state offset */
    start_offset = ALIGN(end_offset, 64);
    dri_bo_unreference(gpe_context->sampler.bo);
    gpe_context->sampler.bo = bo;
    dri_bo_reference(gpe_context->sampler.bo);
    gpe_context->sampler.offset = start_offset;
    end_offset = start_offset + ALIGN(gpe_context->sampler.entry_size, 64) * gpe_context->sampler.max_entries;

    /* update the end offset of dynamic_state */
    gpe_context->dynamic_state.end_offset = end_offset;
}
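
/*
 * Resulting dynamic_state layout, each section aligned to 64 bytes. The
 * concrete numbers here are only an illustration, assuming
 * curbe.length = 0x140, idrt.entry_size = 64, idrt.max_entries = 2 and no
 * samplers:
 *
 *     0x0000  CURBE data             (0x140 bytes)
 *     0x0140  interface descriptors  (2 * 64 bytes)
 *     0x01c0  sampler states         (none in this example)
 */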

void
gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
{
    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    gpe_context->surface_state_binding_table.bo = NULL;

    dri_bo_unreference(gpe_context->instruction_state.bo);
    gpe_context->instruction_state.bo = NULL;

    dri_bo_unreference(gpe_context->dynamic_state.bo);
    gpe_context->dynamic_state.bo = NULL;

    dri_bo_unreference(gpe_context->indirect_state.bo);
    gpe_context->indirect_state.bo = NULL;

    dri_bo_unreference(gpe_context->curbe.bo);
    gpe_context->curbe.bo = NULL;

    dri_bo_unreference(gpe_context->idrt.bo);
    gpe_context->idrt.bo = NULL;

    dri_bo_unreference(gpe_context->sampler.bo);
    gpe_context->sampler.bo = NULL;
}

void
gen8_gpe_load_kernels(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct i965_kernel *kernel_list,
                      unsigned int num_kernels)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i, kernel_size = 0;
    unsigned int kernel_offset, end_offset;
    unsigned char *kernel_ptr;
    struct i965_kernel *kernel;

    assert(num_kernels <= MAX_GPE_KERNELS);
    memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
    gpe_context->num_kernels = num_kernels;

    for (i = 0; i < num_kernels; i++) {
        kernel = &gpe_context->kernels[i];

        kernel_size += ALIGN(kernel->size, 64);
    }

    gpe_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
                                                     "kernel shader",
                                                     kernel_size,
                                                     0x1000);
    if (gpe_context->instruction_state.bo == NULL) {
        WARN_ONCE("failure to allocate the buffer space for kernel shader\n");
        return;
    }

    gpe_context->instruction_state.bo_size = kernel_size;
    gpe_context->instruction_state.end_offset = 0;
    end_offset = 0;

    dri_bo_map(gpe_context->instruction_state.bo, 1);
    kernel_ptr = (unsigned char *)(gpe_context->instruction_state.bo->virtual);
    for (i = 0; i < num_kernels; i++) {
        kernel_offset = ALIGN(end_offset, 64);
        kernel = &gpe_context->kernels[i];
        kernel->kernel_offset = kernel_offset;

        if (kernel->size) {
            memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);

            end_offset = kernel_offset + kernel->size;
        }
    }

    gpe_context->instruction_state.end_offset = end_offset;

    dri_bo_unmap(gpe_context->instruction_state.bo);
}
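
/*
 * The kernels are packed back to back in instruction_state.bo at 64-byte
 * aligned offsets; kernel->kernel_offset is what a caller later programs
 * into its interface descriptor's kernel start pointer, relative to the
 * instruction base address set up by gen8_gpe_state_base_address().
 */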

static void
gen9_gpe_state_base_address(VADriverContextP ctx,
                            struct i965_gpe_context *gpe_context,
                            struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 19);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (19 - 2));

    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);          /* General State Base Address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    /* DW4. Surface State Base Address */
    OUT_RELOC64(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);

    /* DW6. Dynamic State Base Address */
    if (gpe_context->dynamic_state.bo)
        OUT_RELOC64(batch, gpe_context->dynamic_state.bo,
                    I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
                    I915_GEM_DOMAIN_RENDER, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW8. Indirect Object Base Address */
    if (gpe_context->indirect_state.bo)
        OUT_RELOC64(batch, gpe_context->indirect_state.bo,
                    I915_GEM_DOMAIN_SAMPLER,
                    0, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW10. Instruction Base Address */
    if (gpe_context->instruction_state.bo)
        OUT_RELOC64(batch, gpe_context->instruction_state.bo,
                    I915_GEM_DOMAIN_INSTRUCTION,
                    0, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW12. Size limitations */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* General State Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Dynamic State Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Indirect Object Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Instruction Access Upper Bound */

    /* DW16. The bindless surface state address */
    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0xFFFFF000);

    ADVANCE_BATCH(batch);
}

static void
gen9_gpe_select(VADriverContextP ctx,
                struct i965_gpe_context *gpe_context,
                struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
                     GEN9_PIPELINE_SELECTION_MASK |
                     GEN9_MEDIA_DOP_GATE_OFF |
                     GEN9_MEDIA_DOP_GATE_MASK |
                     GEN9_FORCE_MEDIA_AWAKE_ON |
                     GEN9_FORCE_MEDIA_AWAKE_MASK);
    ADVANCE_BATCH(batch);
}

void
gen9_gpe_pipeline_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct intel_batchbuffer *batch)
{
    intel_batchbuffer_emit_mi_flush(batch);

    gen9_gpe_select(ctx, gpe_context, batch);
    gen9_gpe_state_base_address(ctx, gpe_context, batch);
    gen8_gpe_vfe_state(ctx, gpe_context, batch);
    gen8_gpe_curbe_load(ctx, gpe_context, batch);
    gen8_gpe_idrt(ctx, gpe_context, batch);
}
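
/*
 * gen9_gpe_pipeline_setup() and gen9_gpe_pipeline_end() are meant to
 * bracket a media batch: the setup forces the media power well awake and
 * disables DOP clock gating, and pipeline_end restores both bits once the
 * workload has been queued.
 */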

void
gen9_gpe_pipeline_end(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
              GEN9_PIPELINE_SELECTION_MASK |
              GEN9_MEDIA_DOP_GATE_ON |
              GEN9_MEDIA_DOP_GATE_MASK |
              GEN9_FORCE_MEDIA_AWAKE_OFF |
              GEN9_FORCE_MEDIA_AWAKE_MASK);
    ADVANCE_BATCH(batch);
}

Bool
i965_allocate_gpe_resource(dri_bufmgr *bufmgr,
                           struct i965_gpe_resource *res,
                           int size,
                           const char *name)
{
    if (!res || !size)
        return false;

    res->size = size;
    res->bo = dri_bo_alloc(bufmgr, name, res->size, 4096);
    res->map = NULL;

    return (res->bo != NULL);
}

void
i965_object_surface_to_2d_gpe_resource(struct i965_gpe_resource *res,
                                       struct object_surface *obj_surface)
{
    unsigned int swizzle;

    res->type = I965_GPE_RESOURCE_2D;
    res->width = obj_surface->orig_width;
    res->height = obj_surface->orig_height;
    res->pitch = obj_surface->width;
    res->size = obj_surface->size;
    res->cb_cr_pitch = obj_surface->cb_cr_pitch;
    res->x_cb_offset = obj_surface->x_cb_offset;
    res->y_cb_offset = obj_surface->y_cb_offset;
    res->bo = obj_surface->bo;
    res->map = NULL;

    dri_bo_reference(res->bo);
    dri_bo_get_tiling(obj_surface->bo, &res->tiling, &swizzle);
}

void
i965_dri_object_to_buffer_gpe_resource(struct i965_gpe_resource *res,
                                       dri_bo *bo)
{
    unsigned int swizzle;

    res->type = I965_GPE_RESOURCE_BUFFER;
    res->width = bo->size;
    res->height = 1;
    res->pitch = res->width;
    res->size = res->pitch * res->height;
    res->bo = bo;
    res->map = NULL;

    dri_bo_reference(res->bo);
    dri_bo_get_tiling(res->bo, &res->tiling, &swizzle);
}

void
i965_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
                                   dri_bo *bo,
                                   unsigned int width,
                                   unsigned int height,
                                   unsigned int pitch)
{
    unsigned int swizzle;

    res->type = I965_GPE_RESOURCE_2D;
    res->width = width;
    res->height = height;
    res->pitch = pitch;
    res->size = res->pitch * res->height;
    res->bo = bo;
    res->map = NULL;

    dri_bo_reference(res->bo);
    dri_bo_get_tiling(res->bo, &res->tiling, &swizzle);
}

void
i965_zero_gpe_resource(struct i965_gpe_resource *res)
{
    if (res->bo) {
        dri_bo_map(res->bo, 1);
        memset(res->bo->virtual, 0, res->size);
        dri_bo_unmap(res->bo);
    }
}

void
i965_free_gpe_resource(struct i965_gpe_resource *res)
{
    dri_bo_unreference(res->bo);
    res->bo = NULL;
    res->map = NULL;
}

void *
i965_map_gpe_resource(struct i965_gpe_resource *res)
{
    int ret;

    if (res->bo) {
        ret = dri_bo_map(res->bo, 1);

        if (ret == 0)
            res->map = res->bo->virtual;
        else
            res->map = NULL;
    } else
        res->map = NULL;

    return res->map;
}

void
i965_unmap_gpe_resource(struct i965_gpe_resource *res)
{
    if (res->bo && res->map)
        dri_bo_unmap(res->bo);

    res->map = NULL;
}
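
/*
 * Typical life cycle of an i965_gpe_resource, sketched under the
 * assumption that the caller owns a dri_bufmgr. The resource name, size
 * and the "stats" use case are illustrative only:
 *
 *     struct i965_gpe_resource stats;
 *
 *     if (!i965_allocate_gpe_resource(bufmgr, &stats, stats_size, "mb stats"))
 *         return;                        // allocation failed
 *     i965_zero_gpe_resource(&stats);    // clear before first GPU use
 *
 *     uint32_t *data = i965_map_gpe_resource(&stats);
 *     if (data) {
 *         ... read or write CPU side ...
 *         i965_unmap_gpe_resource(&stats);
 *     }
 *     i965_free_gpe_resource(&stats);    // drops the bo reference
 */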
1489
1490 void
1491 gen8_gpe_mi_flush_dw(VADriverContextP ctx,
1492                      struct intel_batchbuffer *batch,
1493                      struct gpe_mi_flush_dw_parameter *params)
1494 {
1495     int video_pipeline_cache_invalidate = 0;
1496     int post_sync_operation = MI_FLUSH_DW_NOWRITE;
1497
1498     if (params->video_pipeline_cache_invalidate)
1499         video_pipeline_cache_invalidate = MI_FLUSH_DW_VIDEO_PIPELINE_CACHE_INVALIDATE;
1500
1501     if (params->bo)
1502         post_sync_operation = MI_FLUSH_DW_WRITE_QWORD;
1503
1504     __OUT_BATCH(batch, (MI_FLUSH_DW2 |
1505                         video_pipeline_cache_invalidate |
1506                         post_sync_operation |
1507                         (5 - 2))); /* Always use PPGTT */
1508
1509     if (params->bo) {
1510         __OUT_RELOC64(batch,
1511                       params->bo,
1512                       I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1513                       params->offset);
1514     } else {
1515         __OUT_BATCH(batch, 0);
1516         __OUT_BATCH(batch, 0);
1517     }
1518
1519     __OUT_BATCH(batch, params->dw0);
1520     __OUT_BATCH(batch, params->dw1);
1521 }
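
/*
 * A hedged usage sketch (status_bo and the payload value are assumptions,
 * not driver names): supplying a bo turns the flush into a post-sync
 * QWORD write of dw0/dw1 at bo + offset, which is how completion words
 * are landed in memory behind the flush.
 *
 *     struct gpe_mi_flush_dw_parameter fdw;
 *
 *     memset(&fdw, 0, sizeof(fdw));
 *     fdw.video_pipeline_cache_invalidate = 1;
 *     fdw.bo = status_bo;   // hypothetical status buffer
 *     fdw.offset = 0;
 *     fdw.dw0 = 0x1;        // hypothetical completion marker
 *     gen8_gpe_mi_flush_dw(ctx, batch, &fdw);
 */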
1522
1523 void
1524 gen8_gpe_mi_store_data_imm(VADriverContextP ctx,
1525                            struct intel_batchbuffer *batch,
1526                            struct gpe_mi_store_data_imm_parameter *params)
1527 {
1528     if (params->is_qword) {
1529         __OUT_BATCH(batch, MI_STORE_DATA_IMM |
1530                     (1 << 21) | /* store QWORD */
1531                     (5 - 2)); /* Always use PPGTT */
1532     } else {
1533         __OUT_BATCH(batch, MI_STORE_DATA_IMM | (4 - 2)); /* Always use PPGTT */
1534     }
1535
1536     __OUT_RELOC64(batch,
1537                   params->bo,
1538                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1539                   params->offset);
1540     __OUT_BATCH(batch, params->dw0);
1541
1542     if (params->is_qword)
1543         __OUT_BATCH(batch, params->dw1);
1544 }
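
/*
 * Note on the encoding above: bit 21 selects the QWORD form of
 * MI_STORE_DATA_IMM, which carries one extra payload dword; that is why
 * the DWORD form uses a (4 - 2) length field and the QWORD form (5 - 2).
 */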
1545
1546 void
1547 gen8_gpe_mi_store_register_mem(VADriverContextP ctx,
1548                                struct intel_batchbuffer *batch,
1549                                struct gpe_mi_store_register_mem_parameter *params)
1550 {
1551     __OUT_BATCH(batch, (MI_STORE_REGISTER_MEM | (4 - 2))); /* Always use PPGTT */
1552     __OUT_BATCH(batch, params->mmio_offset);
1553     __OUT_RELOC64(batch,
1554                   params->bo,
1555                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1556                   params->offset);
1557 }
1558
1559 void
1560 gen8_gpe_mi_load_register_mem(VADriverContextP ctx,
1561                               struct intel_batchbuffer *batch,
1562                               struct gpe_mi_load_register_mem_parameter *params)
1563 {
1564     __OUT_BATCH(batch, (MI_LOAD_REGISTER_MEM | (4 - 2))); /* Always use PPGTT */
1565     __OUT_BATCH(batch, params->mmio_offset);
1566     __OUT_RELOC64(batch,
1567                   params->bo,
1568                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1569                   params->offset);
1570 }
1571
1572 void
1573 gen8_gpe_mi_load_register_imm(VADriverContextP ctx,
1574                               struct intel_batchbuffer *batch,
1575                               struct gpe_mi_load_register_imm_parameter *params)
1576 {
1577     __OUT_BATCH(batch, (MI_LOAD_REGISTER_IMM | (3 - 2)));
1578     __OUT_BATCH(batch, params->mmio_offset);
1579     __OUT_BATCH(batch, params->data);
1580 }
1581
1582 void
1583 gen8_gpe_mi_load_register_reg(VADriverContextP ctx,
1584                               struct intel_batchbuffer *batch,
1585                               struct gpe_mi_load_register_reg_parameter *params)
1586 {
1587     __OUT_BATCH(batch, (MI_LOAD_REGISTER_REG | (3 - 2)));
1588     __OUT_BATCH(batch, params->src_mmio_offset);
1589     __OUT_BATCH(batch, params->dst_mmio_offset);
1590 }
1591
1592 void
1593 gen9_gpe_mi_math(VADriverContextP ctx,
1594                  struct intel_batchbuffer *batch,
1595                  struct gpe_mi_math_parameter *params)
1596 {
1597     __OUT_BATCH(batch, (MI_MATH | (params->num_instructions - 1)));
1598     intel_batchbuffer_data(batch, params->instruction_list, params->num_instructions * 4);
1599 }
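
/*
 * MI_MATH takes a stream of 32-bit ALU instructions: the length field is
 * (num_instructions - 1) and the payload is copied verbatim at 4 bytes
 * per instruction. A sketch with the ALU encodings left as hypothetical
 * placeholders (the real opcodes come from the MI ALU instruction format,
 * not from this file):
 *
 *     unsigned int alu[4];
 *     struct gpe_mi_math_parameter mm;
 *
 *     alu[0] = ALU_LOAD_SRCA_REG0;    // hypothetical encodings
 *     alu[1] = ALU_LOAD_SRCB_REG1;
 *     alu[2] = ALU_ADD;
 *     alu[3] = ALU_STORE_REG0_ACCU;
 *
 *     memset(&mm, 0, sizeof(mm));
 *     mm.num_instructions = 4;
 *     mm.instruction_list = alu;
 *     gen9_gpe_mi_math(ctx, batch, &mm);
 */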
1600
1601 void
1602 gen9_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
1603                                          struct intel_batchbuffer *batch,
1604                                          struct gpe_mi_conditional_batch_buffer_end_parameter *params)
1605 {
1606     int compare_mask_mode_enabled = MI_COMPARE_MASK_MODE_ENANBLED;
1607
1608     if (params->compare_mask_mode_disabled)
1609         compare_mask_mode_enabled = 0;
1610
1611     __OUT_BATCH(batch, (MI_CONDITIONAL_BATCH_BUFFER_END |
1612                         (1 << 21) |
1613                         compare_mask_mode_enabled |
1614                         (4 - 2))); /* Always use PPGTT */
1615     __OUT_BATCH(batch, params->compare_data);
1616     __OUT_RELOC64(batch,
1617                   params->bo,
1618                   I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
1619                   params->offset);
1620 }
1621
1622 void
1623 gen8_gpe_mi_batch_buffer_start(VADriverContextP ctx,
1624                                struct intel_batchbuffer *batch,
1625                                struct gpe_mi_batch_buffer_start_parameter *params)
1626 {
1627     __OUT_BATCH(batch, (MI_BATCH_BUFFER_START |
1628                         (!!params->is_second_level << 22) |
1629                         (!params->use_global_gtt << 8) |
1630                         (1 << 0)));
1631     __OUT_RELOC64(batch,
1632                 params->bo,
1633                 I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
1634                 params->offset);
1635 }
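
/*
 * Decoding the flags above: bit 22 marks a second-level batch (the
 * chained buffer returns here instead of ending the batch), bit 8 set
 * means PPGTT rather than global GTT addressing, and the low bits are
 * simply the dword-length field (3 - 2) for the two-dword 48-bit address
 * emitted by __OUT_RELOC64.
 */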
1636
1637 void
1638 gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
1639                                     struct i965_gpe_context *gpe_context,
1640                                     struct gpe_dynamic_state_parameter *ds)
1641 {
1642     if (!ds->bo || !gpe_context)
1643         return;
1644
1645     dri_bo_unreference(gpe_context->dynamic_state.bo);
1646     gpe_context->dynamic_state.bo = ds->bo;
1647     dri_bo_reference(gpe_context->dynamic_state.bo);
1648     gpe_context->dynamic_state.bo_size = ds->bo_size;
1649
1650     /* curbe buffer is a part of the dynamic buffer */
1651     dri_bo_unreference(gpe_context->curbe.bo);
1652     gpe_context->curbe.bo = ds->bo;
1653     dri_bo_reference(gpe_context->curbe.bo);
1654     gpe_context->curbe.offset = ds->curbe_offset;
1655
1656     /* idrt buffer is a part of the dynamic buffer */
1657     dri_bo_unreference(gpe_context->idrt.bo);
1658     gpe_context->idrt.bo = ds->bo;
1659     dri_bo_reference(gpe_context->idrt.bo);
1660     gpe_context->idrt.offset = ds->idrt_offset;
1661
1662     /* sampler buffer is a part of the dynamic buffer */
1663     dri_bo_unreference(gpe_context->sampler.bo);
1664     gpe_context->sampler.bo = ds->bo;
1665     dri_bo_reference(gpe_context->sampler.bo);
1666     gpe_context->sampler.offset = ds->sampler_offset;
1667
1668     return;
1669 }
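
/*
 * A layout sketch (the offsets are illustrative, not required values):
 * one dynamic-state bo backs the CURBE, the interface descriptors (IDRT)
 * and the sampler state, so the same bo ends up referenced from all three
 * sub-allocations.
 *
 *     struct gpe_dynamic_state_parameter ds;
 *
 *     ds.bo = dynamic_bo;          // hypothetical allocation
 *     ds.bo_size = 0x3000;
 *     ds.curbe_offset = 0x0000;    // CURBE at the start
 *     ds.idrt_offset = 0x1000;     // descriptors after the CURBE
 *     ds.sampler_offset = 0x2000;  // sampler state last
 *     gen8_gpe_context_set_dynamic_buffer(ctx, gpe_context, &ds);
 */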
1670
1671 void *
1672 i965_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
1673 {
1674     if (dri_bo_map(gpe_context->curbe.bo, 1) != 0)
1675         return NULL;
1676     return (char *)gpe_context->curbe.bo->virtual + gpe_context->curbe.offset;
1677 }
1678
1679 void
1680 i965_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context)
1681 {
1682     dri_bo_unmap(gpe_context->curbe.bo);
1683 }
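
/*
 * An illustrative CURBE update (my_kernel_curbe is a hypothetical
 * per-kernel layout, not a driver type):
 *
 *     struct my_kernel_curbe *curbe;
 *
 *     curbe = i965_gpe_context_map_curbe(gpe_context);
 *     if (curbe) {
 *         memset(curbe, 0, sizeof(*curbe));
 *         curbe->picture_width_in_mbs = width_in_mbs;  // hypothetical field
 *         i965_gpe_context_unmap_curbe(gpe_context);
 *     }
 */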
1684
1685 void
1686 gen9_gpe_reset_binding_table(VADriverContextP ctx,
1687                              struct i965_gpe_context *gpe_context)
1688 {
1689     unsigned int *binding_table;
1690     unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset;
1691     int i;
1692
1693     dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
1694     binding_table = (unsigned int*)((char *)gpe_context->surface_state_binding_table.bo->virtual + binding_table_offset);
1695
1696     for (i = 0; i < gpe_context->surface_state_binding_table.max_entries; i++) {
1697         *(binding_table + i) = gpe_context->surface_state_binding_table.surface_state_offset + i * SURFACE_STATE_PADDED_SIZE_GEN9;
1698     }
1699
1700     dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
1701 }
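
/*
 * After this reset, binding-table entry i already points at the i-th
 * padded surface-state slot; gen9_gpe_context_add_surface() later
 * rewrites the one entry it binds, so unused entries still point at
 * in-range surface-state slots.
 */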
1702
1703 void
1704 gen8_gpe_setup_interface_data(VADriverContextP ctx,
1705                               struct i965_gpe_context *gpe_context)
1706 {
1707     struct gen8_interface_descriptor_data *desc;
1708     int i;
1709     dri_bo *bo;
1710     unsigned char *desc_ptr;
1711
1712     bo = gpe_context->idrt.bo;
1713     dri_bo_map(bo, 1);
1714     assert(bo->virtual);
1715     desc_ptr = (unsigned char *)bo->virtual + gpe_context->idrt.offset;
1716     desc = (struct gen8_interface_descriptor_data *)desc_ptr;
1717
1718     for (i = 0; i < gpe_context->num_kernels; i++) {
1719         struct i965_kernel *kernel;
1720
1721         kernel = &gpe_context->kernels[i];
1722         assert(sizeof(*desc) == 32);
1723
1724         /* Set up the descriptor table */
1725         memset(desc, 0, sizeof(*desc));
1726         desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
1727         desc->desc3.sampler_count = 0;
1728         desc->desc3.sampler_state_pointer = (gpe_context->sampler.offset >> 5);
1729         desc->desc4.binding_table_entry_count = 0;
1730         desc->desc4.binding_table_pointer = (gpe_context->surface_state_binding_table.binding_table_offset >> 5);
1731         desc->desc5.constant_urb_entry_read_offset = 0;
1732         desc->desc5.constant_urb_entry_read_length = ALIGN(gpe_context->curbe.length, 32) >> 5; // in registers
1733
1734         desc++;
1735     }
1736
1737     dri_bo_unmap(bo);
1738 }
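
/*
 * Note on the shifts above: kernel_start_pointer is in 64-byte units
 * (hence >> 6), while the sampler-state and binding-table pointers are in
 * 32-byte units (>> 5). constant_urb_entry_read_length counts 256-bit
 * registers, so the CURBE length is first rounded up to 32 bytes and then
 * divided by 32.
 */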
1739
1740 static void
1741 gen9_gpe_set_surface_tiling(struct gen9_surface_state *ss, unsigned int tiling)
1742 {
1743     switch (tiling) {
1744     case I915_TILING_NONE:
1745         ss->ss0.tiled_surface = 0;
1746         ss->ss0.tile_walk = 0;
1747         break;
1748     case I915_TILING_X:
1749         ss->ss0.tiled_surface = 1;
1750         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1751         break;
1752     case I915_TILING_Y:
1753         ss->ss0.tiled_surface = 1;
1754         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1755         break;
1756     }
1757 }
1758
1759 static void
1760 gen9_gpe_set_surface2_tiling(struct gen9_surface_state2 *ss, unsigned int tiling)
1761 {
1762     switch (tiling) {
1763     case I915_TILING_NONE:
1764         ss->ss2.tiled_surface = 0;
1765         ss->ss2.tile_walk = 0;
1766         break;
1767     case I915_TILING_X:
1768         ss->ss2.tiled_surface = 1;
1769         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1770         break;
1771     case I915_TILING_Y:
1772         ss->ss2.tiled_surface = 1;
1773         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1774         break;
1775     }
1776 }
1777
1778 static void
1779 gen9_gpe_set_2d_surface_state(struct gen9_surface_state *ss,
1780                               unsigned int cacheability_control,
1781                               unsigned int format,
1782                               unsigned int tiling,
1783                               unsigned int width,
1784                               unsigned int height,
1785                               unsigned int pitch,
1786                               uint64_t base_offset,
1787                               unsigned int y_offset)
1788 {
1789     memset(ss, 0, sizeof(*ss));
1790
1791     /* Always set to 1 (align-4 mode) */
1792     ss->ss0.vertical_alignment = 1;
1793     ss->ss0.horizontal_alignment = 1;
1794
1795     ss->ss0.surface_format = format;
1796     ss->ss0.surface_type = I965_SURFACE_2D;
1797
1798     ss->ss1.surface_mocs = cacheability_control;
1799
1800     ss->ss2.width = width - 1;
1801     ss->ss2.height = height - 1;
1802
1803     ss->ss3.pitch = pitch - 1;
1804
1805     ss->ss5.y_offset = y_offset;
1806
1807     ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
1808     ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
1809     ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
1810     ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
1811
1812     ss->ss8.base_addr = (uint32_t)base_offset;
1813     ss->ss9.base_addr_high = (uint32_t)(base_offset >> 32);
1814
1815     gen9_gpe_set_surface_tiling(ss, tiling);
1816 }
1817
1818 /* This is only for NV12 format */
1819 static void
1820 gen9_gpe_set_adv_surface_state(struct gen9_surface_state2 *ss,
1821                                unsigned int v_direction,
1822                                unsigned int cacheability_control,
1823                                unsigned int format,
1824                                unsigned int tiling,
1825                                unsigned int width,
1826                                unsigned int height,
1827                                unsigned int pitch,
1828                                uint64_t base_offset,
1829                                unsigned int y_cb_offset)
1830 {
1831     memset(ss, 0, sizeof(*ss));
1832
1833     ss->ss1.cbcr_pixel_offset_v_direction = v_direction;
1834     ss->ss1.width = width - 1;
1835     ss->ss1.height = height - 1;
1836
1837     ss->ss2.surface_format = format;
1838     ss->ss2.interleave_chroma = 1;
1839     ss->ss2.pitch = pitch - 1;
1840
1841     ss->ss3.y_offset_for_cb = y_cb_offset;
1842
1843     ss->ss5.surface_object_mocs = cacheability_control;
1844
1845     ss->ss6.base_addr = (uint32_t)base_offset;
1846     ss->ss7.base_addr_high = (uint32_t)(base_offset >> 32);
1847
1848     gen9_gpe_set_surface2_tiling(ss, tiling);
1849 }
1850
1851 static void
1852 gen9_gpe_set_buffer2_surface_state(struct gen9_surface_state *ss,
1853                                    unsigned int cacheability_control,
1854                                    unsigned int format,
1855                                    unsigned int size,
1856                                    unsigned int pitch,
1857                                    uint64_t base_offset)
1858 {
1859     memset(ss, 0, sizeof(*ss));
1860
1861     ss->ss0.surface_format = format;
1862     ss->ss0.surface_type = I965_SURFACE_BUFFER;
1863
1864     ss->ss1.surface_mocs = cacheability_control;
1865
1866     ss->ss2.width = (size - 1) & 0x7F;
1867     ss->ss2.height = ((size - 1) & 0x1FFF80) >> 7;
1868
1869     ss->ss3.depth = ((size - 1) & 0xFE00000) >> 21;
1870     ss->ss3.pitch = pitch - 1;
1871
1872     ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
1873     ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
1874     ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
1875     ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
1876
1877     ss->ss8.base_addr = (uint32_t)base_offset;
1878     ss->ss9.base_addr_high = (uint32_t)(base_offset >> 32);
1879 }
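
/*
 * The buffer size is encoded as (size - 1) split across three bit-fields:
 * width holds bits 6:0, height bits 20:7 and depth bits 27:21. A decode
 * sketch for cross-checking (not driver code):
 *
 *     size = ((ss->ss3.depth << 21) |
 *             (ss->ss2.height << 7) |
 *             ss->ss2.width) + 1;
 */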
1880
1881 void
1882 gen9_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
1883                              struct i965_gpe_surface *gpe_surface,
1884                              int index)
1885 {
1886     char *buf;
1887     unsigned int tiling, swizzle, width, height, pitch, tile_alignment, y_offset = 0;
1888     unsigned int surface_state_offset = gpe_context->surface_state_binding_table.surface_state_offset +
1889         index * SURFACE_STATE_PADDED_SIZE_GEN9;
1890     unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset +
1891         index * 4;
1892     struct i965_gpe_resource *gpe_resource = gpe_surface->gpe_resource;
1893
1894     dri_bo_get_tiling(gpe_resource->bo, &tiling, &swizzle);
1895
1896     dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
1897     buf = (char *)gpe_context->surface_state_binding_table.bo->virtual;
1898     *((unsigned int *)(buf + binding_table_offset)) = surface_state_offset;
1899
1900     if (gpe_surface->is_2d_surface && gpe_surface->is_override_offset) {
1901         struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
1902
1903         width = gpe_resource->width;
1904         height = gpe_resource->height;
1905         pitch = gpe_resource->pitch;
1906
1907         if (gpe_surface->is_media_block_rw) {
1908             if (gpe_surface->is_16bpp)
1909                 width = (ALIGN(width * 2, 4) >> 2);
1910             else
1911                 width = (ALIGN(width, 4) >> 2);
1912         }
1913
1914
1915         gen9_gpe_set_2d_surface_state(ss,
1916                                       gpe_surface->cacheability_control,
1917                                       gpe_surface->format,
1918                                       tiling,
1919                                       width, height, pitch,
1920                                       gpe_resource->bo->offset64 + gpe_surface->offset,
1921                                       0);
1922
1923         dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
1924                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1925                           gpe_surface->offset,
1926                           surface_state_offset + offsetof(struct gen9_surface_state, ss8),
1927                           gpe_resource->bo);
1928     } else if (gpe_surface->is_2d_surface && gpe_surface->is_uv_surface) {
1929         unsigned int cbcr_offset;
1930         struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
1931
1932         width = gpe_resource->width;
1933         height = gpe_resource->height / 2;
1934         pitch = gpe_resource->pitch;
1935
1936         if (gpe_surface->is_media_block_rw) {
1937             if (gpe_surface->is_16bpp)
1938                 width = (ALIGN(width * 2, 4) >> 2);
1939             else
1940                 width = (ALIGN(width, 4) >> 2);
1941         }
1942
1943         if (tiling == I915_TILING_Y) {
1944             tile_alignment = 32;
1945         } else if (tiling == I915_TILING_X) {
1946             tile_alignment = 8;
1947         } else
1948             tile_alignment = 1;
1949
1950         y_offset = (gpe_resource->y_cb_offset % tile_alignment);
1951         cbcr_offset = ALIGN_FLOOR(gpe_resource->y_cb_offset, tile_alignment) * pitch;
1952
1953         gen9_gpe_set_2d_surface_state(ss,
1954                                       gpe_surface->cacheability_control,
1955                                       I965_SURFACEFORMAT_R16_UINT,
1956                                       tiling,
1957                                       width, height, pitch,
1958                                       gpe_resource->bo->offset64 + cbcr_offset,
1959                                       y_offset);
1960
1961         dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
1962                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1963                           cbcr_offset,
1964                           surface_state_offset + offsetof(struct gen9_surface_state, ss8),
1965                           gpe_resource->bo);
1966     } else if (gpe_surface->is_2d_surface) {
1967         struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
1968
1969         width = gpe_resource->width;
1970         height = gpe_resource->height;
1971         pitch = gpe_resource->pitch;
1972
1973         if (gpe_surface->is_media_block_rw) {
1974             if (gpe_surface->is_16bpp)
1975                 width = (ALIGN(width * 2, 4) >> 2);
1976             else
1977                 width = (ALIGN(width, 4) >> 2);
1978         }
1979
1980         gen9_gpe_set_2d_surface_state(ss,
1981                                       gpe_surface->cacheability_control,
1982                                       gpe_surface->format,
1983                                       tiling,
1984                                       width, height, pitch,
1985                                       gpe_resource->bo->offset64,
1986                                       y_offset);
1987
1988         dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
1989                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1990                           0,
1991                           surface_state_offset + offsetof(struct gen9_surface_state, ss8),
1992                           gpe_resource->bo);
1993     } else if (gpe_surface->is_adv_surface) {
1994         struct gen9_surface_state2 *ss = (struct gen9_surface_state2 *)(buf + surface_state_offset);
1995
1996         width = gpe_resource->width;
1997         height = gpe_resource->height;
1998         pitch = gpe_resource->pitch;
1999
2000         gen9_gpe_set_adv_surface_state(ss,
2001                                        gpe_surface->v_direction,
2002                                        gpe_surface->cacheability_control,
2003                                        MFX_SURFACE_PLANAR_420_8,
2004                                        tiling,
2005                                        width, height, pitch,
2006                                        gpe_resource->bo->offset64,
2007                                        gpe_resource->y_cb_offset);
2008
2009         dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
2010                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
2011                           0,
2012                           surface_state_offset + offsetof(struct gen9_surface_state2, ss6),
2013                           gpe_resource->bo);
2014     } else {
2015         struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
2016         unsigned int format;
2017
2018         assert(gpe_surface->is_buffer);
2019
2020         if (gpe_surface->is_raw_buffer) {
2021             format = I965_SURFACEFORMAT_RAW;
2022             pitch = 1;
2023         } else {
2024             format = I965_SURFACEFORMAT_R32_UINT;
2025             pitch = sizeof(unsigned int);
2026         }
2027
2028         gen9_gpe_set_buffer2_surface_state(ss,
2029                                            gpe_surface->cacheability_control,
2030                                            format,
2031                                            gpe_surface->size,
2032                                            pitch,
2033                                            gpe_resource->bo->offset64 + gpe_surface->offset);
2034
2035         dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
2036                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
2037                           gpe_surface->offset,
2038                           surface_state_offset + offsetof(struct gen9_surface_state, ss8),
2039                           gpe_resource->bo);
2040     }
2041
2042     dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
2043 }
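
/*
 * An illustrative binding sketch (the MOCS value and surface format are
 * assumptions for the example): a 2D resource is attached to a
 * binding-table slot, after which the kernel addresses it by that index.
 *
 *     struct i965_gpe_surface gpe_surface;
 *
 *     memset(&gpe_surface, 0, sizeof(gpe_surface));
 *     gpe_surface.gpe_resource = &res;
 *     gpe_surface.is_2d_surface = 1;
 *     gpe_surface.is_media_block_rw = 1;
 *     gpe_surface.cacheability_control = 0;              // assumed MOCS
 *     gpe_surface.format = I965_SURFACEFORMAT_R8_UNORM;  // assumed format
 *     gen9_gpe_context_add_surface(gpe_context, &gpe_surface, 0);
 */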
2044
2045 bool
2046 i965_gpe_allocate_2d_resource(dri_bufmgr *bufmgr,
2047                               struct i965_gpe_resource *res,
2048                               int width,
2049                               int height,
2050                               int pitch,
2051                               const char *name)
2052 {
2053     int bo_size;
2054
2055     if (!res || !width || !height || !pitch)
2056         return false;
2057
2058     res->type = I965_GPE_RESOURCE_2D;
2059     res->width = width;
2060     res->height = height;
2061     res->pitch = pitch;
2062
2063     bo_size = ALIGN(height, 16) * pitch;
2064     res->size = bo_size;
2065
2066     res->bo = dri_bo_alloc(bufmgr, name, res->size, 4096);
2067     res->map = NULL;
2068
2069     return (res->bo != NULL);
2070 }
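
/*
 * The height is padded to a 16-row multiple before sizing the bo,
 * presumably so kernels that read or write whole macroblock rows stay
 * inside the allocation. A minimal sketch (names and the pitch choice are
 * illustrative):
 *
 *     struct i965_gpe_resource scratch;
 *
 *     if (i965_gpe_allocate_2d_resource(i965->intel.bufmgr, &scratch,
 *                                       width, height, ALIGN(width, 64),
 *                                       "scratch surface"))
 *         i965_zero_gpe_resource(&scratch);
 */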
2071
2072 void
2073 gen8_gpe_media_state_flush(VADriverContextP ctx,
2074                            struct i965_gpe_context *gpe_context,
2075                            struct intel_batchbuffer *batch)
2076 {
2077     BEGIN_BATCH(batch, 2);
2078
2079     OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH | (2 - 2));
2080     OUT_BATCH(batch, 0);
2081
2082     ADVANCE_BATCH(batch);
2083 }
2084
2085 void
2086 gen8_gpe_media_object(VADriverContextP ctx,
2087                       struct i965_gpe_context *gpe_context,
2088                       struct intel_batchbuffer *batch,
2089                       struct gpe_media_object_parameter *param)
2090 {
2091     int batch_size, subdata_size;
2092
2093     batch_size = 6;
2094     subdata_size = 0;
2095     if (param->pinline_data && param->inline_size) {
2096         subdata_size = ALIGN(param->inline_size, 4);
2097         batch_size += subdata_size / 4;
2098     }
2099     BEGIN_BATCH(batch, batch_size);
2100     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (batch_size - 2));
2101     OUT_BATCH(batch, param->interface_offset);
2102     OUT_BATCH(batch, param->use_scoreboard << 21);
2103     OUT_BATCH(batch, 0);
2104     OUT_BATCH(batch, (param->scoreboard_y << 16 |
2105                       param->scoreboard_x));
2106     OUT_BATCH(batch, param->scoreboard_mask);
2107
2108     if (subdata_size)
2109         intel_batchbuffer_data(batch, param->pinline_data, subdata_size);
2110
2111     ADVANCE_BATCH(batch);
2112 }
2113
2114 void
2115 gen8_gpe_media_object_walker(VADriverContextP ctx,
2116                              struct i965_gpe_context *gpe_context,
2117                              struct intel_batchbuffer *batch,
2118                              struct gpe_media_object_walker_parameter *param)
2119 {
2120     int walker_length;
2121
2122     walker_length = 17;
2123     if (param->inline_size)
2124         walker_length += ALIGN(param->inline_size, 4) / 4;
2125     BEGIN_BATCH(batch, walker_length);
2126     OUT_BATCH(batch, CMD_MEDIA_OBJECT_WALKER | (walker_length - 2));
2127     OUT_BATCH(batch, param->interface_offset);
2128     OUT_BATCH(batch, param->use_scoreboard << 21);
2129     OUT_BATCH(batch, 0);
2130     OUT_BATCH(batch, 0);
2131     OUT_BATCH(batch, (param->group_id_loop_select << 8 |
2132                       param->scoreboard_mask)); // DW5
2133     OUT_BATCH(batch, (param->color_count_minus1 << 24 |
2134                       param->middle_loop_extra_steps << 16 |
2135                       param->mid_loop_unit_y << 12 |
2136                       param->mid_loop_unit_x << 8));
2137     OUT_BATCH(batch, ((param->global_loop_exec_count & 0x3ff) << 16 |
2138                       (param->local_loop_exec_count & 0x3ff)));
2139     OUT_BATCH(batch, param->block_resolution.value);
2140     OUT_BATCH(batch, param->local_start.value);
2141     OUT_BATCH(batch, 0); // DW10
2142     OUT_BATCH(batch, param->local_outer_loop_stride.value);
2143     OUT_BATCH(batch, param->local_inner_loop_unit.value);
2144     OUT_BATCH(batch, param->global_resolution.value);
2145     OUT_BATCH(batch, param->global_start.value);
2146     OUT_BATCH(batch, param->global_outer_loop_stride.value);
2147     OUT_BATCH(batch, param->global_inner_loop_unit.value);
2148
2149     if (param->pinline_data && param->inline_size)
2150         intel_batchbuffer_data(batch, param->pinline_data, ALIGN(param->inline_size, 4));
2151
2152     ADVANCE_BATCH(batch);
2153 }
2154
2155
2156 void
2157 intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_parameter *kernel_walker_param,
2158                                              struct gpe_media_object_walker_parameter *walker_param)
2159 {
2160     memset(walker_param, 0, sizeof(*walker_param));
2161
2162     walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
2163
2164     walker_param->block_resolution.x = kernel_walker_param->resolution_x;
2165     walker_param->block_resolution.y = kernel_walker_param->resolution_y;
2166
2167     walker_param->global_resolution.x = kernel_walker_param->resolution_x;
2168     walker_param->global_resolution.y = kernel_walker_param->resolution_y;
2169
2170     walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
2171     walker_param->global_outer_loop_stride.y = 0;
2172
2173     walker_param->global_inner_loop_unit.x = 0;
2174     walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
2175
2176     walker_param->local_loop_exec_count = 0xFFFF;   // max value
2177     walker_param->global_loop_exec_count = 0xFFFF;  // max value
2178
2179     if (kernel_walker_param->no_dependency) {
2180         /* The no_dependency flag is used for VPP */
2181         walker_param->scoreboard_mask = 0;
2182         walker_param->use_scoreboard = 0;
2183         // Raster scan walking pattern
2184         walker_param->local_outer_loop_stride.x = 0;
2185         walker_param->local_outer_loop_stride.y = 1;
2186         walker_param->local_inner_loop_unit.x = 1;
2187         walker_param->local_inner_loop_unit.y = 0;
2188         walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
2189         walker_param->local_end.y = 0;
2190     } else {
2191         walker_param->local_end.x = 0;
2192         walker_param->local_end.y = 0;
2193
2194         // 26-degree dependency walking pattern
2195         walker_param->scoreboard_mask = 0x0F;
2196         walker_param->local_outer_loop_stride.x = 1;
2197         walker_param->local_outer_loop_stride.y = 0;
2198         walker_param->local_inner_loop_unit.x = -2;
2199         walker_param->local_inner_loop_unit.y = 1;
2200     }
2201 }
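
/*
 * An illustrative VPP setup (the 16x16 block size is an assumption about
 * the kernel, not something this helper enforces): the resolutions are in
 * walker blocks, not pixels.
 *
 *     struct intel_vpp_kernel_walker_parameter kernel_walker_param;
 *     struct gpe_media_object_walker_parameter walker_param;
 *
 *     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
 *     kernel_walker_param.no_dependency = 1;  // raster-scan walking
 *     kernel_walker_param.resolution_x = ALIGN(dst_width, 16) / 16;
 *     kernel_walker_param.resolution_y = ALIGN(dst_height, 16) / 16;
 *
 *     intel_vpp_init_media_object_walker_parameter(&kernel_walker_param,
 *                                                  &walker_param);
 *     gen8_gpe_media_object_walker(ctx, gpe_context, batch, &walker_param);
 */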
2202
2203 void
2204 gen8_gpe_reset_binding_table(VADriverContextP ctx, struct i965_gpe_context *gpe_context)
2205 {
2206     unsigned int *binding_table;
2207     unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset;
2208     int i;
2209
2210     dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
2211     binding_table = (unsigned int*)((char *)gpe_context->surface_state_binding_table.bo->virtual + binding_table_offset);
2212
2213     for (i = 0; i < gpe_context->surface_state_binding_table.max_entries; i++) {
2214         *(binding_table + i) = gpe_context->surface_state_binding_table.surface_state_offset + i * SURFACE_STATE_PADDED_SIZE_GEN8;
2215     }
2216
2217     dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
2218 }
2219
2220 static void
2221 gen8_gpe_set_2d_surface_state(struct gen8_surface_state *ss,
2222                               unsigned int vert_line_stride_offset,
2223                               unsigned int vert_line_stride,
2224                               unsigned int cacheability_control,
2225                               unsigned int format,
2226                               unsigned int tiling,
2227                               unsigned int width,
2228                               unsigned int height,
2229                               unsigned int pitch,
2230                               unsigned int base_offset,
2231                               unsigned int y_offset)
2232 {
2233     memset(ss, 0, sizeof(*ss));
2234
2235     ss->ss0.vert_line_stride_ofs = vert_line_stride_offset;
2236     ss->ss0.vert_line_stride = vert_line_stride;
2237     ss->ss0.surface_format = format;
2238     ss->ss0.surface_type = I965_SURFACE_2D;
2239
2240     ss->ss1.surface_mocs = cacheability_control;
2241
2242     ss->ss2.width = width - 1;
2243     ss->ss2.height = height - 1;
2244
2245     ss->ss3.pitch = pitch - 1;
2246
2247     ss->ss5.y_offset = y_offset;
2248
2249     ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
2250     ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
2251     ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
2252     ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
2253
2254     ss->ss8.base_addr = base_offset;
2255
2256     gen8_gpe_set_surface_tiling(ss, tiling);
2257 }
2258
2259 static void
2260 gen8_gpe_set_adv_surface_state(struct gen8_surface_state2 *ss,
2261                                unsigned int v_direction,
2262                                unsigned int cacheability_control,
2263                                unsigned int format,
2264                                unsigned int tiling,
2265                                unsigned int width,
2266                                unsigned int height,
2267                                unsigned int pitch,
2268                                unsigned int base_offset,
2269                                unsigned int y_cb_offset)
2270 {
2271     memset(ss, 0, sizeof(*ss));
2272
2273     ss->ss1.cbcr_pixel_offset_v_direction = v_direction;
2274     ss->ss1.width = width - 1;
2275     ss->ss1.height = height - 1;
2276
2277     ss->ss2.surface_format = format;
2278     ss->ss2.interleave_chroma = 1;
2279     ss->ss2.pitch = pitch - 1;
2280
2281     ss->ss3.y_offset_for_cb = y_cb_offset;
2282
2283     ss->ss5.surface_object_mocs = cacheability_control;
2284
2285     ss->ss6.base_addr = base_offset;
2286
2287     gen8_gpe_set_surface2_tiling(ss, tiling);
2288 }
2289
2290 static void
2291 gen8_gpe_set_buffer2_surface_state(struct gen8_surface_state *ss,
2292                                    unsigned int cacheability_control,
2293                                    unsigned int format,
2294                                    unsigned int size,
2295                                    unsigned int pitch,
2296                                    unsigned int base_offset)
2297 {
2298     memset(ss, 0, sizeof(*ss));
2299
2300     ss->ss0.surface_format = format;
2301     ss->ss0.surface_type = I965_SURFACE_BUFFER;
2302
2303     ss->ss1.surface_mocs = cacheability_control;
2304
2305     ss->ss2.width = (size - 1) & 0x7F;
2306     ss->ss2.height = ((size - 1) & 0x1FFF80) >> 7;
2307
2308     ss->ss3.depth = ((size - 1) & 0xFE00000) >> 21;
2309     ss->ss3.pitch = pitch - 1;
2310
2311     ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
2312     ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
2313     ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
2314     ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
2315
2316     ss->ss8.base_addr = base_offset;
2317 }
2318
2319 void
2320 gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
2321                              struct i965_gpe_surface *gpe_surface,
2322                              int index)
2323 {
2324     char *buf;
2325     unsigned int tiling, swizzle, width, height, pitch, tile_alignment, y_offset = 0;
2326     unsigned int surface_state_offset = gpe_context->surface_state_binding_table.surface_state_offset +
2327         index * SURFACE_STATE_PADDED_SIZE_GEN8;
2328     unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset +
2329         index * 4;
2330     struct i965_gpe_resource *gpe_resource = gpe_surface->gpe_resource;
2331
2332     dri_bo_get_tiling(gpe_resource->bo, &tiling, &swizzle);
2333
2334     dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
2335     buf = (char *)gpe_context->surface_state_binding_table.bo->virtual;
2336     *((unsigned int *)(buf + binding_table_offset)) = surface_state_offset;
2337
2338     if (gpe_surface->is_2d_surface) {
2339         struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf + surface_state_offset);
2340         unsigned int target_offset;
2341
2342         width = gpe_resource->width;
2343         height = gpe_resource->height;
2344         pitch = gpe_resource->pitch;
2345
2346         if (gpe_surface->is_override_offset) {
2347             y_offset = 0;
2348             target_offset = gpe_surface->offset;
2349         } else if (gpe_surface->is_uv_surface) {
2350             height /= 2;
2351
2352             if (tiling == I915_TILING_Y) {
2353                 tile_alignment = 32;
2354             } else if (tiling == I915_TILING_X) {
2355                 tile_alignment = 8;
2356             } else
2357                 tile_alignment = 1;
2358
2359             y_offset = (gpe_resource->y_cb_offset % tile_alignment);
2360             target_offset = ALIGN_FLOOR(gpe_resource->y_cb_offset, tile_alignment) * pitch;
2361         } else {
2362             y_offset = 0;
2363             target_offset = 0;
2364         }
2365
2366         if (gpe_surface->is_media_block_rw) {
2367             width = (ALIGN(width, 4) >> 2);
2368         }
2369
2370         gen8_gpe_set_2d_surface_state(ss,
2371                                       gpe_surface->vert_line_stride_offset,
2372                                       gpe_surface->vert_line_stride,
2373                                       gpe_surface->cacheability_control,
2374                                       gpe_surface->format,
2375                                       tiling,
2376                                       width, height, pitch,
2377                                       gpe_resource->bo->offset64 + target_offset,
2378                                       y_offset);
2379
2380         dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
2381                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
2382                           target_offset,
2383                           surface_state_offset + offsetof(struct gen8_surface_state, ss8),
2384                           gpe_resource->bo);
2385     } else if (gpe_surface->is_adv_surface) {
2386         struct gen8_surface_state2 *ss = (struct gen8_surface_state2 *)(buf + surface_state_offset);
2387
2388         width = gpe_resource->width;
2389         height = gpe_resource->height;
2390         pitch = gpe_resource->pitch;
2391
2392         gen8_gpe_set_adv_surface_state(ss,
2393                                        gpe_surface->v_direction,
2394                                        gpe_surface->cacheability_control,
2395                                        MFX_SURFACE_PLANAR_420_8,
2396                                        tiling,
2397                                        width, height, pitch,
2398                                        gpe_resource->bo->offset64,
2399                                        gpe_resource->y_cb_offset);
2400
2401         dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
2402                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
2403                           0,
2404                           surface_state_offset + offsetof(struct gen8_surface_state2, ss6),
2405                           gpe_resource->bo);
2406     } else {
2407         struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf + surface_state_offset);
2408         unsigned int format;
2409
2410         assert(gpe_surface->is_buffer);
2411
2412         if (gpe_surface->is_raw_buffer) {
2413             format = I965_SURFACEFORMAT_RAW;
2414             pitch = 1;
2415         } else {
2416             format = I965_SURFACEFORMAT_R32_UINT;
2417             pitch = sizeof(unsigned int);
2418         }
2419
2420         gen8_gpe_set_buffer2_surface_state(ss,
2421                                            gpe_surface->cacheability_control,
2422                                            format,
2423                                            gpe_surface->size,
2424                                            pitch,
2425                                            gpe_resource->bo->offset64 + gpe_surface->offset);
2426
2427         dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
2428                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
2429                           gpe_surface->offset,
2430                           surface_state_offset + offsetof(struct gen8_surface_state, ss8),
2431                           gpe_resource->bo);
2432     }
2433
2434     dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
2435 }
2436
2437 void
2438 gen8_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
2439                                          struct intel_batchbuffer *batch,
2440                                          struct gpe_mi_conditional_batch_buffer_end_parameter *param)
2441 {
2442     __OUT_BATCH(batch, (MI_CONDITIONAL_BATCH_BUFFER_END |
2443                         (1 << 21) |
2444                         (4 - 2))); /* Always use PPGTT */
2445     __OUT_BATCH(batch, param->compare_data);
2446     __OUT_RELOC64(batch,
2447                   param->bo,
2448                   I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
2449                   param->offset);
2450
2451 }
2452
2453 void
2454 gen8_gpe_pipe_control(VADriverContextP ctx,
2455                       struct intel_batchbuffer *batch,
2456                       struct gpe_pipe_control_parameter *param)
2457 {
2458     int render_target_cache_flush_enable = CMD_PIPE_CONTROL_WC_FLUSH;
2459     int dc_flush_enable = 0;
2460     int state_cache_invalidation_enable = 0;
2461     int constant_cache_invalidation_enable = 0;
2462     int vf_cache_invalidation_enable = 0;
2463     int instruction_cache_invalidation_enable = 0;
2464     int post_sync_operation = CMD_PIPE_CONTROL_NOWRITE;
2465     int use_global_gtt = CMD_PIPE_CONTROL_GLOBAL_GTT_GEN8;
2466     int cs_stall_enable = !param->disable_cs_stall;
2467
2468     switch (param->flush_mode) {
2469     case PIPE_CONTROL_FLUSH_WRITE_CACHE:
2470         render_target_cache_flush_enable = CMD_PIPE_CONTROL_WC_FLUSH;
2471         dc_flush_enable = CMD_PIPE_CONTROL_DC_FLUSH;
2472         break;
2473
2474     case PIPE_CONTROL_FLUSH_READ_CACHE:
2475         render_target_cache_flush_enable = 0;
2476         state_cache_invalidation_enable = CMD_PIPE_CONTROL_SC_INVALIDATION_GEN8;
2477         constant_cache_invalidation_enable = CMD_PIPE_CONTROL_CC_INVALIDATION_GEN8;
2478         vf_cache_invalidation_enable = CMD_PIPE_CONTROL_VFC_INVALIDATION_GEN8;
2479         instruction_cache_invalidation_enable = CMD_PIPE_CONTROL_IS_FLUSH;
2480         break;
2481
2482     case PIPE_CONTROL_FLUSH_NONE:
2483     default:
2484         render_target_cache_flush_enable = 0;
2485         break;
2486     }
2487
2488     if (param->bo) {
2489         post_sync_operation = CMD_PIPE_CONTROL_WRITE_QWORD;
2490         use_global_gtt = CMD_PIPE_CONTROL_LOCAL_PGTT_GEN8;
2491     } else {
2492         post_sync_operation = CMD_PIPE_CONTROL_NOWRITE;
2493         render_target_cache_flush_enable = CMD_PIPE_CONTROL_WC_FLUSH;
2494         state_cache_invalidation_enable = CMD_PIPE_CONTROL_SC_INVALIDATION_GEN8;
2495         constant_cache_invalidation_enable = CMD_PIPE_CONTROL_CC_INVALIDATION_GEN8;
2496         vf_cache_invalidation_enable = CMD_PIPE_CONTROL_VFC_INVALIDATION_GEN8;
2497         instruction_cache_invalidation_enable = CMD_PIPE_CONTROL_IS_FLUSH;
2498     }
2499
2500     __OUT_BATCH(batch, CMD_PIPE_CONTROL | (6 - 2));
2501     __OUT_BATCH(batch, (render_target_cache_flush_enable |
2502                         dc_flush_enable |
2503                         state_cache_invalidation_enable |
2504                         constant_cache_invalidation_enable |
2505                         vf_cache_invalidation_enable |
2506                         instruction_cache_invalidation_enable |
2507                         post_sync_operation |
2508                         use_global_gtt |
2509                         cs_stall_enable |
2510                         CMD_PIPE_CONTROL_FLUSH_ENABLE));
2511
2512     if (param->bo)
2513         __OUT_RELOC64(batch,
2514                       param->bo,
2515                       I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_RENDER,
2516                       param->offset);
2517     else {
2518         __OUT_BATCH(batch, 0);
2519         __OUT_BATCH(batch, 0);
2520     }
2521
2522     __OUT_BATCH(batch, param->dw0);
2523     __OUT_BATCH(batch, param->dw1);
2524 }
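
/*
 * A hedged fence sketch (fence_bo and the payload are assumptions): with
 * a bo, the PIPE_CONTROL performs a post-sync QWORD write of dw0/dw1
 * through the PPGTT; without one it falls back to a full flush plus
 * invalidation, as the else branch above shows.
 *
 *     struct gpe_pipe_control_parameter pc;
 *
 *     memset(&pc, 0, sizeof(pc));
 *     pc.flush_mode = PIPE_CONTROL_FLUSH_WRITE_CACHE;
 *     pc.bo = fence_bo;   // hypothetical fence buffer
 *     pc.dw0 = 0x1;       // hypothetical fence value
 *     gen8_gpe_pipe_control(ctx, batch, &pc);
 */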
2525
2526 bool
2527 i965_gpe_table_init(VADriverContextP ctx)
2528 {
2529     struct i965_driver_data *i965 = i965_driver_data(ctx);
2530     struct i965_gpe_table *gpe = &i965->gpe_table;
2531
2532     if (IS_GEN8(i965->intel.device_info)) {
2533         gpe->context_init = gen8_gpe_context_init;
2534         gpe->context_destroy = gen8_gpe_context_destroy;
2535         gpe->context_add_surface = gen8_gpe_context_add_surface;
2536         gpe->reset_binding_table = gen8_gpe_reset_binding_table;
2537         gpe->load_kernels = gen8_gpe_load_kernels;
2538         gpe->setup_interface_data = gen8_gpe_setup_interface_data;
2539         gpe->set_dynamic_buffer = gen8_gpe_context_set_dynamic_buffer;
2540         gpe->media_object = gen8_gpe_media_object;
2541         gpe->media_object_walker = gen8_gpe_media_object_walker;
2542         gpe->media_state_flush = gen8_gpe_media_state_flush;
2543         gpe->pipe_control = gen8_gpe_pipe_control;
2544         gpe->pipeline_end = gen8_gpe_pipeline_end;
2545         gpe->pipeline_setup = gen8_gpe_pipeline_setup;
2546         gpe->mi_conditional_batch_buffer_end = gen8_gpe_mi_conditional_batch_buffer_end;
2547         gpe->mi_batch_buffer_start = gen8_gpe_mi_batch_buffer_start;
2548         gpe->mi_load_register_reg = gen8_gpe_mi_load_register_reg;
2549         gpe->mi_load_register_imm = gen8_gpe_mi_load_register_imm;
2550         gpe->mi_load_register_mem = gen8_gpe_mi_load_register_mem;
2551         gpe->mi_store_register_mem = gen8_gpe_mi_store_register_mem;
2552         gpe->mi_store_data_imm = gen8_gpe_mi_store_data_imm;
2553         gpe->mi_flush_dw = gen8_gpe_mi_flush_dw;
2554     } else if (IS_GEN9(i965->intel.device_info)) {
2555         gpe->context_init = gen8_gpe_context_init;
2556         gpe->context_destroy = gen8_gpe_context_destroy;
2557         gpe->context_add_surface = gen9_gpe_context_add_surface;
2558         gpe->reset_binding_table = gen9_gpe_reset_binding_table;
2559         gpe->load_kernels = gen8_gpe_load_kernels;
2560         gpe->setup_interface_data = gen8_gpe_setup_interface_data;
2561         gpe->set_dynamic_buffer = gen8_gpe_context_set_dynamic_buffer;
2562         gpe->media_object = gen8_gpe_media_object;
2563         gpe->media_object_walker = gen8_gpe_media_object_walker;
2564         gpe->media_state_flush = gen8_gpe_media_state_flush;
2565         gpe->pipe_control = gen8_gpe_pipe_control;
2566         gpe->pipeline_end = gen9_gpe_pipeline_end;
2567         gpe->pipeline_setup = gen9_gpe_pipeline_setup;
2568         gpe->mi_conditional_batch_buffer_end = gen9_gpe_mi_conditional_batch_buffer_end;
2569         gpe->mi_batch_buffer_start = gen8_gpe_mi_batch_buffer_start;
2570         gpe->mi_load_register_reg = gen8_gpe_mi_load_register_reg;
2571         gpe->mi_load_register_imm = gen8_gpe_mi_load_register_imm;
2572         gpe->mi_load_register_mem = gen8_gpe_mi_load_register_mem;
2573         gpe->mi_store_register_mem = gen8_gpe_mi_store_register_mem;
2574         gpe->mi_store_data_imm = gen8_gpe_mi_store_data_imm;
2575         gpe->mi_flush_dw = gen8_gpe_mi_flush_dw;
2576     } else {
2577         // TODO: for other platforms
2578     }
2579
2580     return true;
2581 }
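
/*
 * Codec code is expected to go through this vtable instead of calling the
 * gen-specific symbols directly, e.g. (a sketch):
 *
 *     struct i965_driver_data *i965 = i965_driver_data(ctx);
 *     struct i965_gpe_table *gpe = &i965->gpe_table;
 *
 *     gpe->context_init(ctx, gpe_context);
 *     gpe->media_state_flush(ctx, gpe_context, batch);
 */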
2582
2583 void
2584 i965_gpe_table_terminate(VADriverContextP ctx)
2585 {
2586
2587 }