/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Xiang Haihao <haihao.xiang@intel.com>
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_gpe_utils.h"

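/*
 * Utility code for driving the GPE (General Purpose Engine, i.e. the MEDIA
 * pipeline) on Gen6 and later hardware.  A rough usage sketch (the names
 * and sizes below are hypothetical, caller-chosen values):
 *
 *     struct i965_gpe_context gpe_ctx = { ... };
 *
 *     gpe_ctx.idrt.max_entries = NUM_KERNELS;
 *     gpe_ctx.idrt.entry_size = IDRT_ENTRY_SIZE;
 *     gpe_ctx.curbe.length = CURBE_TOTAL_DATA_LENGTH;
 *     gpe_ctx.surface_state_binding_table.length = SURFACE_STATE_TABLE_SIZE;
 *
 *     i965_gpe_context_init(ctx, &gpe_ctx);
 *     i965_gpe_load_kernels(ctx, &gpe_ctx, kernels, NUM_KERNELS);
 *     // fill surface states, the binding table and the CURBE here
 *     gen6_gpe_pipeline_setup(ctx, &gpe_ctx, batch);
 *     // then emit MEDIA_OBJECT commands and flush the batch
 */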
static void
i965_gpe_select(VADriverContextP ctx,
                struct i965_gpe_context *gpe_context,
                struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(batch);
}

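/*
 * Gen6 STATE_BASE_ADDRESS is a 10-dword command.  Only the surface state
 * base is relocated (it points at the combined surface state / binding
 * table BO); the remaining bases and upper bounds are emitted as zero with
 * just the modify bit set, which leaves them unused.
 */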
static void
gen6_gpe_state_base_address(VADriverContextP ctx,
                            struct i965_gpe_context *gpe_context,
                            struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 10);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* General State Base Address */
    OUT_RELOC(batch,
              gpe_context->surface_state_binding_table.bo,
              I915_GEM_DOMAIN_INSTRUCTION,
              0,
              BASE_ADDRESS_MODIFY);                     /* Surface State Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Dynamic State Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Indirect Object Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Instruction Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* General State Access Upper Bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Dynamic State Access Upper Bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Indirect Object Access Upper Bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Instruction Access Upper Bound */

    ADVANCE_BATCH(batch);
}

static void
gen6_gpe_vfe_state(VADriverContextP ctx,
                   struct i965_gpe_context *gpe_context,
                   struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 8);

    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
    OUT_BATCH(batch, 0);                                        /* Scratch Space Base Pointer and Space */
    OUT_BATCH(batch,
              gpe_context->vfe_state.max_num_threads << 16 |    /* Maximum Number of Threads */
              gpe_context->vfe_state.num_urb_entries << 8 |     /* Number of URB Entries */
              gpe_context->vfe_state.gpgpu_mode << 2);          /* MEDIA Mode */
    OUT_BATCH(batch, 0);                                        /* Debug: Object ID */
    OUT_BATCH(batch,
              gpe_context->vfe_state.urb_entry_size << 16 |     /* URB Entry Allocation Size */
              gpe_context->vfe_state.curbe_allocation_size);    /* CURBE Allocation Size */
    /* the vfe_desc5/6/7 will decide whether the scoreboard is used. */
    OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc7.dword);

    ADVANCE_BATCH(batch);
}

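/*
 * MEDIA_CURBE_LOAD hands the constant buffer (CURBE) to the fixed function
 * so that thread payloads can pull their constants from it.  curbe.length
 * is the total data length in bytes; here it is backed by a dedicated BO,
 * while on Gen8+ it becomes an offset into the dynamic state buffer (see
 * gen8_gpe_curbe_load() below).
 */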
static void
gen6_gpe_curbe_load(VADriverContextP ctx,
                    struct i965_gpe_context *gpe_context,
                    struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->curbe.length);
    OUT_RELOC(batch, gpe_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

    ADVANCE_BATCH(batch);
}

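/*
 * MEDIA_INTERFACE_DESCRIPTOR_LOAD points the hardware at the interface
 * descriptor remapping table (IDRT): an array of idrt.max_entries
 * descriptors of idrt.entry_size bytes, each binding a kernel start
 * pointer to its binding table and sampler state.
 */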
static void
gen6_gpe_idrt(VADriverContextP ctx,
              struct i965_gpe_context *gpe_context,
              struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
    OUT_RELOC(batch, gpe_context->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

    ADVANCE_BATCH(batch);
}

void
i965_gpe_load_kernels(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct i965_kernel *kernel_list,
                      unsigned int num_kernels)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    assert(num_kernels <= MAX_GPE_KERNELS);
    memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
    gpe_context->num_kernels = num_kernels;

    for (i = 0; i < num_kernels; i++) {
        struct i965_kernel *kernel = &gpe_context->kernels[i];

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size,
                                  0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }
}

void
i965_gpe_context_destroy(struct i965_gpe_context *gpe_context)
{
    int i;

    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    gpe_context->surface_state_binding_table.bo = NULL;

    dri_bo_unreference(gpe_context->idrt.bo);
    gpe_context->idrt.bo = NULL;

    dri_bo_unreference(gpe_context->curbe.bo);
    gpe_context->curbe.bo = NULL;

    for (i = 0; i < gpe_context->num_kernels; i++) {
        struct i965_kernel *kernel = &gpe_context->kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }
}

void
i965_gpe_context_init(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      gpe_context->surface_state_binding_table.length,
                      4096);
    assert(bo);
    gpe_context->surface_state_binding_table.bo = bo;

    dri_bo_unreference(gpe_context->idrt.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "interface descriptor table",
                      gpe_context->idrt.entry_size * gpe_context->idrt.max_entries,
                      4096);
    assert(bo);
    gpe_context->idrt.bo = bo;

    dri_bo_unreference(gpe_context->curbe.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "curbe buffer",
                      gpe_context->curbe.length,
                      4096);
    assert(bo);
    gpe_context->curbe.bo = bo;
}

void
gen6_gpe_pipeline_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct intel_batchbuffer *batch)
{
    intel_batchbuffer_emit_mi_flush(batch);

    i965_gpe_select(ctx, gpe_context, batch);
    gen6_gpe_state_base_address(ctx, gpe_context, batch);
    gen6_gpe_vfe_state(ctx, gpe_context, batch);
    gen6_gpe_curbe_load(ctx, gpe_context, batch);
    gen6_gpe_idrt(ctx, gpe_context, batch);
}

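/*
 * The helpers below translate the I915_TILING_* mode reported by
 * dri_bo_get_tiling() into the tiled_surface/tile_walk bits of the surface
 * state.  The bit location changes between generations and between surface
 * state variants, hence one helper per layout.
 */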
static void
i965_gpe_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_gpe_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen7_gpe_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen7_gpe_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen8_gpe_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen8_gpe_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_gpe_set_surface2_state(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            struct i965_surface_state2 *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    assert(obj_surface->fourcc == VA_FOURCC_NV12);

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_base_address = obj_surface->bo->offset;
    /* ss1 */
    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;
    /* ss2 */
    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;
    i965_gpe_set_surface2_tiling(ss, tiling);
    /* ss3: UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
}

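/*
 * Surface states and the binding table live in one BO
 * (surface_state_binding_table.bo): a binding table entry is just the byte
 * offset of a surface state inside that BO.  Each setup helper below
 * writes the state at surface_state_offset, emits a relocation for the
 * surface base address field, and stores the offset into the binding
 * table slot.  A sketch with hypothetical offset macros:
 *
 *     i965_gpe_surface2_setup(ctx, &gpe_ctx, obj_surface,
 *                             BINDING_TABLE_OFFSET(0),
 *                             SURFACE_STATE_OFFSET(0));
 */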
void
i965_gpe_surface2_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct object_surface *obj_surface,
                        unsigned long binding_table_offset,
                        unsigned long surface_state_offset)
{
    struct i965_surface_state2 *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct i965_surface_state2 *)((char *)bo->virtual + surface_state_offset);
    i965_gpe_set_surface2_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct i965_surface_state2, ss0),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
i965_gpe_set_media_rw_surface_state(VADriverContextP ctx,
                                    struct object_surface *obj_surface,
                                    struct i965_surface_state *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss1 */
    ss->ss1.base_addr = obj_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = h - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    i965_gpe_set_surface_tiling(ss, tiling);
}

void
i965_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                struct i965_gpe_context *gpe_context,
                                struct object_surface *obj_surface,
                                unsigned long binding_table_offset,
                                unsigned long surface_state_offset,
                                int write_enabled)
{
    struct i965_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
    i965_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      0,
                      surface_state_offset + offsetof(struct i965_surface_state, ss1),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

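/*
 * Buffer surfaces encode the element count as (num_entries - 1) split
 * across the width, height and depth bitfields: 7/13/7 bits on Gen6 (the
 * 0x7f/0x1fff/0x7f masks below) and 7/14/6 bits on Gen7/Gen8
 * (0x7f/0x3fff/0x3f in the gen7/gen8 variants).
 */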
static void
i965_gpe_set_buffer_surface_state(VADriverContextP ctx,
                                  struct i965_buffer_surface *buffer_surface,
                                  struct i965_surface_state *ss)
{
    int num_entries;

    assert(buffer_surface->bo);
    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.render_cache_read_mode = 1;
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    /* ss1 */
    ss->ss1.base_addr = buffer_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x1fff);
    /* ss3 */
    ss->ss3.depth = (((num_entries - 1) >> 20) & 0x7f);
    ss->ss3.pitch = buffer_surface->pitch - 1;
}

void
i965_gpe_buffer_suface_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_buffer_surface *buffer_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset)
{
    struct i965_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
    i965_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      surface_state_offset + offsetof(struct i965_surface_state, ss1),
                      buffer_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen7_gpe_set_surface2_state(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            struct gen7_surface_state2 *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    assert(obj_surface->fourcc == VA_FOURCC_NV12);

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_base_address = obj_surface->bo->offset;
    /* ss1 */
    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;
    /* ss2 */
    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;
    gen7_gpe_set_surface2_tiling(ss, tiling);
    /* ss3: UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
}

void
gen7_gpe_surface2_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct object_surface *obj_surface,
                        unsigned long binding_table_offset,
                        unsigned long surface_state_offset)
{
    struct gen7_surface_state2 *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen7_surface_state2 *)((char *)bo->virtual + surface_state_offset);
    gen7_gpe_set_surface2_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct gen7_surface_state2, ss0),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen7_gpe_set_media_rw_surface_state(VADriverContextP ctx,
                                    struct object_surface *obj_surface,
                                    struct gen7_surface_state *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss1 */
    ss->ss1.base_addr = obj_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = h - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen7_gpe_set_surface_tiling(ss, tiling);
}

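/*
 * For NV12 the interleaved CbCr plane follows the Y plane, so its byte
 * offset within the BO is padded_height * pitch (obj_surface->height *
 * obj_surface->width below).  The chroma plane is then exposed as a
 * separate R8_UNORM 2D surface based at that offset.
 */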
static void
gen75_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
                                         struct object_surface *obj_surface,
                                         struct gen7_surface_state *ss)
{
    int w, w_pitch;
    unsigned int tiling, swizzle;
    int cbcr_offset;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    w_pitch = obj_surface->width;

    cbcr_offset = obj_surface->height * obj_surface->width;
    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss1 */
    ss->ss1.base_addr = obj_surface->bo->offset + cbcr_offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = (obj_surface->height / 2) - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen7_gpe_set_surface_tiling(ss, tiling);
}

void
gen7_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                struct i965_gpe_context *gpe_context,
                                struct object_surface *obj_surface,
                                unsigned long binding_table_offset,
                                unsigned long surface_state_offset,
                                int write_enabled)
{
    struct gen7_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen7_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      0,
                      surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

void
gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx,
                                     struct i965_gpe_context *gpe_context,
                                     struct object_surface *obj_surface,
                                     unsigned long binding_table_offset,
                                     unsigned long surface_state_offset,
                                     int write_enabled)
{
    struct gen7_surface_state *ss;
    dri_bo *bo;
    int cbcr_offset;

    assert(obj_surface->fourcc == VA_FOURCC_NV12);
    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    cbcr_offset = obj_surface->height * obj_surface->width;
    ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen75_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      cbcr_offset,
                      surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}


static void
gen7_gpe_set_buffer_surface_state(VADriverContextP ctx,
                                  struct i965_buffer_surface *buffer_surface,
                                  struct gen7_surface_state *ss)
{
    int num_entries;

    assert(buffer_surface->bo);
    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    /* ss1 */
    ss->ss1.base_addr = buffer_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
    /* ss3 */
    ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
    ss->ss3.pitch = buffer_surface->pitch - 1;
}

void
gen7_gpe_buffer_suface_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_buffer_surface *buffer_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset)
{
    struct gen7_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen7_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                      buffer_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

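/*
 * Gen8 introduces 48-bit graphics addresses: surface bases are split into
 * low and high dwords (ss6/ss7 or ss8/ss9) filled from the presumed offset
 * bo->offset64, and the setup helpers emit their relocation against the
 * low dword (libdrm applies a 64-bit fixup on Gen8+).
 */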
static void
gen8_gpe_set_surface2_state(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            struct gen8_surface_state2 *ss)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    assert(obj_surface->fourcc == VA_FOURCC_NV12);

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss5: memory object control state (Gen9) */
    if (IS_GEN9(i965->intel.device_info))
        ss->ss5.surface_object_mocs = GEN9_CACHE_PTE;

    /* ss6/ss7: 48-bit surface base address */
    ss->ss6.base_addr = (uint32_t)obj_surface->bo->offset64;
    ss->ss7.base_addr_high = (uint32_t)(obj_surface->bo->offset64 >> 32);
    /* ss1 */
    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;
    /* ss2 */
    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;
    gen8_gpe_set_surface2_tiling(ss, tiling);
    /* ss3: UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
}

void
gen8_gpe_surface2_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct object_surface *obj_surface,
                        unsigned long binding_table_offset,
                        unsigned long surface_state_offset)
{
    struct gen8_surface_state2 *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen8_surface_state2 *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_surface2_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct gen8_surface_state2, ss6),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen8_gpe_set_media_rw_surface_state(VADriverContextP ctx,
                                    struct object_surface *obj_surface,
                                    struct gen8_surface_state *ss)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss1: memory object control state (Gen9) */
    if (IS_GEN9(i965->intel.device_info))
        ss->ss1.surface_mocs = GEN9_CACHE_PTE;

    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss8/ss9: 48-bit surface base address */
    ss->ss8.base_addr = (uint32_t)obj_surface->bo->offset64;
    ss->ss9.base_addr_high = (uint32_t)(obj_surface->bo->offset64 >> 32);
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = h - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen8_gpe_set_surface_tiling(ss, tiling);
}

static void
gen8_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
                                        struct object_surface *obj_surface,
                                        struct gen8_surface_state *ss)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int w, w_pitch;
    unsigned int tiling, swizzle;
    int cbcr_offset;
    uint64_t base_offset;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    w_pitch = obj_surface->width;

    cbcr_offset = obj_surface->height * obj_surface->width;
    memset(ss, 0, sizeof(*ss));
    /* ss1: memory object control state (Gen9) */
    if (IS_GEN9(i965->intel.device_info))
        ss->ss1.surface_mocs = GEN9_CACHE_PTE;

    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss8/ss9: 48-bit surface base address, pointed at the CbCr plane */
    base_offset = obj_surface->bo->offset64 + cbcr_offset;
    ss->ss8.base_addr = (uint32_t) base_offset;
    ss->ss9.base_addr_high = (uint32_t) (base_offset >> 32);
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = (obj_surface->height / 2) - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen8_gpe_set_surface_tiling(ss, tiling);
}

void
gen8_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                struct i965_gpe_context *gpe_context,
                                struct object_surface *obj_surface,
                                unsigned long binding_table_offset,
                                unsigned long surface_state_offset,
                                int write_enabled)
{
    struct gen8_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      0,
                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

void
gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx,
                                    struct i965_gpe_context *gpe_context,
                                    struct object_surface *obj_surface,
                                    unsigned long binding_table_offset,
                                    unsigned long surface_state_offset,
                                    int write_enabled)
{
    struct gen8_surface_state *ss;
    dri_bo *bo;
    int cbcr_offset;

    assert(obj_surface->fourcc == VA_FOURCC_NV12);
    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    cbcr_offset = obj_surface->height * obj_surface->width;
    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      cbcr_offset,
                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}


static void
gen8_gpe_set_buffer_surface_state(VADriverContextP ctx,
                                  struct i965_buffer_surface *buffer_surface,
                                  struct gen8_surface_state *ss)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int num_entries;

    assert(buffer_surface->bo);
    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    /* ss1: memory object control state (Gen9) */
    if (IS_GEN9(i965->intel.device_info))
        ss->ss1.surface_mocs = GEN9_CACHE_PTE;

    /* ss8/ss9: 48-bit surface base address */
    ss->ss8.base_addr = (uint32_t)buffer_surface->bo->offset64;
    ss->ss9.base_addr_high = (uint32_t)(buffer_surface->bo->offset64 >> 32);
    /* ss2 */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
    /* ss3 */
    ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
    ss->ss3.pitch = buffer_surface->pitch - 1;
}

void
gen8_gpe_buffer_suface_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_buffer_surface *buffer_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset)
{
    struct gen8_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                      buffer_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen8_gpe_state_base_address(VADriverContextP ctx,
                            struct i965_gpe_context *gpe_context,
                            struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 16);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));

    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);          /* General State Base Address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    /* DW4-5. Surface state base address */
    OUT_RELOC64(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);

    /* DW6-7. Dynamic state base address */
    if (gpe_context->dynamic_state.bo)
        OUT_RELOC64(batch, gpe_context->dynamic_state.bo,
                    I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
                    0, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW8-9. Indirect object base address */
    if (gpe_context->indirect_state.bo)
        OUT_RELOC64(batch, gpe_context->indirect_state.bo,
                    I915_GEM_DOMAIN_SAMPLER,
                    0, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW10-11. Instruction base address */
    if (gpe_context->instruction_state.bo)
        OUT_RELOC64(batch, gpe_context->instruction_state.bo,
                    I915_GEM_DOMAIN_INSTRUCTION,
                    0, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW12-15. Size limitations */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* General State Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Dynamic State Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Indirect Object Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Instruction Access Upper Bound */

    /*
      OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);          // LLC Coherent Base Address
      OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); // LLC Coherent Upper Bound
    */

    ADVANCE_BATCH(batch);
}

static void
gen8_gpe_vfe_state(VADriverContextP ctx,
                   struct i965_gpe_context *gpe_context,
                   struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 9);

    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2));
    /* Scratch Space Base Pointer and Space */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              gpe_context->vfe_state.max_num_threads << 16 |    /* Maximum Number of Threads */
              gpe_context->vfe_state.num_urb_entries << 8 |     /* Number of URB Entries */
              gpe_context->vfe_state.gpgpu_mode << 2);          /* MEDIA Mode */
    OUT_BATCH(batch, 0);                                        /* Debug: Object ID */
    OUT_BATCH(batch,
              gpe_context->vfe_state.urb_entry_size << 16 |     /* URB Entry Allocation Size */
              gpe_context->vfe_state.curbe_allocation_size);    /* CURBE Allocation Size */

    /* the vfe_desc5/6/7 will decide whether the scoreboard is used. */
    OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc7.dword);

    ADVANCE_BATCH(batch);
}


static void
gen8_gpe_curbe_load(VADriverContextP ctx,
                    struct i965_gpe_context *gpe_context,
                    struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->curbe.length);
    OUT_BATCH(batch, gpe_context->curbe.offset);

    ADVANCE_BATCH(batch);
}

static void
gen8_gpe_idrt(VADriverContextP ctx,
              struct i965_gpe_context *gpe_context,
              struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 6);

    /* Flush pending media state before loading new interface descriptors */
    OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
    OUT_BATCH(batch, gpe_context->idrt.offset);

    ADVANCE_BATCH(batch);
}


void
gen8_gpe_pipeline_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct intel_batchbuffer *batch)
{
    intel_batchbuffer_emit_mi_flush(batch);

    i965_gpe_select(ctx, gpe_context, batch);
    gen8_gpe_state_base_address(ctx, gpe_context, batch);
    gen8_gpe_vfe_state(ctx, gpe_context, batch);
    gen8_gpe_curbe_load(ctx, gpe_context, batch);
    gen8_gpe_idrt(ctx, gpe_context, batch);
}

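/*
 * On Gen8+ the CURBE, the interface descriptor table and the sampler
 * states are sub-allocated from a single dynamic state BO, each region
 * aligned to 64 bytes:
 *
 *     0                            curbe.offset   (CURBE data)
 *     ALIGN(curbe end, 64)         idrt.offset    (interface descriptors)
 *     ALIGN(idrt end, 64)          sampler_offset (sampler states)
 *     ...                          dynamic_state.end_offset
 *
 * curbe.bo and idrt.bo hold extra references on that same BO, so the
 * common destroy path stays uniform.
 */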
void
gen8_gpe_context_init(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;
    int bo_size;
    unsigned int start_offset, end_offset;

    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      gpe_context->surface_state_binding_table.length,
                      4096);
    assert(bo);
    gpe_context->surface_state_binding_table.bo = bo;

    /* 192 bytes of slack cover the worst-case 64-byte alignment of the
     * three sub-allocated regions below. */
    bo_size = gpe_context->idrt.max_entries * gpe_context->idrt.entry_size + gpe_context->curbe.length + gpe_context->sampler_size + 192;
    dri_bo_unreference(gpe_context->dynamic_state.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "dynamic state",
                      bo_size,
                      4096);
    assert(bo);
    gpe_context->dynamic_state.bo = bo;
    gpe_context->dynamic_state.bo_size = bo_size;

    end_offset = 0;
    gpe_context->dynamic_state.end_offset = 0;

    /* Constant buffer offset */
    start_offset = ALIGN(end_offset, 64);
    dri_bo_unreference(gpe_context->curbe.bo);
    gpe_context->curbe.bo = bo;
    dri_bo_reference(gpe_context->curbe.bo);
    gpe_context->curbe.offset = start_offset;
    end_offset = start_offset + gpe_context->curbe.length;

    /* Interface descriptor offset */
    start_offset = ALIGN(end_offset, 64);
    dri_bo_unreference(gpe_context->idrt.bo);
    gpe_context->idrt.bo = bo;
    dri_bo_reference(gpe_context->idrt.bo);
    gpe_context->idrt.offset = start_offset;
    end_offset = start_offset + gpe_context->idrt.entry_size * gpe_context->idrt.max_entries;

    /* Sampler state offset */
    start_offset = ALIGN(end_offset, 64);
    gpe_context->sampler_offset = start_offset;
    end_offset = start_offset + gpe_context->sampler_size;

    /* Update the end offset of the dynamic state */
    gpe_context->dynamic_state.end_offset = end_offset;
}


void
gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
{
    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    gpe_context->surface_state_binding_table.bo = NULL;

    dri_bo_unreference(gpe_context->instruction_state.bo);
    gpe_context->instruction_state.bo = NULL;

    dri_bo_unreference(gpe_context->dynamic_state.bo);
    gpe_context->dynamic_state.bo = NULL;

    dri_bo_unreference(gpe_context->indirect_state.bo);
    gpe_context->indirect_state.bo = NULL;

    dri_bo_unreference(gpe_context->curbe.bo);
    gpe_context->curbe.bo = NULL;

    dri_bo_unreference(gpe_context->idrt.bo);
    gpe_context->idrt.bo = NULL;
}


void
gen8_gpe_load_kernels(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct i965_kernel *kernel_list,
                      unsigned int num_kernels)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i, kernel_size = 0;
    unsigned int kernel_offset, end_offset;
    unsigned char *kernel_ptr;
    struct i965_kernel *kernel;

    assert(num_kernels <= MAX_GPE_KERNELS);
    memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
    gpe_context->num_kernels = num_kernels;

    for (i = 0; i < num_kernels; i++) {
        kernel = &gpe_context->kernels[i];

        kernel_size += ALIGN(kernel->size, 64);
    }

    gpe_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
                                                     "kernel shader",
                                                     kernel_size,
                                                     0x1000);
    if (gpe_context->instruction_state.bo == NULL) {
        WARN_ONCE("failed to allocate the buffer space for kernel shader\n");
        return;
    }

    gpe_context->instruction_state.bo_size = kernel_size;
    gpe_context->instruction_state.end_offset = 0;
    end_offset = 0;

    dri_bo_map(gpe_context->instruction_state.bo, 1);
    kernel_ptr = (unsigned char *)(gpe_context->instruction_state.bo->virtual);
    for (i = 0; i < num_kernels; i++) {
        kernel_offset = ALIGN(end_offset, 64);
        kernel = &gpe_context->kernels[i];
        kernel->kernel_offset = kernel_offset;

        if (kernel->size) {
            memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);

            end_offset = kernel_offset + kernel->size;
        }
    }

    gpe_context->instruction_state.end_offset = end_offset;

    dri_bo_unmap(gpe_context->instruction_state.bo);
}

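/*
 * The Gen9 STATE_BASE_ADDRESS grows to 19 dwords: the Gen8 layout plus the
 * bindless surface state base address and size in DW16-18.
 */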
static void
gen9_gpe_state_base_address(VADriverContextP ctx,
                            struct i965_gpe_context *gpe_context,
                            struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 19);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (19 - 2));

    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);          /* General State Base Address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    /* DW4-5. Surface state base address */
    OUT_RELOC64(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);

    /* DW6-7. Dynamic state base address */
    if (gpe_context->dynamic_state.bo)
        OUT_RELOC64(batch, gpe_context->dynamic_state.bo,
                    I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
                    I915_GEM_DOMAIN_RENDER, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW8-9. Indirect object base address */
    if (gpe_context->indirect_state.bo)
        OUT_RELOC64(batch, gpe_context->indirect_state.bo,
                    I915_GEM_DOMAIN_SAMPLER,
                    0, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW10-11. Instruction base address */
    if (gpe_context->instruction_state.bo)
        OUT_RELOC64(batch, gpe_context->instruction_state.bo,
                    I915_GEM_DOMAIN_INSTRUCTION,
                    0, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW12-15. Size limitations */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* General State Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Dynamic State Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Indirect Object Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Instruction Access Upper Bound */

    /* DW16-18. Bindless surface state base address and size */
    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0xFFFFF000);

    ADVANCE_BATCH(batch);
}

static void
gen9_gpe_select(VADriverContextP ctx,
                struct i965_gpe_context *gpe_context,
                struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
                     GEN9_PIPELINE_SELECTION_MASK |
                     GEN9_MEDIA_DOP_GATE_OFF |
                     GEN9_MEDIA_DOP_GATE_MASK |
                     GEN9_FORCE_MEDIA_AWAKE_ON |
                     GEN9_FORCE_MEDIA_AWAKE_MASK);
    ADVANCE_BATCH(batch);
}

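/*
 * On Gen9 PIPELINE_SELECT carries mask bits, so selecting the media
 * pipeline also turns media DOP clock gating off and forces the media
 * engine awake; gen9_gpe_pipeline_end() reverses both once the workload
 * has been emitted.
 */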
void
gen9_gpe_pipeline_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct intel_batchbuffer *batch)
{
    intel_batchbuffer_emit_mi_flush(batch);

    gen9_gpe_select(ctx, gpe_context, batch);
    gen9_gpe_state_base_address(ctx, gpe_context, batch);
    gen8_gpe_vfe_state(ctx, gpe_context, batch);
    gen8_gpe_curbe_load(ctx, gpe_context, batch);
    gen8_gpe_idrt(ctx, gpe_context, batch);
}

void
gen9_gpe_pipeline_end(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
              GEN9_PIPELINE_SELECTION_MASK |
              GEN9_MEDIA_DOP_GATE_ON |
              GEN9_MEDIA_DOP_GATE_MASK |
              GEN9_FORCE_MEDIA_AWAKE_OFF |
              GEN9_FORCE_MEDIA_AWAKE_MASK);
    ADVANCE_BATCH(batch);
}

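/*
 * struct i965_gpe_resource wraps 2D surfaces and linear buffers behind one
 * interface: it records geometry, tiling and the backing BO (with its own
 * reference), plus a CPU map pointer managed by
 * i965_map_gpe_resource()/i965_unmap_gpe_resource().
 */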
Bool
i965_allocate_gpe_resource(dri_bufmgr *bufmgr,
                           struct i965_gpe_resource *res,
                           int size,
                           const char *name)
{
    if (!res || !size)
        return false;

    res->size = size;
    res->bo = dri_bo_alloc(bufmgr, name, res->size, 4096);
    res->map = NULL;

    return (res->bo != NULL);
}

void
i965_object_surface_to_2d_gpe_resource(struct i965_gpe_resource *res,
                                       struct object_surface *obj_surface)
{
    unsigned int swizzle;

    res->type = I965_GPE_RESOURCE_2D;
    res->width = obj_surface->orig_width;
    res->height = obj_surface->orig_height;
    res->pitch = obj_surface->width;
    res->size = obj_surface->size;
    res->cb_cr_pitch = obj_surface->cb_cr_pitch;
    res->x_cb_offset = obj_surface->x_cb_offset;
    res->y_cb_offset = obj_surface->y_cb_offset;
    res->bo = obj_surface->bo;
    res->map = NULL;

    dri_bo_reference(res->bo);
    dri_bo_get_tiling(obj_surface->bo, &res->tiling, &swizzle);
}

void
i965_dri_object_to_buffer_gpe_resource(struct i965_gpe_resource *res,
                                       dri_bo *bo)
{
    unsigned int swizzle;

    res->type = I965_GPE_RESOURCE_BUFFER;
    res->width = bo->size;
    res->height = 1;
    res->pitch = res->width;
    res->size = res->pitch * res->height;   /* size = pitch * height, i.e. bo->size */
    res->bo = bo;
    res->map = NULL;

    dri_bo_reference(res->bo);
    dri_bo_get_tiling(res->bo, &res->tiling, &swizzle);
}

void
i965_gpe_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
                                       dri_bo *bo,
                                       unsigned int width,
                                       unsigned int height,
                                       unsigned int pitch)
{
    unsigned int swizzle;

    res->type = I965_GPE_RESOURCE_2D;
    res->width = width;
    res->height = height;
    res->pitch = pitch;
    res->size = res->pitch * res->height;   /* 2D size is pitch * height */
    res->bo = bo;
    res->map = NULL;

    dri_bo_reference(res->bo);
    dri_bo_get_tiling(res->bo, &res->tiling, &swizzle);
}

void
i965_zero_gpe_resource(struct i965_gpe_resource *res)
{
    if (res->bo) {
        dri_bo_map(res->bo, 1);
        memset(res->bo->virtual, 0, res->size);
        dri_bo_unmap(res->bo);
    }
}

void
i965_free_gpe_resource(struct i965_gpe_resource *res)
{
    dri_bo_unreference(res->bo);
    res->bo = NULL;
    res->map = NULL;
}

void *
i965_map_gpe_resource(struct i965_gpe_resource *res)
{
    int ret;

    if (res->bo) {
        ret = dri_bo_map(res->bo, 1);

        if (ret == 0)
            res->map = res->bo->virtual;
        else
            res->map = NULL;
    } else
        res->map = NULL;

    return res->map;
}

void
i965_unmap_gpe_resource(struct i965_gpe_resource *res)
{
    if (res->bo && res->map)
        dri_bo_unmap(res->bo);

    res->map = NULL;
}

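/*
 * The gen9_gpe_mi_* helpers below emit raw dwords with __OUT_BATCH() and
 * __OUT_RELOC64() rather than BEGIN_BATCH()/ADVANCE_BATCH(), so the caller
 * is expected to have reserved enough batch space up front.  All memory
 * operands are PPGTT addresses.
 */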
1473 void
1474 gen9_gpe_mi_flush_dw(VADriverContextP ctx,
1475                      struct intel_batchbuffer *batch,
1476                      struct gpe_mi_flush_dw_parameter *params)
1477 {
1478     int video_pipeline_cache_invalidate = 0;
1479     int post_sync_operation = MI_FLUSH_DW_NOWRITE;
1480
1481     if (params->video_pipeline_cache_invalidate)
1482         video_pipeline_cache_invalidate = MI_FLUSH_DW_VIDEO_PIPELINE_CACHE_INVALIDATE;
1483
1484     if (params->bo)
1485         post_sync_operation = MI_FLUSH_DW_WRITE_QWORD;
1486
1487     __OUT_BATCH(batch, (MI_FLUSH_DW2 |
1488                         video_pipeline_cache_invalidate |
1489                         post_sync_operation |
1490                         (5 - 2))); /* Always use PPGTT */
1491
1492     if (params->bo) {
1493         __OUT_RELOC64(batch,
1494                       params->bo,
1495                       I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1496                       params->offset);
1497     } else {
1498         __OUT_BATCH(batch, 0);
1499         __OUT_BATCH(batch, 0);
1500     }
1501
1502     __OUT_BATCH(batch, params->dw0);
1503     __OUT_BATCH(batch, params->dw1);
1504 }
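
/*
 * A hedged example of requesting a post-sync QWORD write (a sketch, not
 * from the original source). With params->bo set, the helper above
 * switches the post-sync operation to MI_FLUSH_DW_WRITE_QWORD and writes
 * dw0/dw1 to bo + offset once the flush completes. "status_bo" is a
 * hypothetical buffer object.
 *
 *   struct gpe_mi_flush_dw_parameter flush_params;
 *
 *   memset(&flush_params, 0, sizeof(flush_params));
 *   flush_params.video_pipeline_cache_invalidate = 1;
 *   flush_params.bo = status_bo;
 *   flush_params.offset = 0;
 *   flush_params.dw0 = 0xdeadbeef;   // marker value, arbitrary
 *   gen9_gpe_mi_flush_dw(ctx, batch, &flush_params);
 */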

void
gen9_gpe_mi_store_data_imm(VADriverContextP ctx,
                           struct intel_batchbuffer *batch,
                           struct gpe_mi_store_data_imm_parameter *params)
{
    if (params->is_qword) {
        __OUT_BATCH(batch, MI_STORE_DATA_IMM |
                    (1 << 21) | /* store qword */
                    (5 - 2)); /* Always use PPGTT */
    } else {
        __OUT_BATCH(batch, MI_STORE_DATA_IMM | (4 - 2)); /* Always use PPGTT */
    }

    __OUT_RELOC64(batch,
                  params->bo,
                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                  params->offset);
    __OUT_BATCH(batch, params->dw0);

    if (params->is_qword)
        __OUT_BATCH(batch, params->dw1);
}

void
gen9_gpe_mi_store_register_mem(VADriverContextP ctx,
                               struct intel_batchbuffer *batch,
                               struct gpe_mi_store_register_mem_parameter *params)
{
    __OUT_BATCH(batch, (MI_STORE_REGISTER_MEM | (4 - 2))); /* Always use PPGTT */
    __OUT_BATCH(batch, params->mmio_offset);
    __OUT_RELOC64(batch,
                  params->bo,
                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                  params->offset);
}

void
gen9_gpe_mi_load_register_mem(VADriverContextP ctx,
                              struct intel_batchbuffer *batch,
                              struct gpe_mi_load_register_mem_parameter *params)
{
    __OUT_BATCH(batch, (MI_LOAD_REGISTER_MEM | (4 - 2))); /* Always use PPGTT */
    __OUT_BATCH(batch, params->mmio_offset);
    __OUT_RELOC64(batch,
                  params->bo,
                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                  params->offset);
}

void
gen9_gpe_mi_load_register_imm(VADriverContextP ctx,
                              struct intel_batchbuffer *batch,
                              struct gpe_mi_load_register_imm_parameter *params)
{
    __OUT_BATCH(batch, (MI_LOAD_REGISTER_IMM | (3 - 2)));
    __OUT_BATCH(batch, params->mmio_offset);
    __OUT_BATCH(batch, params->data);
}

void
gen9_gpe_mi_load_register_reg(VADriverContextP ctx,
                              struct intel_batchbuffer *batch,
                              struct gpe_mi_load_register_reg_parameter *params)
{
    __OUT_BATCH(batch, (MI_LOAD_REGISTER_REG | (3 - 2)));
    __OUT_BATCH(batch, params->src_mmio_offset);
    __OUT_BATCH(batch, params->dst_mmio_offset);
}

void
gen9_gpe_mi_math(VADriverContextP ctx,
                 struct intel_batchbuffer *batch,
                 struct gpe_mi_math_parameter *params)
{
    __OUT_BATCH(batch, (MI_MATH | (params->num_instructions - 1)));
    intel_batchbuffer_data(batch, params->instruction_list, params->num_instructions * 4);
}

void
gen9_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
                                         struct intel_batchbuffer *batch,
                                         struct gpe_mi_conditional_batch_buffer_end_parameter *params)
{
    int compare_mask_mode_enabled = MI_COMPARE_MASK_MODE_ENANBLED;

    if (params->compare_mask_mode_disabled)
        compare_mask_mode_enabled = 0;

    __OUT_BATCH(batch, (MI_CONDITIONAL_BATCH_BUFFER_END |
                        (1 << 21) | /* compare semaphore */
                        compare_mask_mode_enabled |
                        (4 - 2))); /* Always use PPGTT */
    __OUT_BATCH(batch, params->compare_data);
    __OUT_RELOC64(batch,
                  params->bo,
                  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
                  params->offset);
}

void
gen9_gpe_mi_batch_buffer_start(VADriverContextP ctx,
                               struct intel_batchbuffer *batch,
                               struct gpe_mi_batch_buffer_start_parameter *params)
{
    __OUT_BATCH(batch, (MI_BATCH_BUFFER_START |
                        (!!params->is_second_level << 22) |
                        (!params->use_global_gtt << 8) |
                        (1 << 0))); /* DWord length: (3 - 2) */
    __OUT_RELOC64(batch,
                  params->bo,
                  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
                  params->offset);
}
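
/*
 * A sketch of chaining into a second-level batch (assumed usage, not from
 * the original file): bit 22 selects second-level behavior and bit 8 is
 * set for PPGTT addressing when use_global_gtt is 0. "second_level_bo"
 * is a hypothetical buffer object holding pre-built commands.
 *
 *   struct gpe_mi_batch_buffer_start_parameter bbs_params;
 *
 *   memset(&bbs_params, 0, sizeof(bbs_params));
 *   bbs_params.bo = second_level_bo;
 *   bbs_params.offset = 0;
 *   bbs_params.is_second_level = 1;
 *   bbs_params.use_global_gtt = 0;   // stay in PPGTT
 *   gen9_gpe_mi_batch_buffer_start(ctx, batch, &bbs_params);
 */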

void
gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
                                    struct i965_gpe_context *gpe_context,
                                    struct gpe_dynamic_state_parameter *ds)
{
    if (!ds->bo || !gpe_context)
        return;

    dri_bo_unreference(gpe_context->dynamic_state.bo);
    gpe_context->dynamic_state.bo = ds->bo;
    dri_bo_reference(gpe_context->dynamic_state.bo);
    gpe_context->dynamic_state.bo_size = ds->bo_size;

    /* The curbe buffer is part of the dynamic buffer */
    dri_bo_unreference(gpe_context->curbe.bo);
    gpe_context->curbe.bo = ds->bo;
    dri_bo_reference(gpe_context->curbe.bo);
    gpe_context->curbe.offset = ds->curbe_offset;

    /* The idrt buffer is part of the dynamic buffer */
    dri_bo_unreference(gpe_context->idrt.bo);
    gpe_context->idrt.bo = ds->bo;
    dri_bo_reference(gpe_context->idrt.bo);
    gpe_context->idrt.offset = ds->idrt_offset;

    gpe_context->sampler_offset = ds->sampler_offset;
}
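
/*
 * A hedged sketch of carving one dynamic-state bo into its sub-allocations
 * (the packing below is illustrative, not the driver's actual layout;
 * "dynamic_bo", "curbe_size" and "idrt_size" are hypothetical):
 *
 *   struct gpe_dynamic_state_parameter ds;
 *
 *   ds.bo = dynamic_bo;
 *   ds.bo_size = dynamic_bo->size;
 *   ds.curbe_offset = 0;
 *   ds.idrt_offset = ALIGN(curbe_size, 64);       // example packing
 *   ds.sampler_offset = ds.idrt_offset + idrt_size;
 *   gen8_gpe_context_set_dynamic_buffer(ctx, gpe_context, &ds);
 */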

void *
gen8p_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
{
    if (dri_bo_map(gpe_context->curbe.bo, 1) != 0)
        return NULL;

    return (char *)gpe_context->curbe.bo->virtual + gpe_context->curbe.offset;
}

void
gen8p_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context)
{
    dri_bo_unmap(gpe_context->curbe.bo);
}
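
/*
 * A minimal CURBE-update sketch (assumed caller pattern): map, fill the
 * kernel's constant data, unmap. "curbe_cmd" stands in for a
 * kernel-specific constant block defined elsewhere.
 *
 *   void *pdata = gen8p_gpe_context_map_curbe(gpe_context);
 *
 *   if (pdata) {
 *       memcpy(pdata, &curbe_cmd, sizeof(curbe_cmd));
 *       gen8p_gpe_context_unmap_curbe(gpe_context);
 *   }
 */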

void
gen9_gpe_reset_binding_table(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context)
{
    unsigned int *binding_table;
    unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset;
    int i;

    dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
    binding_table = (unsigned int*)((char *)gpe_context->surface_state_binding_table.bo->virtual + binding_table_offset);

    for (i = 0; i < gpe_context->surface_state_binding_table.max_entries; i++) {
        *(binding_table + i) = gpe_context->surface_state_binding_table.surface_state_offset + i * SURFACE_STATE_PADDED_SIZE_GEN9;
    }

    dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
}

void
gen8_gpe_setup_interface_data(VADriverContextP ctx,
                              struct i965_gpe_context *gpe_context)
{
    struct gen8_interface_descriptor_data *desc;
    int i;
    dri_bo *bo;
    unsigned char *desc_ptr;

    bo = gpe_context->idrt.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc_ptr = (unsigned char *)bo->virtual + gpe_context->idrt.offset;
    desc = (struct gen8_interface_descriptor_data *)desc_ptr;

    for (i = 0; i < gpe_context->num_kernels; i++) {
        struct i965_kernel *kernel;

        kernel = &gpe_context->kernels[i];
        assert(sizeof(*desc) == 32);

        /* Set up the descriptor table */
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0;
        desc->desc3.sampler_state_pointer = (gpe_context->sampler_offset >> 5);
        desc->desc4.binding_table_entry_count = 0;
        desc->desc4.binding_table_pointer = (gpe_context->surface_state_binding_table.binding_table_offset >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = ALIGN(gpe_context->curbe.length, 32) >> 5; // in registers

        desc++;
    }

    dri_bo_unmap(bo);
}
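
/*
 * A sketch of the per-frame setup order these helpers assume (illustrative
 * only; the exact sequence lives in the codec-specific callers):
 *
 *   gen8_gpe_context_set_dynamic_buffer(ctx, gpe_context, &ds);
 *   gen9_gpe_reset_binding_table(ctx, gpe_context);
 *   // fill constants via gen8p_gpe_context_map_curbe()/unmap
 *   gen8_gpe_setup_interface_data(ctx, gpe_context);
 */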

static void
gen9_gpe_set_surface_tiling(struct gen9_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen9_gpe_set_surface2_tiling(struct gen9_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen9_gpe_set_2d_surface_state(struct gen9_surface_state *ss,
                              unsigned int cacheability_control,
                              unsigned int format,
                              unsigned int tiling,
                              unsigned int width,
                              unsigned int height,
                              unsigned int pitch,
                              uint64_t base_offset,
                              unsigned int y_offset)
{
    memset(ss, 0, sizeof(*ss));

    /* Always set to 1 (align-4 mode) */
    ss->ss0.vertical_alignment = 1;
    ss->ss0.horizontal_alignment = 1;

    ss->ss0.surface_format = format;
    ss->ss0.surface_type = I965_SURFACE_2D;

    ss->ss1.surface_mocs = cacheability_control;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    ss->ss5.y_offset = y_offset;

    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;

    ss->ss8.base_addr = (uint32_t)base_offset;
    ss->ss9.base_addr_high = (uint32_t)(base_offset >> 32);

    gen9_gpe_set_surface_tiling(ss, tiling);
}

/* This is only for NV12 format */
static void
gen9_gpe_set_adv_surface_state(struct gen9_surface_state2 *ss,
                               unsigned int v_direction,
                               unsigned int cacheability_control,
                               unsigned int format,
                               unsigned int tiling,
                               unsigned int width,
                               unsigned int height,
                               unsigned int pitch,
                               uint64_t base_offset,
                               unsigned int y_cb_offset)
{
    memset(ss, 0, sizeof(*ss));

    ss->ss1.cbcr_pixel_offset_v_direction = v_direction;
    ss->ss1.width = width - 1;
    ss->ss1.height = height - 1;

    ss->ss2.surface_format = format;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = pitch - 1;

    ss->ss3.y_offset_for_cb = y_cb_offset;

    ss->ss5.surface_object_mocs = cacheability_control;

    ss->ss6.base_addr = (uint32_t)base_offset;
    ss->ss7.base_addr_high = (uint32_t)(base_offset >> 32);

    gen9_gpe_set_surface2_tiling(ss, tiling);
}

static void
gen9_gpe_set_buffer2_surface_state(struct gen9_surface_state *ss,
                                   unsigned int cacheability_control,
                                   unsigned int format,
                                   unsigned int size,
                                   unsigned int pitch,
                                   uint64_t base_offset)
{
    memset(ss, 0, sizeof(*ss));

    ss->ss0.surface_format = format;
    ss->ss0.surface_type = I965_SURFACE_BUFFER;

    ss->ss1.surface_mocs = cacheability_control;

    ss->ss2.width = (size - 1) & 0x7F;
    ss->ss2.height = ((size - 1) & 0x1FFF80) >> 7;

    ss->ss3.depth = ((size - 1) & 0xFE00000) >> 21;
    ss->ss3.pitch = pitch - 1;

    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;

    ss->ss8.base_addr = (uint32_t)base_offset;
    ss->ss9.base_addr_high = (uint32_t)(base_offset >> 32);
}

void
gen9_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
                             struct i965_gpe_surface *gpe_surface,
                             int index)
{
    char *buf;
    unsigned int tiling, swizzle, width, height, pitch, tile_alignment, y_offset = 0;
    unsigned int surface_state_offset = gpe_context->surface_state_binding_table.surface_state_offset +
        index * SURFACE_STATE_PADDED_SIZE_GEN9;
    unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset +
        index * 4;
    struct i965_gpe_resource *gpe_resource = gpe_surface->gpe_resource;

    dri_bo_get_tiling(gpe_resource->bo, &tiling, &swizzle);

    dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
    buf = (char *)gpe_context->surface_state_binding_table.bo->virtual;
    *((unsigned int *)(buf + binding_table_offset)) = surface_state_offset;

    if (gpe_surface->is_2d_surface && gpe_surface->is_override_offset) {
        struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);

        width = gpe_resource->width;
        height = gpe_resource->height;
        pitch = gpe_resource->pitch;

        if (gpe_surface->is_media_block_rw) {
            if (gpe_surface->is_16bpp)
                width = (ALIGN(width * 2, 4) >> 2);
            else
                width = (ALIGN(width, 4) >> 2);
        }

        gen9_gpe_set_2d_surface_state(ss,
                                      gpe_surface->cacheability_control,
                                      gpe_surface->format,
                                      tiling,
                                      width, height, pitch,
                                      gpe_resource->bo->offset64 + gpe_surface->offset,
                                      0);

        dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          gpe_surface->offset,
                          surface_state_offset + offsetof(struct gen9_surface_state, ss8),
                          gpe_resource->bo);
    } else if (gpe_surface->is_2d_surface && gpe_surface->is_uv_surface) {
        unsigned int cbcr_offset;
        struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);

        width = gpe_resource->width;
        height = gpe_resource->height / 2;
        pitch = gpe_resource->pitch;

        if (gpe_surface->is_media_block_rw) {
            if (gpe_surface->is_16bpp)
                width = (ALIGN(width * 2, 4) >> 2);
            else
                width = (ALIGN(width, 4) >> 2);
        }

        if (tiling == I915_TILING_Y) {
            tile_alignment = 32;
        } else if (tiling == I915_TILING_X) {
            tile_alignment = 8;
        } else
            tile_alignment = 1;

        y_offset = (gpe_resource->y_cb_offset % tile_alignment);
        cbcr_offset = ALIGN_FLOOR(gpe_resource->y_cb_offset, tile_alignment) * pitch;

        gen9_gpe_set_2d_surface_state(ss,
                                      gpe_surface->cacheability_control,
                                      I965_SURFACEFORMAT_R16_UINT,
                                      tiling,
                                      width, height, pitch,
                                      gpe_resource->bo->offset64 + cbcr_offset,
                                      y_offset);

        dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          cbcr_offset,
                          surface_state_offset + offsetof(struct gen9_surface_state, ss8),
                          gpe_resource->bo);
    } else if (gpe_surface->is_2d_surface) {
        struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);

        width = gpe_resource->width;
        height = gpe_resource->height;
        pitch = gpe_resource->pitch;

        if (gpe_surface->is_media_block_rw) {
            if (gpe_surface->is_16bpp)
                width = (ALIGN(width * 2, 4) >> 2);
            else
                width = (ALIGN(width, 4) >> 2);
        }

        gen9_gpe_set_2d_surface_state(ss,
                                      gpe_surface->cacheability_control,
                                      gpe_surface->format,
                                      tiling,
                                      width, height, pitch,
                                      gpe_resource->bo->offset64,
                                      y_offset);

        dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          surface_state_offset + offsetof(struct gen9_surface_state, ss8),
                          gpe_resource->bo);
    } else if (gpe_surface->is_adv_surface) {
        struct gen9_surface_state2 *ss = (struct gen9_surface_state2 *)(buf + surface_state_offset);

        width = gpe_resource->width;
        height = gpe_resource->height;
        pitch = gpe_resource->pitch;

        gen9_gpe_set_adv_surface_state(ss,
                                       gpe_surface->v_direction,
                                       gpe_surface->cacheability_control,
                                       MFX_SURFACE_PLANAR_420_8,
                                       tiling,
                                       width, height, pitch,
                                       gpe_resource->bo->offset64,
                                       gpe_resource->y_cb_offset);

        dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          surface_state_offset + offsetof(struct gen9_surface_state2, ss6),
                          gpe_resource->bo);
    } else {
        struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
        unsigned int format;

        assert(gpe_surface->is_buffer);

        if (gpe_surface->is_raw_buffer) {
            format = I965_SURFACEFORMAT_RAW;
            pitch = 1;
        } else {
            format = I965_SURFACEFORMAT_R32_UINT;
            pitch = sizeof(unsigned int);
        }

        gen9_gpe_set_buffer2_surface_state(ss,
                                           gpe_surface->cacheability_control,
                                           format,
                                           gpe_surface->size,
                                           pitch,
                                           gpe_resource->bo->offset64 + gpe_surface->offset);

        dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          gpe_surface->offset,
                          surface_state_offset + offsetof(struct gen9_surface_state, ss8),
                          gpe_resource->bo);
    }

    dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
}
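
/*
 * A hedged example of binding a plain 2D surface (a sketch, not from the
 * original source). Unset fields rely on the zero-initialized struct;
 * "src_res" and "BINDING_TABLE_INDEX" are hypothetical, and the MOCS
 * value of 0 is a placeholder for the platform's cacheability setting.
 *
 *   struct i965_gpe_surface gpe_surface;
 *
 *   memset(&gpe_surface, 0, sizeof(gpe_surface));
 *   gpe_surface.gpe_resource = &src_res;
 *   gpe_surface.is_2d_surface = 1;
 *   gpe_surface.is_media_block_rw = 1;
 *   gpe_surface.cacheability_control = 0;
 *   gpe_surface.format = I965_SURFACEFORMAT_R8_UNORM;
 *   gen9_gpe_context_add_surface(gpe_context, &gpe_surface, BINDING_TABLE_INDEX);
 */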

bool
i965_gpe_allocate_2d_resource(dri_bufmgr *bufmgr,
                              struct i965_gpe_resource *res,
                              int width,
                              int height,
                              int pitch,
                              const char *name)
{
    int bo_size;

    if (!res)
        return false;

    res->type = I965_GPE_RESOURCE_2D;
    res->width = width;
    res->height = height;
    res->pitch = pitch;

    bo_size = ALIGN(height, 16) * pitch;
    res->size = bo_size;

    res->bo = dri_bo_alloc(bufmgr, name, res->size, 4096);
    res->map = NULL;

    return res->bo != NULL;
}
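
/*
 * A minimal lifecycle sketch under the allocator above (illustrative):
 * note that the bo is sized with the height padded to a 16-row multiple.
 * "bufmgr", "width" and "height" are caller-provided values.
 *
 *   struct i965_gpe_resource scratch_res;
 *
 *   if (i965_gpe_allocate_2d_resource(bufmgr, &scratch_res,
 *                                     width, height, ALIGN(width, 64),
 *                                     "scratch surface")) {
 *       i965_zero_gpe_resource(&scratch_res);
 *       // ... bind and use the resource ...
 *       i965_free_gpe_resource(&scratch_res);
 *   }
 */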

void
gen8_gpe_media_state_flush(VADriverContextP ctx,
                           struct i965_gpe_context *gpe_context,
                           struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 2);

    OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH | (2 - 2));
    OUT_BATCH(batch, 0);

    ADVANCE_BATCH(batch);
}

void
gen8_gpe_media_object(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct intel_batchbuffer *batch,
                      struct gpe_media_object_parameter *param)
{
    int batch_size, subdata_size;

    batch_size = 6;
    subdata_size = 0;
    if (param->pinline_data && param->inline_size) {
        subdata_size = ALIGN(param->inline_size, 4);
        batch_size += subdata_size / 4;
    }
    BEGIN_BATCH(batch, batch_size);
    OUT_BATCH(batch, CMD_MEDIA_OBJECT | (batch_size - 2));
    OUT_BATCH(batch, param->interface_offset);
    OUT_BATCH(batch, param->use_scoreboard << 21);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, (param->scoreboard_y << 16 |
                      param->scoreboard_x));
    OUT_BATCH(batch, param->scoreboard_mask);

    if (subdata_size)
        intel_batchbuffer_data(batch, param->pinline_data, subdata_size);

    ADVANCE_BATCH(batch);
}
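
/*
 * A sketch of dispatching one thread with inline data (assumed usage;
 * "inline_data" stands in for a kernel-specific payload defined
 * elsewhere):
 *
 *   struct gpe_media_object_parameter param;
 *
 *   memset(&param, 0, sizeof(param));
 *   param.interface_offset = 0;       // index into the IDRT
 *   param.pinline_data = &inline_data;
 *   param.inline_size = sizeof(inline_data);
 *   gen8_gpe_media_object(ctx, gpe_context, batch, &param);
 */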

void
gen9_gpe_media_object_walker(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct intel_batchbuffer *batch,
                             struct gpe_media_object_walker_parameter *param)
{
    int walker_length;

    walker_length = 17;
    if (param->inline_size)
        walker_length += ALIGN(param->inline_size, 4) / 4;
    BEGIN_BATCH(batch, walker_length);
    OUT_BATCH(batch, CMD_MEDIA_OBJECT_WALKER | (walker_length - 2));
    OUT_BATCH(batch, param->interface_offset);
    OUT_BATCH(batch, param->use_scoreboard << 21);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, (param->group_id_loop_select << 8 |
                      param->scoreboard_mask)); // DW5
    OUT_BATCH(batch, (param->color_count_minus1 << 24 |
                      param->middle_loop_extra_steps << 16 |
                      param->mid_loop_unit_y << 12 |
                      param->mid_loop_unit_x << 8));
    OUT_BATCH(batch, ((param->global_loop_exec_count & 0x3ff) << 16 |
                      (param->local_loop_exec_count & 0x3ff)));
    OUT_BATCH(batch, param->block_resolution.value);
    OUT_BATCH(batch, param->local_start.value);
    OUT_BATCH(batch, 0); // DW10
    OUT_BATCH(batch, param->local_outer_loop_stride.value);
    OUT_BATCH(batch, param->local_inner_loop_unit.value);
    OUT_BATCH(batch, param->global_resolution.value);
    OUT_BATCH(batch, param->global_start.value);
    OUT_BATCH(batch, param->global_outer_loop_stride.value);
    OUT_BATCH(batch, param->global_inner_loop_unit.value);

    if (param->pinline_data && param->inline_size)
        intel_batchbuffer_data(batch, param->pinline_data, ALIGN(param->inline_size, 4));

    ADVANCE_BATCH(batch);
}

void
intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_parameter *kernel_walker_param,
                                             struct gpe_media_object_walker_parameter *walker_param)
{
    memset(walker_param, 0, sizeof(*walker_param));

    walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;

    walker_param->block_resolution.x = kernel_walker_param->resolution_x;
    walker_param->block_resolution.y = kernel_walker_param->resolution_y;

    walker_param->global_resolution.x = kernel_walker_param->resolution_x;
    walker_param->global_resolution.y = kernel_walker_param->resolution_y;

    walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
    walker_param->global_outer_loop_stride.y = 0;

    walker_param->global_inner_loop_unit.x = 0;
    walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;

    walker_param->local_loop_exec_count = 0xFFFF;  // maximum value
    walker_param->global_loop_exec_count = 0xFFFF;  // maximum value

    if (kernel_walker_param->no_dependency) {
        /* no_dependency is used for VPP */
        walker_param->scoreboard_mask = 0;
        walker_param->use_scoreboard = 0;
        // Raster-scan walking pattern
        walker_param->local_outer_loop_stride.x = 0;
        walker_param->local_outer_loop_stride.y = 1;
        walker_param->local_inner_loop_unit.x = 1;
        walker_param->local_inner_loop_unit.y = 0;
        walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
        walker_param->local_end.y = 0;
    } else {
        walker_param->local_end.x = 0;
        walker_param->local_end.y = 0;

        // 26-degree dependency walking pattern
        walker_param->scoreboard_mask = 0x0F;
        walker_param->local_outer_loop_stride.x = 1;
        walker_param->local_outer_loop_stride.y = 0;
        walker_param->local_inner_loop_unit.x = -2;
        walker_param->local_inner_loop_unit.y = 1;
    }
}
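
/*
 * A hedged end-to-end sketch for the VPP path (illustrative; it assumes
 * the kernel processes 16x16 blocks, and "dst_width"/"dst_height" are
 * caller-provided values): one walker thread per block, no inter-thread
 * dependency, followed by a media-state flush.
 *
 *   struct intel_vpp_kernel_walker_parameter kernel_walker_param;
 *   struct gpe_media_object_walker_parameter walker_param;
 *
 *   memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
 *   kernel_walker_param.resolution_x = ALIGN(dst_width, 16) >> 4;
 *   kernel_walker_param.resolution_y = ALIGN(dst_height, 16) >> 4;
 *   kernel_walker_param.no_dependency = 1;
 *
 *   intel_vpp_init_media_object_walker_parameter(&kernel_walker_param,
 *                                                &walker_param);
 *   gen9_gpe_media_object_walker(ctx, gpe_context, batch, &walker_param);
 *   gen8_gpe_media_state_flush(ctx, gpe_context, batch);
 */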