src/i965_gpe_utils.c [android-x86/hardware-intel-common-vaapi.git]
Commit: Add the 10-bit flag for MEDIA_BLOCK_RW operation on P010 surface
/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Xiang Haihao <haihao.xiang@intel.com>
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_gpe_utils.h"

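/*
 * Switch the render engine to the media pipeline; all of the GPE
 * (general purpose engine) programming below assumes media is the
 * selected pipeline.
 */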
static void
i965_gpe_select(VADriverContextP ctx,
                struct i965_gpe_context *gpe_context,
                struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(batch);
}

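/*
 * STATE_BASE_ADDRESS for Gen6/Gen7: only the surface state base points
 * at a real buffer (the combined surface state / binding table bo); the
 * other bases and bounds are left at zero with the modify bit set.
 */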
static void
gen6_gpe_state_base_address(VADriverContextP ctx,
                            struct i965_gpe_context *gpe_context,
                            struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 10);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* General State Base Address */
    OUT_RELOC(batch,
              gpe_context->surface_state_binding_table.bo,
              I915_GEM_DOMAIN_INSTRUCTION,
              0,
              BASE_ADDRESS_MODIFY);                     /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Dynamic State Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Indirect Object Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Instruction Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* General State Access Upper Bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Dynamic State Access Upper Bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Indirect Object Access Upper Bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Instruction Access Upper Bound */

    ADVANCE_BATCH(batch);
}

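/*
 * MEDIA_VFE_STATE configures the video front end: maximum thread count,
 * URB allocation and CURBE size all come from the GPE context.
 */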
static void
gen6_gpe_vfe_state(VADriverContextP ctx,
                   struct i965_gpe_context *gpe_context,
                   struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 8);

    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
    OUT_BATCH(batch, 0);                                        /* Scratch Space Base Pointer and Space */
    OUT_BATCH(batch,
              gpe_context->vfe_state.max_num_threads << 16 |    /* Maximum Number of Threads */
              gpe_context->vfe_state.num_urb_entries << 8 |     /* Number of URB Entries */
              gpe_context->vfe_state.gpgpu_mode << 2);          /* MEDIA Mode */
    OUT_BATCH(batch, 0);                                        /* Debug: Object ID */
    OUT_BATCH(batch,
              gpe_context->vfe_state.urb_entry_size << 16 |     /* URB Entry Allocation Size */
              gpe_context->vfe_state.curbe_allocation_size);    /* CURBE Allocation Size */
    /* vfe_desc5/6/7 decide whether the scoreboard is used. */
    OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc7.dword);

    ADVANCE_BATCH(batch);
}

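/* Load the CURBE (constant data) for the kernels from the context's curbe bo. */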
static void
gen6_gpe_curbe_load(VADriverContextP ctx,
                    struct i965_gpe_context *gpe_context,
                    struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->curbe.length);
    OUT_RELOC(batch, gpe_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

    ADVANCE_BATCH(batch);
}

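/*
 * Point the hardware at the interface descriptor table (IDRT), which
 * holds one descriptor per kernel entry point.
 */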
static void
gen6_gpe_idrt(VADriverContextP ctx,
              struct i965_gpe_context *gpe_context,
              struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
    OUT_RELOC(batch, gpe_context->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

    ADVANCE_BATCH(batch);
}

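/*
 * Copy the kernel descriptors into the GPE context and upload each
 * kernel binary into a bo of its own.
 */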
void
i965_gpe_load_kernels(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct i965_kernel *kernel_list,
                      unsigned int num_kernels)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    assert(num_kernels <= MAX_GPE_KERNELS);
    memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
    gpe_context->num_kernels = num_kernels;

    for (i = 0; i < num_kernels; i++) {
        struct i965_kernel *kernel = &gpe_context->kernels[i];

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size,
                                  0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }
}

void
i965_gpe_context_destroy(struct i965_gpe_context *gpe_context)
{
    int i;

    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    gpe_context->surface_state_binding_table.bo = NULL;

    dri_bo_unreference(gpe_context->idrt.bo);
    gpe_context->idrt.bo = NULL;

    dri_bo_unreference(gpe_context->curbe.bo);
    gpe_context->curbe.bo = NULL;

    for (i = 0; i < gpe_context->num_kernels; i++) {
        struct i965_kernel *kernel = &gpe_context->kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }
}

void
i965_gpe_context_init(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      gpe_context->surface_state_binding_table.length,
                      4096);
    assert(bo);
    gpe_context->surface_state_binding_table.bo = bo;

    dri_bo_unreference(gpe_context->idrt.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "interface descriptor table",
                      gpe_context->idrt.entry_size * gpe_context->idrt.max_entries,
                      4096);
    assert(bo);
    gpe_context->idrt.bo = bo;

    dri_bo_unreference(gpe_context->curbe.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "curbe buffer",
                      gpe_context->curbe.length,
                      4096);
    assert(bo);
    gpe_context->curbe.bo = bo;
}

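/*
 * Emit the whole media pipeline prologue: pipeline select, base
 * addresses, VFE state, CURBE and interface descriptors, preceded by a
 * flush so that earlier rendering has completed.
 */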
void
gen6_gpe_pipeline_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct intel_batchbuffer *batch)
{
    intel_batchbuffer_emit_mi_flush(batch);

    i965_gpe_select(ctx, gpe_context, batch);
    gen6_gpe_state_base_address(ctx, gpe_context, batch);
    gen6_gpe_vfe_state(ctx, gpe_context, batch);
    gen6_gpe_curbe_load(ctx, gpe_context, batch);
    gen6_gpe_idrt(ctx, gpe_context, batch);
}

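/*
 * The helpers below translate an I915_TILING_* value into the
 * tiled_surface/tile_walk fields of the per-generation surface state
 * layouts.
 */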
static void
i965_gpe_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_gpe_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen7_gpe_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen7_gpe_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen8_gpe_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen8_gpe_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

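/*
 * SURFACE_STATE for the media sampler (SURFACE_STATE2): an NV12 surface
 * is described as planar 4:2:0 with interleaved chroma, with the UV
 * offsets taken from the object surface.
 */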
static void
i965_gpe_set_surface2_state(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            struct i965_surface_state2 *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    assert(obj_surface->fourcc == VA_FOURCC_NV12);

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_base_address = obj_surface->bo->offset;
    /* ss1 */
    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;
    /* ss2 */
    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;
    i965_gpe_set_surface2_tiling(ss, tiling);
    /* ss3: UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
}

void
i965_gpe_surface2_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct object_surface *obj_surface,
                        unsigned long binding_table_offset,
                        unsigned long surface_state_offset)
{
    struct i965_surface_state2 *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct i965_surface_state2 *)((char *)bo->virtual + surface_state_offset);
    i965_gpe_set_surface2_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct i965_surface_state2, ss0),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

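/*
 * SURFACE_STATE for media block read/write messages: the surface is
 * exposed as R8_UNORM and the width is programmed in DWORDs (hence
 * w / 4), as the media block message layout requires.
 */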
static void
i965_gpe_set_media_rw_surface_state(VADriverContextP ctx,
                                    struct object_surface *obj_surface,
                                    struct i965_surface_state *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss1 */
    ss->ss1.base_addr = obj_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = h - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    i965_gpe_set_surface_tiling(ss, tiling);
}

void
i965_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                struct i965_gpe_context *gpe_context,
                                struct object_surface *obj_surface,
                                unsigned long binding_table_offset,
                                unsigned long surface_state_offset,
                                int write_enabled)
{
    struct i965_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
    i965_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      0,
                      surface_state_offset + offsetof(struct i965_surface_state, ss1),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

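/*
 * A linear buffer is described by its entry count minus one, split
 * across the width (bits 6:0), height and depth fields of the surface
 * state.
 */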
static void
i965_gpe_set_buffer_surface_state(VADriverContextP ctx,
                                  struct i965_buffer_surface *buffer_surface,
                                  struct i965_surface_state *ss)
{
    int num_entries;

    assert(buffer_surface->bo);
    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.render_cache_read_mode = 1;
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    /* ss1 */
    ss->ss1.base_addr = buffer_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x1fff);
    /* ss3 */
    ss->ss3.depth = (((num_entries - 1) >> 20) & 0x7f);
    ss->ss3.pitch = buffer_surface->pitch - 1;
}

void
i965_gpe_buffer_suface_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_buffer_surface *buffer_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset)
{
    struct i965_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
    i965_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      surface_state_offset + offsetof(struct i965_surface_state, ss1),
                      buffer_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen7_gpe_set_surface2_state(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            struct gen7_surface_state2 *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    assert(obj_surface->fourcc == VA_FOURCC_NV12);

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_base_address = obj_surface->bo->offset;
    /* ss1 */
    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;
    /* ss2 */
    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;
    gen7_gpe_set_surface2_tiling(ss, tiling);
    /* ss3: UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
}

void
gen7_gpe_surface2_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct object_surface *obj_surface,
                        unsigned long binding_table_offset,
                        unsigned long surface_state_offset)
{
    struct gen7_surface_state2 *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen7_surface_state2 *)((char *)bo->virtual + surface_state_offset);
    gen7_gpe_set_surface2_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct gen7_surface_state2, ss0),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen7_gpe_set_media_rw_surface_state(VADriverContextP ctx,
                                    struct object_surface *obj_surface,
                                    struct gen7_surface_state *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss1 */
    ss->ss1.base_addr = obj_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = h - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen7_gpe_set_surface_tiling(ss, tiling);
}

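/*
 * Chroma-only view of an NV12 surface: the base address is advanced to
 * the start of the interleaved CbCr plane (height * pitch bytes into
 * the bo) and the programmed height is halved accordingly.
 */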
static void
gen75_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
                                         struct object_surface *obj_surface,
                                         struct gen7_surface_state *ss)
{
    int w, w_pitch;
    unsigned int tiling, swizzle;
    int cbcr_offset;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    w_pitch = obj_surface->width;

    cbcr_offset = obj_surface->height * obj_surface->width;
    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss1 */
    ss->ss1.base_addr = obj_surface->bo->offset + cbcr_offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = (obj_surface->height / 2) - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen7_gpe_set_surface_tiling(ss, tiling);
}

void
gen7_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                struct i965_gpe_context *gpe_context,
                                struct object_surface *obj_surface,
                                unsigned long binding_table_offset,
                                unsigned long surface_state_offset,
                                int write_enabled)
{
    struct gen7_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen7_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      0,
                      surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

void
gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx,
                                     struct i965_gpe_context *gpe_context,
                                     struct object_surface *obj_surface,
                                     unsigned long binding_table_offset,
                                     unsigned long surface_state_offset,
                                     int write_enabled)
{
    struct gen7_surface_state *ss;
    dri_bo *bo;
    int cbcr_offset;

    assert(obj_surface->fourcc == VA_FOURCC_NV12);
    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    cbcr_offset = obj_surface->height * obj_surface->width;
    ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen75_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      cbcr_offset,
                      surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen7_gpe_set_buffer_surface_state(VADriverContextP ctx,
                                  struct i965_buffer_surface *buffer_surface,
                                  struct gen7_surface_state *ss)
{
    int num_entries;

    assert(buffer_surface->bo);
    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    /* ss1 */
    ss->ss1.base_addr = buffer_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
    /* ss3 */
    ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
    ss->ss3.pitch = buffer_surface->pitch - 1;
}

void
gen7_gpe_buffer_suface_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_buffer_surface *buffer_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset)
{
    struct gen7_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen7_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                      buffer_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

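/*
 * Gen8 surface state carries 48-bit graphics addresses, split across
 * two dwords (low bits in ss6/ss8, high bits in ss7/ss9); Gen9
 * additionally programs the MOCS cache attributes.
 */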
static void
gen8_gpe_set_surface2_state(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            struct gen8_surface_state2 *ss)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    assert(obj_surface->fourcc == VA_FOURCC_NV12);

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss5: memory object control state */
    if (IS_GEN9(i965->intel.device_info))
        ss->ss5.surface_object_mocs = GEN9_CACHE_PTE;

    /* ss6/ss7: 48-bit surface base address */
    ss->ss6.base_addr = (uint32_t)obj_surface->bo->offset64;
    ss->ss7.base_addr_high = (uint32_t)(obj_surface->bo->offset64 >> 32);
    /* ss1 */
    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;
    /* ss2 */
    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;
    gen8_gpe_set_surface2_tiling(ss, tiling);
    /* ss3: UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
}

void
gen8_gpe_surface2_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct object_surface *obj_surface,
                        unsigned long binding_table_offset,
                        unsigned long surface_state_offset)
{
    struct gen8_surface_state2 *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen8_surface_state2 *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_surface2_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct gen8_surface_state2, ss6),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen8_gpe_set_media_rw_surface_state(VADriverContextP ctx,
                                    struct object_surface *obj_surface,
                                    struct gen8_surface_state *ss)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss1: memory object control state */
    if (IS_GEN9(i965->intel.device_info))
        ss->ss1.surface_mocs = GEN9_CACHE_PTE;

    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss8/ss9: 48-bit surface base address */
    ss->ss8.base_addr = (uint32_t)obj_surface->bo->offset64;
    ss->ss9.base_addr_high = (uint32_t)(obj_surface->bo->offset64 >> 32);
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = h - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen8_gpe_set_surface_tiling(ss, tiling);
}

static void
gen8_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
                                        struct object_surface *obj_surface,
                                        struct gen8_surface_state *ss)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int w, w_pitch;
    unsigned int tiling, swizzle;
    int cbcr_offset;
    uint64_t base_offset;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    w_pitch = obj_surface->width;

    cbcr_offset = obj_surface->height * obj_surface->width;
    memset(ss, 0, sizeof(*ss));
    /* ss1: memory object control state */
    if (IS_GEN9(i965->intel.device_info))
        ss->ss1.surface_mocs = GEN9_CACHE_PTE;

    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss8/ss9: 48-bit base address of the CbCr plane */
    base_offset = obj_surface->bo->offset64 + cbcr_offset;
    ss->ss8.base_addr = (uint32_t) base_offset;
    ss->ss9.base_addr_high = (uint32_t) (base_offset >> 32);
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = (obj_surface->height / 2) - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen8_gpe_set_surface_tiling(ss, tiling);
}

void
gen8_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                struct i965_gpe_context *gpe_context,
                                struct object_surface *obj_surface,
                                unsigned long binding_table_offset,
                                unsigned long surface_state_offset,
                                int write_enabled)
{
    struct gen8_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      0,
                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

void
gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx,
                                    struct i965_gpe_context *gpe_context,
                                    struct object_surface *obj_surface,
                                    unsigned long binding_table_offset,
                                    unsigned long surface_state_offset,
                                    int write_enabled)
{
    struct gen8_surface_state *ss;
    dri_bo *bo;
    int cbcr_offset;

    assert(obj_surface->fourcc == VA_FOURCC_NV12);
    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    cbcr_offset = obj_surface->height * obj_surface->width;
    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      cbcr_offset,
                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

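/*
 * Gen8 variant of the linear buffer surface state: the entry count is
 * encoded as on earlier generations, with the base address in ss8/ss9.
 */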
static void
gen8_gpe_set_buffer_surface_state(VADriverContextP ctx,
                                  struct i965_buffer_surface *buffer_surface,
                                  struct gen8_surface_state *ss)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int num_entries;

    assert(buffer_surface->bo);
    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    /* ss1: memory object control state */
    if (IS_GEN9(i965->intel.device_info))
        ss->ss1.surface_mocs = GEN9_CACHE_PTE;

    /* ss8/ss9: 48-bit buffer base address */
    ss->ss8.base_addr = (uint32_t)buffer_surface->bo->offset64;
    ss->ss9.base_addr_high = (uint32_t)(buffer_surface->bo->offset64 >> 32);
    /* ss2 */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
    /* ss3 */
    ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
    ss->ss3.pitch = buffer_surface->pitch - 1;
}

void
gen8_gpe_buffer_suface_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_buffer_surface *buffer_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset)
{
    struct gen8_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                      buffer_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

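/*
 * Gen8 STATE_BASE_ADDRESS is 16 dwords with 64-bit base addresses; the
 * dynamic, indirect and instruction bases are only relocated when the
 * corresponding bo exists.
 */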
static void
gen8_gpe_state_base_address(VADriverContextP ctx,
                            struct i965_gpe_context *gpe_context,
                            struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 16);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));

    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);          /* General State Base Address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    /* DW4-5. Surface state base address */
    OUT_RELOC64(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);

    /* DW6-7. Dynamic state base address */
    if (gpe_context->dynamic_state.bo)
        OUT_RELOC64(batch, gpe_context->dynamic_state.bo,
                    I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
                    0, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW8-9. Indirect object base address */
    if (gpe_context->indirect_state.bo)
        OUT_RELOC64(batch, gpe_context->indirect_state.bo,
                    I915_GEM_DOMAIN_SAMPLER,
                    0, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW10-11. Instruction base address */
    if (gpe_context->instruction_state.bo)
        OUT_RELOC64(batch, gpe_context->instruction_state.bo,
                    I915_GEM_DOMAIN_INSTRUCTION,
                    0, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW12-15. Size limitations */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* General State Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Dynamic State Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Indirect Object Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Instruction Access Upper Bound */

    /*
      OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);            // LLC Coherent Base Address
      OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);   // LLC Coherent Upper Bound
    */

    ADVANCE_BATCH(batch);
}

static void
gen8_gpe_vfe_state(VADriverContextP ctx,
                   struct i965_gpe_context *gpe_context,
                   struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 9);

    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2));
    /* Scratch Space Base Pointer and Space */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              gpe_context->vfe_state.max_num_threads << 16 |    /* Maximum Number of Threads */
              gpe_context->vfe_state.num_urb_entries << 8 |     /* Number of URB Entries */
              gpe_context->vfe_state.gpgpu_mode << 2);          /* MEDIA Mode */
    OUT_BATCH(batch, 0);                                        /* Debug: Object ID */
    OUT_BATCH(batch,
              gpe_context->vfe_state.urb_entry_size << 16 |     /* URB Entry Allocation Size */
              gpe_context->vfe_state.curbe_allocation_size);    /* CURBE Allocation Size */

    /* vfe_desc5/6/7 decide whether the scoreboard is used. */
    OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc7.dword);

    ADVANCE_BATCH(batch);
}

static void
gen8_gpe_curbe_load(VADriverContextP ctx,
                    struct i965_gpe_context *gpe_context,
                    struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->curbe_size);
    OUT_BATCH(batch, gpe_context->curbe_offset);

    ADVANCE_BATCH(batch);
}

static void
gen8_gpe_idrt(VADriverContextP ctx,
              struct i965_gpe_context *gpe_context,
              struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 6);

    OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->idrt_size);
    OUT_BATCH(batch, gpe_context->idrt_offset);

    ADVANCE_BATCH(batch);
}

void
gen8_gpe_pipeline_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct intel_batchbuffer *batch)
{
    intel_batchbuffer_emit_mi_flush(batch);

    i965_gpe_select(ctx, gpe_context, batch);
    gen8_gpe_state_base_address(ctx, gpe_context, batch);
    gen8_gpe_vfe_state(ctx, gpe_context, batch);
    gen8_gpe_curbe_load(ctx, gpe_context, batch);
    gen8_gpe_idrt(ctx, gpe_context, batch);
}

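/*
 * On Gen8+ the CURBE, interface descriptors and sampler state live in a
 * single dynamic state bo; the code below carves that bo into 64-byte
 * aligned regions and records the offsets in the context.
 */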
void
gen8_gpe_context_init(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;
    int bo_size;
    unsigned int start_offset, end_offset;

    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      gpe_context->surface_state_binding_table.length,
                      4096);
    assert(bo);
    gpe_context->surface_state_binding_table.bo = bo;

    bo_size = gpe_context->idrt_size + gpe_context->curbe_size + gpe_context->sampler_size + 192;
    dri_bo_unreference(gpe_context->dynamic_state.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "dynamic state",
                      bo_size,
                      4096);
    assert(bo);
    gpe_context->dynamic_state.bo = bo;
    gpe_context->dynamic_state.bo_size = bo_size;

    end_offset = 0;
    gpe_context->dynamic_state.end_offset = 0;

    /* Constant buffer offset */
    start_offset = ALIGN(end_offset, 64);
    gpe_context->curbe_offset = start_offset;
    end_offset = start_offset + gpe_context->curbe_size;

    /* Interface descriptor offset */
    start_offset = ALIGN(end_offset, 64);
    gpe_context->idrt_offset = start_offset;
    end_offset = start_offset + gpe_context->idrt_size;

    /* Sampler state offset */
    start_offset = ALIGN(end_offset, 64);
    gpe_context->sampler_offset = start_offset;
    end_offset = start_offset + gpe_context->sampler_size;

    /* update the end offset of dynamic_state */
    gpe_context->dynamic_state.end_offset = end_offset;
}

void
gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
{
    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    gpe_context->surface_state_binding_table.bo = NULL;

    dri_bo_unreference(gpe_context->instruction_state.bo);
    gpe_context->instruction_state.bo = NULL;

    dri_bo_unreference(gpe_context->dynamic_state.bo);
    gpe_context->dynamic_state.bo = NULL;

    dri_bo_unreference(gpe_context->indirect_state.bo);
    gpe_context->indirect_state.bo = NULL;
}

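/*
 * Gen8+ packs all kernel binaries into one instruction bo at 64-byte
 * aligned offsets instead of one bo per kernel.
 */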
void
gen8_gpe_load_kernels(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct i965_kernel *kernel_list,
                      unsigned int num_kernels)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i, kernel_size;
    unsigned int kernel_offset, end_offset;
    unsigned char *kernel_ptr;
    struct i965_kernel *kernel;

    assert(num_kernels <= MAX_GPE_KERNELS);
    memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
    gpe_context->num_kernels = num_kernels;

    /* 64 extra bytes per kernel cover the 64-byte alignment padding */
    kernel_size = num_kernels * 64;
    for (i = 0; i < num_kernels; i++) {
        kernel = &gpe_context->kernels[i];

        kernel_size += kernel->size;
    }

    gpe_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
                                                     "kernel shader",
                                                     kernel_size,
                                                     0x1000);
    if (gpe_context->instruction_state.bo == NULL) {
        WARN_ONCE("failed to allocate buffer space for kernel shader\n");
        return;
    }

    gpe_context->instruction_state.bo_size = kernel_size;
    gpe_context->instruction_state.end_offset = 0;
    end_offset = 0;

    dri_bo_map(gpe_context->instruction_state.bo, 1);
    kernel_ptr = (unsigned char *)(gpe_context->instruction_state.bo->virtual);
    for (i = 0; i < num_kernels; i++) {
        kernel_offset = ALIGN(end_offset, 64);
        kernel = &gpe_context->kernels[i];
        kernel->kernel_offset = kernel_offset;

        if (kernel->size) {
            memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);

            end_offset = kernel_offset + kernel->size;
        }
    }

    gpe_context->instruction_state.end_offset = end_offset;

    dri_bo_unmap(gpe_context->instruction_state.bo);
}

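/*
 * Gen9 STATE_BASE_ADDRESS grows to 19 dwords, appending the bindless
 * surface state base address and size.
 */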
static void
gen9_gpe_state_base_address(VADriverContextP ctx,
                            struct i965_gpe_context *gpe_context,
                            struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 19);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (19 - 2));

    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);          /* General State Base Address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    /* DW4-5. Surface state base address */
    OUT_RELOC64(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);

    /* DW6-7. Dynamic state base address */
    if (gpe_context->dynamic_state.bo)
        OUT_RELOC64(batch, gpe_context->dynamic_state.bo,
                    I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
                    I915_GEM_DOMAIN_RENDER, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW8-9. Indirect object base address */
    if (gpe_context->indirect_state.bo)
        OUT_RELOC64(batch, gpe_context->indirect_state.bo,
                    I915_GEM_DOMAIN_SAMPLER,
                    0, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW10-11. Instruction base address */
    if (gpe_context->instruction_state.bo)
        OUT_RELOC64(batch, gpe_context->instruction_state.bo,
                    I915_GEM_DOMAIN_INSTRUCTION,
                    0, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW12-15. Size limitations */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* General State Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Dynamic State Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Indirect Object Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Instruction Access Upper Bound */

    /* DW16-18. Bindless surface state base address and size */
    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0xFFFFF000);

    ADVANCE_BATCH(batch);
}

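/*
 * Gen9 PIPELINE_SELECT requires the mask bits for every field being
 * written: media is selected with DOP clock gating disabled and the
 * media engine forced awake, undone again in gen9_gpe_pipeline_end().
 */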
static void
gen9_gpe_select(VADriverContextP ctx,
                struct i965_gpe_context *gpe_context,
                struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
              GEN9_PIPELINE_SELECTION_MASK |
              GEN9_MEDIA_DOP_GATE_OFF |
              GEN9_MEDIA_DOP_GATE_MASK |
              GEN9_FORCE_MEDIA_AWAKE_ON |
              GEN9_FORCE_MEDIA_AWAKE_MASK);
    ADVANCE_BATCH(batch);
}

void
gen9_gpe_pipeline_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct intel_batchbuffer *batch)
{
    intel_batchbuffer_emit_mi_flush(batch);

    gen9_gpe_select(ctx, gpe_context, batch);
    gen9_gpe_state_base_address(ctx, gpe_context, batch);
    gen8_gpe_vfe_state(ctx, gpe_context, batch);
    gen8_gpe_curbe_load(ctx, gpe_context, batch);
    gen8_gpe_idrt(ctx, gpe_context, batch);
}

void
gen9_gpe_pipeline_end(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
              GEN9_PIPELINE_SELECTION_MASK |
              GEN9_MEDIA_DOP_GATE_ON |
              GEN9_MEDIA_DOP_GATE_MASK |
              GEN9_FORCE_MEDIA_AWAKE_OFF |
              GEN9_FORCE_MEDIA_AWAKE_MASK);
    ADVANCE_BATCH(batch);
}

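/*
 * The i965_gpe_resource helpers wrap a dri_bo together with the type,
 * layout and mapping bookkeeping used by the Gen9 GPE paths.
 */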
Bool
i965_allocate_gpe_resource(dri_bufmgr *bufmgr,
                           struct i965_gpe_resource *res,
                           int size,
                           const char *name)
{
    if (!res || !size)
        return false;

    res->size = size;
    res->bo = dri_bo_alloc(bufmgr, name, res->size, 4096);
    res->map = NULL;

    return (res->bo != NULL);
}

void
i965_object_surface_to_2d_gpe_resource(struct i965_gpe_resource *res,
                                       struct object_surface *obj_surface)
{
    unsigned int swizzle;

    res->type = I965_GPE_RESOURCE_2D;
    res->width = obj_surface->orig_width;
    res->height = obj_surface->orig_height;
    res->pitch = obj_surface->width;
    res->size = obj_surface->size;
    res->cb_cr_pitch = obj_surface->cb_cr_pitch;
    res->x_cb_offset = obj_surface->x_cb_offset;
    res->y_cb_offset = obj_surface->y_cb_offset;
    res->bo = obj_surface->bo;
    res->map = NULL;

    dri_bo_reference(res->bo);
    dri_bo_get_tiling(obj_surface->bo, &res->tiling, &swizzle);
}

void
i965_dri_object_to_buffer_gpe_resource(struct i965_gpe_resource *res,
                                       dri_bo *bo)
{
    unsigned int swizzle;

    res->type = I965_GPE_RESOURCE_BUFFER;
    res->width = bo->size;
    res->height = 1;
    res->pitch = res->width;
    res->size = res->pitch * res->height;   /* one row: size == bo->size */
    res->bo = bo;
    res->map = NULL;

    dri_bo_reference(res->bo);
    dri_bo_get_tiling(res->bo, &res->tiling, &swizzle);
}

void
i965_gpe_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
                                       dri_bo *bo,
                                       unsigned int width,
                                       unsigned int height,
                                       unsigned int pitch)
{
    unsigned int swizzle;

    res->type = I965_GPE_RESOURCE_2D;
    res->width = width;
    res->height = height;
    res->pitch = pitch;
    res->size = res->pitch * res->height;   /* pitch bytes per row */
    res->bo = bo;
    res->map = NULL;

    dri_bo_reference(res->bo);
    dri_bo_get_tiling(res->bo, &res->tiling, &swizzle);
}

void
i965_zero_gpe_resource(struct i965_gpe_resource *res)
{
    if (res->bo) {
        dri_bo_map(res->bo, 1);
        memset(res->bo->virtual, 0, res->size);
        dri_bo_unmap(res->bo);
    }
}

void
i965_free_gpe_resource(struct i965_gpe_resource *res)
{
    dri_bo_unreference(res->bo);
    res->bo = NULL;
    res->map = NULL;
}

void *
i965_map_gpe_resource(struct i965_gpe_resource *res)
{
    int ret;

    if (res->bo) {
        ret = dri_bo_map(res->bo, 1);

        if (ret == 0)
            res->map = res->bo->virtual;
        else
            res->map = NULL;
    } else
        res->map = NULL;

    return res->map;
}

void
i965_unmap_gpe_resource(struct i965_gpe_resource *res)
{
    if (res->bo && res->map)
        dri_bo_unmap(res->bo);

    res->map = NULL;
}

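/*
 * MI command helpers for the Gen9 GPE paths. MI_FLUSH_DW optionally
 * performs a post-sync QWord write when a target bo is supplied.
 */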
1463 void
1464 gen9_gpe_mi_flush_dw(VADriverContextP ctx,
1465                      struct intel_batchbuffer *batch,
1466                      struct gpe_mi_flush_dw_parameter *params)
1467 {
1468     int video_pipeline_cache_invalidate = 0;
1469     int post_sync_operation = MI_FLUSH_DW_NOWRITE;
1470
1471     if (params->video_pipeline_cache_invalidate)
1472         video_pipeline_cache_invalidate = MI_FLUSH_DW_VIDEO_PIPELINE_CACHE_INVALIDATE;
1473
1474     if (params->bo)
1475         post_sync_operation = MI_FLUSH_DW_WRITE_QWORD;
1476
1477     __OUT_BATCH(batch, (MI_FLUSH_DW2 |
1478                         video_pipeline_cache_invalidate |
1479                         post_sync_operation |
1480                         (5 - 2))); /* Always use PPGTT */
1481
1482     if (params->bo) {
1483         __OUT_RELOC64(batch,
1484                       params->bo,
1485                       I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1486                       params->offset);
1487     } else {
1488         __OUT_BATCH(batch, 0);
1489         __OUT_BATCH(batch, 0);
1490     }
1491
1492     __OUT_BATCH(batch, params->dw0);
1493     __OUT_BATCH(batch, params->dw1);
1494 }
1495
1496 void
1497 gen9_gpe_mi_store_data_imm(VADriverContextP ctx,
1498                            struct intel_batchbuffer *batch,
1499                            struct gpe_mi_store_data_imm_parameter *params)
1500 {
1501     if (params->is_qword) {
1502         __OUT_BATCH(batch, MI_STORE_DATA_IMM |
1503                     (1 << 21) | /* store qword */
1504                     (5 - 2)); /* Always use PPGTT */
1505     } else {
1506         __OUT_BATCH(batch, MI_STORE_DATA_IMM | (4 - 2)); /* Always use PPGTT */
1507     }
1508
1509     __OUT_RELOC64(batch,
1510                   params->bo,
1511                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1512                   params->offset);
1513     __OUT_BATCH(batch, params->dw0);
1514
1515     if (params->is_qword)
1516         __OUT_BATCH(batch, params->dw1);
1517 }
1518
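/* Emit MI_STORE_REGISTER_MEM: copy one MMIO register to bo + offset. */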
1519 void
1520 gen9_gpe_mi_store_register_mem(VADriverContextP ctx,
1521                                struct intel_batchbuffer *batch,
1522                                struct gpe_mi_store_register_mem_parameter *params)
1523 {
1524     __OUT_BATCH(batch, (MI_STORE_REGISTER_MEM | (4 - 2))); /* Always use PPGTT */
1525     __OUT_BATCH(batch, params->mmio_offset);
1526     __OUT_RELOC64(batch,
1527                   params->bo,
1528                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1529                   params->offset);
1530 }
1531
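/* Emit MI_LOAD_REGISTER_MEM: load one MMIO register from bo + offset. */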
1532 void
1533 gen9_gpe_mi_load_register_mem(VADriverContextP ctx,
1534                               struct intel_batchbuffer *batch,
1535                               struct gpe_mi_load_register_mem_parameter *params)
1536 {
1537     __OUT_BATCH(batch, (MI_LOAD_REGISTER_MEM | (4 - 2))); /* Always use PPGTT */
1538     __OUT_BATCH(batch, params->mmio_offset);
1539     __OUT_RELOC64(batch,
1540                   params->bo,
1541                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1542                   params->offset);
1543 }
1544
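/* Emit MI_LOAD_REGISTER_IMM: write an immediate dword to an MMIO register. */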
1545 void
1546 gen9_gpe_mi_load_register_imm(VADriverContextP ctx,
1547                               struct intel_batchbuffer *batch,
1548                               struct gpe_mi_load_register_imm_parameter *params)
1549 {
1550     __OUT_BATCH(batch, (MI_LOAD_REGISTER_IMM | (3 - 2)));
1551     __OUT_BATCH(batch, params->mmio_offset);
1552     __OUT_BATCH(batch, params->data);
1553 }
1554
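/* Emit MI_LOAD_REGISTER_REG: copy one MMIO register into another. */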
1555 void
1556 gen9_gpe_mi_load_register_reg(VADriverContextP ctx,
1557                               struct intel_batchbuffer *batch,
1558                               struct gpe_mi_load_register_reg_parameter *params)
1559 {
1560     __OUT_BATCH(batch, (MI_LOAD_REGISTER_REG | (3 - 2)));
1561     __OUT_BATCH(batch, params->src_mmio_offset);
1562     __OUT_BATCH(batch, params->dst_mmio_offset);
1563 }
1564
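/*
 * Emit MI_MATH followed by the caller-built ALU instruction list; each
 * instruction is one dword and DW0 holds the instruction count minus one.
 */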
1565 void
1566 gen9_gpe_mi_math(VADriverContextP ctx,
1567                  struct intel_batchbuffer *batch,
1568                  struct gpe_mi_math_parameter *params)
1569 {
1570     __OUT_BATCH(batch, (MI_MATH | (params->num_instructions - 1)));
1571     intel_batchbuffer_data(batch, params->instruction_list, params->num_instructions * 4);
1572 }
1573
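/*
 * Emit MI_CONDITIONAL_BATCH_BUFFER_END: end the batch depending on how the
 * dword at bo + offset compares with compare_data (optionally masked).
 */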
1574 void
1575 gen9_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
1576                                          struct intel_batchbuffer *batch,
1577                                          struct gpe_mi_conditional_batch_buffer_end_parameter *params)
1578 {
1579     int compare_mask_mode_enabled = MI_COMPARE_MASK_MODE_ENANBLED; /* "ENANBLED" is the spelling used in the headers */
1580
1581     if (params->compare_mask_mode_disabled)
1582         compare_mask_mode_enabled = 0;
1583
1584     __OUT_BATCH(batch, (MI_CONDITIONAL_BATCH_BUFFER_END |
1585                         (1 << 21) | /* compare semaphore */
1586                         compare_mask_mode_enabled |
1587                         (4 - 2))); /* Always use PPGTT */
1588     __OUT_BATCH(batch, params->compare_data);
1589     __OUT_RELOC64(batch,
1590                   params->bo,
1591                   I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
1592                   params->offset);
1593 }
1594
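/*
 * Emit MI_BATCH_BUFFER_START chaining to bo + offset, optionally as a
 * second-level batch, using PPGTT unless use_global_gtt is set.
 */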
1595 void
1596 gen9_gpe_mi_batch_buffer_start(VADriverContextP ctx,
1597                                struct intel_batchbuffer *batch,
1598                                struct gpe_mi_batch_buffer_start_parameter *params)
1599 {
1600     __OUT_BATCH(batch, (MI_BATCH_BUFFER_START |
1601                         (!!params->is_second_level << 22) |
1602                         (!params->use_global_gtt << 8) |
1603                         (1 << 0))); /* dword length: 3 - 2 */
1604     __OUT_RELOC64(batch,
1605                 params->bo,
1606                 I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
1607                 params->offset);
1608 }
1609
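/*
 * Attach an externally allocated dynamic state buffer to the GPE context
 * and record the CURBE, IDRT and sampler offsets within it.
 */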
1610 void
1611 gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
1612                                     struct i965_gpe_context *gpe_context,
1613                                     struct gpe_dynamic_state_parameter *ds)
1614 {
1615     if (!ds->bo || !gpe_context)
1616         return;
1617
1618     dri_bo_unreference(gpe_context->dynamic_state.bo);
1619     gpe_context->dynamic_state.bo = ds->bo;
1620     dri_bo_reference(gpe_context->dynamic_state.bo);
1621     gpe_context->dynamic_state.bo_size = ds->bo_size;
1622
1623     gpe_context->curbe_offset = ds->curbe_offset;
1624     gpe_context->idrt_offset = ds->idrt_offset;
1625     gpe_context->sampler_offset = ds->sampler_offset;
1626
1627     return;
1628 }
1629
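/* Map the dynamic state buffer and return a pointer to its CURBE region. */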
1630 void *
1631 gen8p_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
1632 {
1633     dri_bo_map(gpe_context->dynamic_state.bo, 1);
1634
1635     return (char *)gpe_context->dynamic_state.bo->virtual + gpe_context->curbe_offset;
1636 }
1637
1638 void
1639 gen8p_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context)
1640 {
1641     dri_bo_unmap(gpe_context->dynamic_state.bo);
1642 }
1643
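/*
 * Reset the binding table so that entry i points at surface state slot i
 * within the surface state/binding table bo.
 */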
1644 void
1645 gen9_gpe_reset_binding_table(VADriverContextP ctx,
1646                              struct i965_gpe_context *gpe_context)
1647 {
1648     unsigned int *binding_table;
1649     unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset;
1650     int i;
1651
1652     dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
1653     binding_table = (unsigned int*)((char *)gpe_context->surface_state_binding_table.bo->virtual + binding_table_offset);
1654
1655     for (i = 0; i < gpe_context->surface_state_binding_table.max_entries; i++) {
1656         *(binding_table + i) = gpe_context->surface_state_binding_table.surface_state_offset + i * SURFACE_STATE_PADDED_SIZE_GEN9;
1657     }
1658
1659     dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
1660 }
1661
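/*
 * Build the interface descriptor table (IDRT) in the dynamic state buffer:
 * one 32-byte descriptor per kernel, all sharing the same binding table,
 * sampler state and CURBE layout.
 */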
1662 void
1663 gen8_gpe_setup_interface_data(VADriverContextP ctx,
1664                               struct i965_gpe_context *gpe_context)
1665 {
1666     struct gen8_interface_descriptor_data *desc;
1667     int i;
1668     dri_bo *bo;
1669     unsigned char *desc_ptr;
1670
1671     bo = gpe_context->dynamic_state.bo;
1672     dri_bo_map(bo, 1);
1673     assert(bo->virtual);
1674     desc_ptr = (unsigned char *)bo->virtual + gpe_context->idrt_offset;
1675     desc = (struct gen8_interface_descriptor_data *)desc_ptr;
1676
1677     for (i = 0; i < gpe_context->num_kernels; i++) {
1678         struct i965_kernel *kernel;
1679
1680         kernel = &gpe_context->kernels[i];
1681         assert(sizeof(*desc) == 32);
1682
1683         /* Set up the descriptor table */
1684         memset(desc, 0, sizeof(*desc));
1685         desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
1686         desc->desc3.sampler_count = 0;
1687         desc->desc3.sampler_state_pointer = (gpe_context->sampler_offset >> 5);
1688         desc->desc4.binding_table_entry_count = 0;
1689         desc->desc4.binding_table_pointer = (gpe_context->surface_state_binding_table.binding_table_offset >> 5);
1690         desc->desc5.constant_urb_entry_read_offset = 0;
1691         desc->desc5.constant_urb_entry_read_length = ALIGN(gpe_context->curbe.length, 32) >> 5; // in registers
1692
1693         desc++;
1694     }
1695
1696     dri_bo_unmap(bo);
1697 }
1698
1699 static void
1700 gen9_gpe_set_surface_tiling(struct gen9_surface_state *ss, unsigned int tiling)
1701 {
1702     switch (tiling) {
1703     case I915_TILING_NONE:
1704         ss->ss0.tiled_surface = 0;
1705         ss->ss0.tile_walk = 0;
1706         break;
1707     case I915_TILING_X:
1708         ss->ss0.tiled_surface = 1;
1709         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1710         break;
1711     case I915_TILING_Y:
1712         ss->ss0.tiled_surface = 1;
1713         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1714         break;
1715     }
1716 }
1717
1718 static void
1719 gen9_gpe_set_surface2_tiling(struct gen9_surface_state2 *ss, unsigned int tiling)
1720 {
1721     switch (tiling) {
1722     case I915_TILING_NONE:
1723         ss->ss2.tiled_surface = 0;
1724         ss->ss2.tile_walk = 0;
1725         break;
1726     case I915_TILING_X:
1727         ss->ss2.tiled_surface = 1;
1728         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1729         break;
1730     case I915_TILING_Y:
1731         ss->ss2.tiled_surface = 1;
1732         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1733         break;
1734     }
1735 }
1736
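/* Fill a gen9 SURFACE_STATE for a 2D surface at the given GPU address. */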
1737 static void
1738 gen9_gpe_set_2d_surface_state(struct gen9_surface_state *ss,
1739                               unsigned int cacheability_control,
1740                               unsigned int format,
1741                               unsigned int tiling,
1742                               unsigned int width,
1743                               unsigned int height,
1744                               unsigned int pitch,
1745                               uint64_t base_offset,
1746                               unsigned int y_offset)
1747 {
1748     memset(ss, 0, sizeof(*ss));
1749
1750     /* Always set to 1 (align-4 mode) */
1751     ss->ss0.vertical_alignment = 1;
1752     ss->ss0.horizontal_alignment = 1;
1753
1754     ss->ss0.surface_format = format;
1755     ss->ss0.surface_type = I965_SURFACE_2D;
1756
1757     ss->ss1.surface_mocs = cacheability_control;
1758
1759     ss->ss2.width = width - 1;
1760     ss->ss2.height = height - 1;
1761
1762     ss->ss3.pitch = pitch - 1;
1763
1764     ss->ss5.y_offset = y_offset;
1765
1766     ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
1767     ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
1768     ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
1769     ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
1770
1771     ss->ss8.base_addr = (uint32_t)base_offset;
1772     ss->ss9.base_addr_high = (uint32_t)(base_offset >> 32);
1773
1774     gen9_gpe_set_surface_tiling(ss, tiling);
1775 }
1776
1777 /* This is only for NV12-style surfaces (interleaved chroma) */
1778 static void
1779 gen9_gpe_set_adv_surface_state(struct gen9_surface_state2 *ss,
1780                                unsigned int v_direction,
1781                                unsigned int cacheability_control,
1782                                unsigned int format,
1783                                unsigned int tiling,
1784                                unsigned int width,
1785                                unsigned int height,
1786                                unsigned int pitch,
1787                                uint64_t base_offset,
1788                                unsigned int y_cb_offset)
1789 {
1790     memset(ss, 0, sizeof(*ss));
1791
1792     ss->ss1.cbcr_pixel_offset_v_direction = v_direction;
1793     ss->ss1.width = width - 1;
1794     ss->ss1.height = height - 1;
1795
1796     ss->ss2.surface_format = format;
1797     ss->ss2.interleave_chroma = 1;
1798     ss->ss2.pitch = pitch - 1;
1799
1800     ss->ss3.y_offset_for_cb = y_cb_offset;
1801
1802     ss->ss5.surface_object_mocs = cacheability_control;
1803
1804     ss->ss6.base_addr = (uint32_t)base_offset;
1805     ss->ss7.base_addr_high = (uint32_t)(base_offset >> 32);
1806
1807     gen9_gpe_set_surface2_tiling(ss, tiling);
1808 }
1809
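/*
 * Fill a gen9 SURFACE_STATE for a buffer surface; the (size - 1) element
 * count is split across the width, height and depth fields.
 */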
1810 static void
1811 gen9_gpe_set_buffer2_surface_state(struct gen9_surface_state *ss,
1812                                    unsigned int cacheability_control,
1813                                    unsigned int format,
1814                                    unsigned int size,
1815                                    unsigned int pitch,
1816                                    uint64_t base_offset)
1817 {
1818     memset(ss, 0, sizeof(*ss));
1819
1820     ss->ss0.surface_format = format;
1821     ss->ss0.surface_type = I965_SURFACE_BUFFER;
1822
1823     ss->ss1.surface_mocs = cacheability_control;
1824
1825     ss->ss2.width = (size - 1) & 0x7F;              /* bits 6:0 of (size - 1) */
1826     ss->ss2.height = ((size - 1) & 0x1FFF80) >> 7;  /* bits 20:7 */
1827
1828     ss->ss3.depth = ((size - 1) & 0xFE00000) >> 21; /* bits 27:21 */
1829     ss->ss3.pitch = pitch - 1;
1830
1831     ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
1832     ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
1833     ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
1834     ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
1835
1836     ss->ss8.base_addr = (uint32_t)base_offset;
1837     ss->ss9.base_addr_high = (uint32_t)(base_offset >> 32);
1838 }
1839
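/*
 * Write the surface state for gpe_surface at the given binding table index,
 * choosing the chroma (UV), plain 2D, advanced (NV12) or buffer layout from
 * the surface flags, and emit the matching relocation for the GPU address.
 */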
1840 void
1841 gen9_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
1842                              struct i965_gpe_surface *gpe_surface,
1843                              int index)
1844 {
1845     char *buf;
1846     unsigned int tiling, swizzle, width, height, pitch, tile_alignment, y_offset = 0;
1847     unsigned int surface_state_offset = gpe_context->surface_state_binding_table.surface_state_offset +
1848         index * SURFACE_STATE_PADDED_SIZE_GEN9;
1849     unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset +
1850         index * 4;
1851     struct i965_gpe_resource *gpe_resource = gpe_surface->gpe_resource;
1852
1853     dri_bo_get_tiling(gpe_resource->bo, &tiling, &swizzle);
1854
1855     dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
1856     buf = (char *)gpe_context->surface_state_binding_table.bo->virtual;
1857     *((unsigned int *)(buf + binding_table_offset)) = surface_state_offset;
1858
1859     if (gpe_surface->is_2d_surface && gpe_surface->is_uv_surface) {
1860         unsigned int cbcr_offset;
1861         struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
1862
1863         width = gpe_resource->width;
1864         height = gpe_resource->height / 2;
1865         pitch = gpe_resource->pitch;
1866
1867         if (gpe_surface->is_media_block_rw) { /* MEDIA_BLOCK_RW takes the width in dwords */
1868             if (gpe_surface->is_16bpp)
1869                 width = (ALIGN(width * 2, 4) >> 2); /* 16bpp: two bytes per element */
1870             else
1871                 width = (ALIGN(width, 4) >> 2);
1872         }
1873
1874         if (tiling == I915_TILING_Y) {
1875             tile_alignment = 32; /* Y tiles are 32 rows tall */
1876         } else if (tiling == I915_TILING_X) {
1877             tile_alignment = 8; /* X tiles are 8 rows tall */
1878         } else
1879             tile_alignment = 1;
1880
1881         y_offset = (gpe_resource->y_cb_offset % tile_alignment); /* intra-tile row offset */
1882         cbcr_offset = ALIGN_FLOOR(gpe_resource->y_cb_offset, tile_alignment) * pitch; /* tile-aligned byte offset of the chroma rows */
1883
1884         gen9_gpe_set_2d_surface_state(ss,
1885                                       gpe_surface->cacheability_control,
1886                                       I965_SURFACEFORMAT_R16_UINT,
1887                                       tiling,
1888                                       width, height, pitch,
1889                                       gpe_resource->bo->offset64 + cbcr_offset,
1890                                       y_offset);
1891
1892         dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
1893                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1894                           cbcr_offset,
1895                           surface_state_offset + offsetof(struct gen9_surface_state, ss8),
1896                           gpe_resource->bo);
1897     } else if (gpe_surface->is_2d_surface) {
1898         struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
1899
1900         width = gpe_resource->width;
1901         height = gpe_resource->height;
1902         pitch = gpe_resource->pitch;
1903
1904         if (gpe_surface->is_media_block_rw) {
1905             if (gpe_surface->is_16bpp)
1906                 width = (ALIGN(width * 2, 4) >> 2);
1907             else
1908                 width = (ALIGN(width, 4) >> 2);
1909         }
1910
1911         gen9_gpe_set_2d_surface_state(ss,
1912                                       gpe_surface->cacheability_control,
1913                                       gpe_surface->format,
1914                                       tiling,
1915                                       width, height, pitch,
1916                                       gpe_resource->bo->offset64,
1917                                       y_offset);
1918
1919         dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
1920                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1921                           0,
1922                           surface_state_offset + offsetof(struct gen9_surface_state, ss8),
1923                           gpe_resource->bo);
1924     } else if (gpe_surface->is_adv_surface) {
1925         struct gen9_surface_state2 *ss = (struct gen9_surface_state2 *)(buf + surface_state_offset);
1926
1927         width = gpe_resource->width;
1928         height = gpe_resource->height;
1929         pitch = gpe_resource->pitch;
1930
1931         gen9_gpe_set_adv_surface_state(ss,
1932                                        gpe_surface->v_direction,
1933                                        gpe_surface->cacheability_control,
1934                                        MFX_SURFACE_PLANAR_420_8,
1935                                        tiling,
1936                                        width, height, pitch,
1937                                        gpe_resource->bo->offset64,
1938                                        gpe_resource->y_cb_offset);
1939
1940         dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
1941                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1942                           0,
1943                           surface_state_offset + offsetof(struct gen9_surface_state2, ss6),
1944                           gpe_resource->bo);
1945     } else {
1946         struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
1947         unsigned int format;
1948
1949         assert(gpe_surface->is_buffer);
1950
1951         if (gpe_surface->is_raw_buffer) {
1952             format = I965_SURFACEFORMAT_RAW;
1953             pitch = 1;
1954         } else {
1955             format = I965_SURFACEFORMAT_R32_UINT;
1956             pitch = sizeof(unsigned int);
1957         }
1958
1959         gen9_gpe_set_buffer2_surface_state(ss,
1960                                            gpe_surface->cacheability_control,
1961                                            format,
1962                                            gpe_surface->size,
1963                                            pitch,
1964                                            gpe_resource->bo->offset64 + gpe_surface->offset);
1965
1966         dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
1967                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1968                           gpe_surface->offset,
1969                           surface_state_offset + offsetof(struct gen9_surface_state, ss8),
1970                           gpe_resource->bo);
1971     }
1972
1973     dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
1974 }
1975
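/*
 * Allocate a fresh page-aligned bo for a 2D GPE resource; the height is
 * padded to a multiple of 16 rows when sizing the allocation.
 */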
1976 bool
1977 i965_gpe_allocate_2d_resource(dri_bufmgr *bufmgr,
1978                               struct i965_gpe_resource *res,
1979                               int width,
1980                               int height,
1981                               int pitch,
1982                               const char *name)
1983 {
1984     int bo_size;
1985
1986     if (!res)
1987         return false;
1988
1989     res->type = I965_GPE_RESOURCE_2D;
1990     res->width = width;
1991     res->height = height;
1992     res->pitch = pitch;
1993
1994     bo_size = ALIGN(height, 16) * pitch;
1995     res->size = bo_size;
1996
1997     res->bo = dri_bo_alloc(bufmgr, name, res->size, 4096);
1998     res->map = NULL;
1999
2000     return res->bo != NULL;
2001 }
2002
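/* Emit a 2-dword MEDIA_STATE_FLUSH to serialize the media pipeline. */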
2003 void
2004 gen8_gpe_media_state_flush(VADriverContextP ctx,
2005                            struct i965_gpe_context *gpe_context,
2006                            struct intel_batchbuffer *batch)
2007 {
2008     BEGIN_BATCH(batch, 2);
2009
2010     OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH | (2 - 2));
2011     OUT_BATCH(batch, 0);
2012
2013     ADVANCE_BATCH(batch);
2014 }
2015
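/*
 * Emit a MEDIA_OBJECT command dispatching one thread; any inline data is
 * appended to the command, padded to a dword multiple.
 */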
2016 void
2017 gen8_gpe_media_object(VADriverContextP ctx,
2018                       struct i965_gpe_context *gpe_context,
2019                       struct intel_batchbuffer *batch,
2020                       struct gpe_media_object_parameter *param)
2021 {
2022     int batch_size, subdata_size;
2023
2024     batch_size = 6;
2025     subdata_size = 0;
2026     if (param->pinline_data && param->inline_size) {
2027         subdata_size = ALIGN(param->inline_size, 4);
2028         batch_size += subdata_size / 4;
2029     }
2030     BEGIN_BATCH(batch, batch_size);
2031     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (batch_size - 2));
2032     OUT_BATCH(batch, param->interface_offset);
2033     OUT_BATCH(batch, param->use_scoreboard << 21);
2034     OUT_BATCH(batch, 0);
2035     OUT_BATCH(batch, (param->scoreboard_y << 16 |
2036                       param->scoreboard_x));
2037     OUT_BATCH(batch, param->scoreboard_mask);
2038
2039     if (subdata_size)
2040         intel_batchbuffer_data(batch, param->pinline_data, subdata_size);
2041
2042     ADVANCE_BATCH(batch);
2043 }
2044
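/*
 * Emit a MEDIA_OBJECT_WALKER command (17 dwords plus optional inline data)
 * carrying the hardware walker's local and global loop parameters.
 */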
2045 void
2046 gen9_gpe_media_object_walker(VADriverContextP ctx,
2047                              struct i965_gpe_context *gpe_context,
2048                              struct intel_batchbuffer *batch,
2049                              struct gpe_media_object_walker_parameter *param)
2050 {
2051     int walker_length;
2052
2053     walker_length = 17;
2054     if (param->inline_size)
2055         walker_length += ALIGN(param->inline_size, 4) / 4;
2056     BEGIN_BATCH(batch, walker_length);
2057     OUT_BATCH(batch, CMD_MEDIA_OBJECT_WALKER | (walker_length - 2));
2058     OUT_BATCH(batch, param->interface_offset);
2059     OUT_BATCH(batch, param->use_scoreboard << 21);
2060     OUT_BATCH(batch, 0);
2061     OUT_BATCH(batch, 0);
2062     OUT_BATCH(batch, (param->group_id_loop_select << 8 |
2063                       param->scoreboard_mask)); // DW5
2064     OUT_BATCH(batch, (param->color_count_minus1 << 24 |
2065                       param->middle_loop_extra_steps << 16 |
2066                       param->mid_loop_unit_y << 12 |
2067                       param->mid_loop_unit_x << 8));
2068     OUT_BATCH(batch, ((param->global_loop_exec_count & 0x3ff) << 16 |
2069                       (param->local_loop_exec_count & 0x3ff)));
2070     OUT_BATCH(batch, param->block_resolution.value);
2071     OUT_BATCH(batch, param->local_start.value);
2072     OUT_BATCH(batch, 0); // DW10
2073     OUT_BATCH(batch, param->local_outer_loop_stride.value);
2074     OUT_BATCH(batch, param->local_inner_loop_unit.value);
2075     OUT_BATCH(batch, param->global_resolution.value);
2076     OUT_BATCH(batch, param->global_start.value);
2077     OUT_BATCH(batch, param->global_outer_loop_stride.value);
2078     OUT_BATCH(batch, param->global_inner_loop_unit.value);
2079
2080     if (param->pinline_data && param->inline_size)
2081         intel_batchbuffer_data(batch, param->pinline_data, ALIGN(param->inline_size, 4));
2082
2083     ADVANCE_BATCH(batch);
2084 }