OSDN Git Service

4c244433a7a0286977b7ebfbd1a4e28c45ff370c
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_render.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Keith Packard <keithp@keithp.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *    Zhao Yakui <yakui.zhao@intel.com>
28  *
29  */
30
31 /*
32  * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c
33  */
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <assert.h>
39 #include <math.h>
40
41 #include <va/va_drmcommon.h>
42
43 #include "intel_batchbuffer.h"
44 #include "intel_driver.h"
45 #include "i965_defines.h"
46 #include "i965_drv_video.h"
47 #include "i965_structs.h"
48 #include "i965_yuv_coefs.h"
49
50 #include "i965_render.h"
51
/* Shader programs for Gen9 (loaded from the .g9b binaries below). */
/* No SF kernel payload is used on this path: the array is intentionally
 * empty and only its (zero) size is passed to the kernel table below. */
static const uint32_t sf_kernel_static_gen9[][4] = {

};
/* Pixel shader for video rendering, assembled from pre-built fragments:
 * affine source coordinates -> planar YUV sampling -> optional color
 * balance -> YUV-to-RGB conversion -> render-target write. */
static const uint32_t ps_kernel_static_gen9[][4] = {
#include "shaders/render/exa_wm_src_affine.g9b"
#include "shaders/render/exa_wm_src_sample_planar.g9b"
#include "shaders/render/exa_wm_yuv_color_balance.g9b"
#include "shaders/render/exa_wm_yuv_rgb.g9b"
#include "shaders/render/exa_wm_write.g9b"
};
63
/* Pixel shader for subpicture (overlay) rendering: affine source
 * coordinates -> ARGB sampling -> render-target write. */
static const uint32_t ps_subpic_kernel_static_gen9[][4] = {
#include "shaders/render/exa_wm_src_affine.g9b"
#include "shaders/render/exa_wm_src_sample_argb.g9b"
#include "shaders/render/exa_wm_write.g9b"
};
69
70
/* Surface-state entries are padded to the Gen8 entry size; the binding
 * table is stored immediately after the MAX_RENDER_SURFACES entries in
 * the same buffer object. */
#define SURFACE_STATE_PADDED_SIZE       SURFACE_STATE_PADDED_SIZE_GEN8

#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

/* Indices into render_kernels_gen9[]. */
enum {
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};
81
/* Kernel table consumed by the render setup code.  By position the
 * initializers appear to be: name, interface id, program data, program
 * size, and a NULL slot filled in later (presumably the kernel's bo —
 * confirm against struct i965_kernel). */
static struct i965_kernel render_kernels_gen9[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen9,
        sizeof(sf_kernel_static_gen9),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen9,
        sizeof(ps_kernel_static_gen9),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen9,
        sizeof(ps_subpic_kernel_static_gen9),
        NULL
    }
};
106
/* URB partitioning constants (entry counts / entry sizes).
 * NOTE(review): none of these are referenced in the portion of the file
 * visible here; presumably consumed by pipeline setup code elsewhere —
 * verify before removing. */
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4
121
122 static void
123 gen9_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling)
124 {
125    switch (tiling) {
126    case I915_TILING_NONE:
127       ss->ss0.tiled_surface = 0;
128       ss->ss0.tile_walk = 0;
129       break;
130    case I915_TILING_X:
131       ss->ss0.tiled_surface = 1;
132       ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
133       break;
134    case I915_TILING_Y:
135       ss->ss0.tiled_surface = 1;
136       ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
137       break;
138    }
139 }
140
141 /* Set "Shader Channel Select" for GEN9+ */
142 static void
143 gen9_render_set_surface_scs(struct gen8_surface_state *ss)
144 {
145     ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
146     ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
147     ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
148     ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
149 }
150
/*
 * Fill a Gen8-layout SURFACE_STATE for a 2D surface backed by @bo at
 * @offset.  Field rendering (VA_TOP_FIELD / VA_BOTTOM_FIELD) halves the
 * height and enables line-skipping so only one field is sampled.
 */
static void
gen9_render_set_surface_state(
    struct gen8_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (VA_TOP_FIELD|VA_BOTTOM_FIELD)) {
    case VA_BOTTOM_FIELD:
        /* Bottom field additionally starts one line down. */
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case VA_TOP_FIELD:
        /* Sample every other line. */
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    /* Tentative GPU address; callers also emit a relocation against the
     * ss8 field so the kernel patches the real address at exec time. */
    ss->ss8.base_addr = bo->offset + offset;

    /* Hardware uses size-minus-one encodings for width/height/pitch. */
    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    /* Always set 1(align 4 mode) per B-spec */
    ss->ss0.vertical_alignment = 1;
    ss->ss0.horizontal_alignment = 1;

    /* swizzle is required by the API but not used here. */
    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen9_render_set_surface_tiling(ss, tiling);
}
195
/*
 * Set up the surface state and binding-table entry for one source
 * (sampler) surface at binding @index, and register the relocation that
 * patches the surface base address when the batch executes.
 * Also bumps wm.sampler_count: one sampler per bound source surface.
 */
static void
gen9_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    gen9_render_set_surface_state(ss,
                                  region, offset,
                                  w, h,
                                  pitch, format, flags);
    gen9_render_set_surface_scs(ss);
    /* Patch the base address stored at this entry's ss8 field. */
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      offset,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                      region);

    /* Binding-table slot points at this surface-state entry. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}
235
236 static void
237 gen9_render_src_surfaces_state(
238     VADriverContextP ctx,
239     struct object_surface *obj_surface,
240     unsigned int     flags
241 )
242 {
243     int region_pitch;
244     int rw, rh;
245     dri_bo *region;
246
247     region_pitch = obj_surface->width;
248     rw = obj_surface->orig_width;
249     rh = obj_surface->orig_height;
250     region = obj_surface->bo;
251
252     if (obj_surface->fourcc == VA_FOURCC('P', '0', '1', '0')) {
253         gen9_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R16_UNORM, flags);     /* Y */
254         gen9_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R16_UNORM, flags);
255         gen9_render_src_surface_state(ctx, 3, region,
256                                       region_pitch * obj_surface->y_cb_offset,
257                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
258                                       I965_SURFACEFORMAT_R16G16_UNORM, flags); /* UV */
259         gen9_render_src_surface_state(ctx, 4, region,
260                                       region_pitch * obj_surface->y_cb_offset,
261                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
262                                       I965_SURFACEFORMAT_R16G16_UNORM, flags);
263     } else {
264         gen9_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
265         gen9_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);
266
267         if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
268             gen9_render_src_surface_state(ctx, 3, region,
269                                           region_pitch * obj_surface->y_cb_offset,
270                                           obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
271                                           I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
272             gen9_render_src_surface_state(ctx, 4, region,
273                                           region_pitch * obj_surface->y_cb_offset,
274                                           obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
275                                           I965_SURFACEFORMAT_R8G8_UNORM, flags);
276         } else {
277             gen9_render_src_surface_state(ctx, 3, region,
278                                           region_pitch * obj_surface->y_cb_offset,
279                                           obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
280                                           I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
281             gen9_render_src_surface_state(ctx, 4, region,
282                                           region_pitch * obj_surface->y_cb_offset,
283                                           obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
284                                           I965_SURFACEFORMAT_R8_UNORM, flags);
285             gen9_render_src_surface_state(ctx, 5, region,
286                                           region_pitch * obj_surface->y_cr_offset,
287                                           obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
288                                           I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
289             gen9_render_src_surface_state(ctx, 6, region,
290                                           region_pitch * obj_surface->y_cr_offset,
291                                           obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
292                                           I965_SURFACEFORMAT_R8_UNORM, flags);
293         }
294     }
295 }
296
297 static void
298 gen9_subpic_render_src_surfaces_state(VADriverContextP ctx,
299                                       struct object_surface *obj_surface)
300 {
301     dri_bo *subpic_region;
302     unsigned int index = obj_surface->subpic_render_idx;
303     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
304     struct object_image *obj_image = obj_subpic->obj_image;
305
306     assert(obj_surface);
307     assert(obj_surface->bo);
308     subpic_region = obj_image->bo;
309     /*subpicture surface*/
310     gen9_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
311     gen9_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
312 }
313
/*
 * Bind the drawable (render target) at binding-table slot @index,
 * picking a 16-bpp (565) or 32-bpp (8888) surface format based on the
 * destination region's bytes-per-pixel.
 */
static void
gen9_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;
    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    gen9_render_set_surface_state(ss,
                                  dest_region->bo, 0,
                                  dest_region->width, dest_region->height,
                                  dest_region->pitch, format, 0);
    gen9_render_set_surface_scs(ss);
    /* Relocation patches the base address (ss8) when the batch runs. */
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                      dest_region->bo);

    /* Binding-table slot points at this surface-state entry. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}
349
/*
 * Write three vertices (bottom-right, bottom-left, top-left), each laid
 * out as (u, v, x, y), into the render vertex buffer.  Texture
 * coordinates are permuted according to the current rotation attribute
 * so the source content appears rotated while the destination rectangle
 * stays fixed.
 */
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    /* Per rotation: which (u,v) corner indices feed the bottom-right,
     * bottom-left and top-left vertices, in that order. */
    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    /* NOTE(review): assumes rotation_attrib->value is one of the four
     * VA_ROTATION_* values above; anything else indexes past the table —
     * confirm the attribute is validated upstream. */
    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}
389
390 static void
391 i965_subpic_render_upload_vertex(VADriverContextP ctx,
392                                  struct object_surface *obj_surface,
393                                  const VARectangle *output_rect)
394 {
395     unsigned int index = obj_surface->subpic_render_idx;
396     struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
397     float tex_coords[4], vid_coords[4];
398     VARectangle dst_rect;
399
400     if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
401         dst_rect = obj_subpic->dst_rect;
402     else {
403         const float sx  = (float)output_rect->width  / obj_surface->orig_width;
404         const float sy  = (float)output_rect->height / obj_surface->orig_height;
405         dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
406         dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
407         dst_rect.width  = sx * obj_subpic->dst_rect.width;
408         dst_rect.height = sy * obj_subpic->dst_rect.height;
409     }
410
411     tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
412     tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
413     tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
414     tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;
415
416     vid_coords[0] = dst_rect.x;
417     vid_coords[1] = dst_rect.y;
418     vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
419     vid_coords[3] = (float)(dst_rect.y + dst_rect.height);
420
421     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
422 }
423
424 static void
425 i965_render_upload_vertex(
426     VADriverContextP   ctx,
427     struct object_surface *obj_surface,
428     const VARectangle *src_rect,
429     const VARectangle *dst_rect
430 )
431 {
432     struct i965_driver_data *i965 = i965_driver_data(ctx);
433     struct i965_render_state *render_state = &i965->render_state;
434     struct intel_region *dest_region = render_state->draw_region;
435     float tex_coords[4], vid_coords[4];
436     int width, height;
437
438     width  = obj_surface->orig_width;
439     height = obj_surface->orig_height;
440
441     tex_coords[0] = (float)src_rect->x / width;
442     tex_coords[1] = (float)src_rect->y / height;
443     tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
444     tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;
445
446     vid_coords[0] = dest_region->x + dst_rect->x;
447     vid_coords[1] = dest_region->y + dst_rect->y;
448     vid_coords[2] = vid_coords[0] + dst_rect->width;
449     vid_coords[3] = vid_coords[1] + dst_rect->height;
450
451     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
452 }
453
/*
 * Emit the drawing-rectangle command covering the whole destination
 * region: (0, 0) .. (width - 1, height - 1), origin at (0, 0).
 */
static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000); /* top-left corner: (0, 0) */
    /* bottom-right corner: x in the low 16 bits, y in the high 16 bits */
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000); /* drawing-rectangle origin */
    ADVANCE_BATCH(batch);
}
469
/*
 * Emit a sampler-palette load with the image's palette entries, OR-ing
 * the caller-supplied @alpha into bits 24..31 of each 32-bit entry
 * (low 24 bits carry the color from obj_image->palette[]).
 * No-op for NULL images or images without a palette.
 */
static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    struct object_image *obj_image,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    assert(obj_image);

    /* Runtime guard too: the assert above is compiled out under NDEBUG. */
    if (!obj_image)
        return;

    if (obj_image->image.num_palette_entries == 0)
        return;

    /* Command length field encodes (entry count - 1). */
    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /*fill palette*/
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}
497
/*
 * Clear the destination region to black with the blitter (XY_COLOR_BLT),
 * honoring the region's bytes-per-pixel (16/32 bpp) and tiling.
 */
static void
gen9_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = GEN8_XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16; /* raster operation 0xF0 (PATCOPY) */
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        /* Only 16 bpp (565) and 32 bpp destinations are supported. */
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        /* Tiled destinations program the pitch in dwords. */
        pitch /= 4;
    }

    br13 |= pitch;

    intel_batchbuffer_start_atomic_blt(batch, 24);
    BEGIN_BLT_BATCH(batch, 7);

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x)); /* top-left */
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width)); /* bottom-right */
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0); /* clear color: black */
    OUT_BATCH(batch, 0x0);
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}
543
544
/*
 * for GEN9
 */
548 #define ALIGNMENT       64
549
550 static void
551 gen9_render_initialize(VADriverContextP ctx)
552 {
553     struct i965_driver_data *i965 = i965_driver_data(ctx);
554     struct i965_render_state *render_state = &i965->render_state;
555     dri_bo *bo;
556     int size;
557     unsigned int end_offset;
558
559     /* VERTEX BUFFER */
560     dri_bo_unreference(render_state->vb.vertex_buffer);
561     bo = dri_bo_alloc(i965->intel.bufmgr,
562                       "vertex buffer",
563                       4096,
564                       4096);
565     assert(bo);
566     render_state->vb.vertex_buffer = bo;
567
568     /* WM */
569     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
570     bo = dri_bo_alloc(i965->intel.bufmgr,
571                       "surface state & binding table",
572                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
573                       4096);
574     assert(bo);
575     render_state->wm.surface_state_binding_table_bo = bo;
576
577     render_state->curbe_size = 256;
578
579     render_state->wm.sampler_count = 0;
580
581     render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state);
582
583     render_state->cc_state_size = sizeof(struct gen6_color_calc_state);
584
585     render_state->cc_viewport_size = sizeof(struct i965_cc_viewport);
586
587     render_state->blend_state_size = sizeof(struct gen8_global_blend_state) +
588                         16 * sizeof(struct gen8_blend_state_rt);
589
590     render_state->sf_clip_size = 1024;
591
592     render_state->scissor_size = 1024;
593
594     size = ALIGN(render_state->curbe_size, ALIGNMENT) +
595         ALIGN(render_state->sampler_size, ALIGNMENT) +
596         ALIGN(render_state->cc_viewport_size, ALIGNMENT) +
597         ALIGN(render_state->cc_state_size, ALIGNMENT) +
598         ALIGN(render_state->blend_state_size, ALIGNMENT) +
599         ALIGN(render_state->sf_clip_size, ALIGNMENT) +
600         ALIGN(render_state->scissor_size, ALIGNMENT);
601
602     dri_bo_unreference(render_state->dynamic_state.bo);
603     bo = dri_bo_alloc(i965->intel.bufmgr,
604                       "dynamic_state",
605                       size,
606                       4096);
607
608     render_state->dynamic_state.bo = bo;
609
610     end_offset = 0;
611     render_state->dynamic_state.end_offset = 0;
612
613     /* Constant buffer offset */
614     render_state->curbe_offset = end_offset;
615     end_offset += ALIGN(render_state->curbe_size, ALIGNMENT);
616
617     /* Sampler_state  */
618     render_state->sampler_offset = end_offset;
619     end_offset += ALIGN(render_state->sampler_size, ALIGNMENT);
620
621     /* CC_VIEWPORT_state  */
622     render_state->cc_viewport_offset = end_offset;
623     end_offset += ALIGN(render_state->cc_viewport_size, ALIGNMENT);
624
625     /* CC_STATE_state  */
626     render_state->cc_state_offset = end_offset;
627     end_offset += ALIGN(render_state->cc_state_size, ALIGNMENT);
628
629     /* Blend_state  */
630     render_state->blend_state_offset = end_offset;
631     end_offset += ALIGN(render_state->blend_state_size, ALIGNMENT);
632
633     /* SF_CLIP_state  */
634     render_state->sf_clip_offset = end_offset;
635     end_offset += ALIGN(render_state->sf_clip_size, ALIGNMENT);
636
637     /* SCISSOR_state  */
638     render_state->scissor_offset = end_offset;
639     end_offset += ALIGN(render_state->scissor_size, ALIGNMENT);
640
641     /* update the end offset of dynamic_state */
642     render_state->dynamic_state.end_offset = end_offset;
643
644 }
645
646 static void
647 gen9_render_sampler(VADriverContextP ctx)
648 {
649     struct i965_driver_data *i965 = i965_driver_data(ctx);
650     struct i965_render_state *render_state = &i965->render_state;
651     struct gen8_sampler_state *sampler_state;
652     int i;
653     unsigned char *cc_ptr;
654
655     assert(render_state->wm.sampler_count > 0);
656     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
657
658     dri_bo_map(render_state->dynamic_state.bo, 1);
659     assert(render_state->dynamic_state.bo->virtual);
660
661     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
662                         render_state->sampler_offset;
663
664     sampler_state = (struct gen8_sampler_state *) cc_ptr;
665
666     for (i = 0; i < render_state->wm.sampler_count; i++) {
667         memset(sampler_state, 0, sizeof(*sampler_state));
668         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
669         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
670         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
671         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
672         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
673         sampler_state++;
674     }
675
676     dri_bo_unmap(render_state->dynamic_state.bo);
677 }
678
/*
 * Write the global blend state plus one render-target blend state into
 * the dynamic-state buffer: blending itself stays disabled, the logic op
 * is enabled with function 0xc (COPY — confirm against the PRM
 * encoding), and pre-blend color clamping is enabled.
 */
static void
gen9_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_global_blend_state *global_blend_state;
    struct gen8_blend_state_rt *blend_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->blend_state_offset;

    global_blend_state = (struct gen8_global_blend_state*) cc_ptr;

    /* Zero the global state AND the 16 per-RT entries that follow it. */
    memset(global_blend_state, 0, render_state->blend_state_size);
    /* Global blend state + blend_state for Render Target */
    blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc;
    blend_state->blend1.pre_blend_clamp_enable = 1;

    dri_bo_unmap(render_state->dynamic_state.bo);
}
705
706
707 static void
708 gen9_render_cc_viewport(VADriverContextP ctx)
709 {
710     struct i965_driver_data *i965 = i965_driver_data(ctx);
711     struct i965_render_state *render_state = &i965->render_state;
712     struct i965_cc_viewport *cc_viewport;
713     unsigned char *cc_ptr;
714
715     dri_bo_map(render_state->dynamic_state.bo, 1);
716     assert(render_state->dynamic_state.bo->virtual);
717
718     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
719                         render_state->cc_viewport_offset;
720
721     cc_viewport = (struct i965_cc_viewport *) cc_ptr;
722
723     memset(cc_viewport, 0, sizeof(*cc_viewport));
724
725     cc_viewport->min_depth = -1.e35;
726     cc_viewport->max_depth = 1.e35;
727
728     dri_bo_unmap(render_state->dynamic_state.bo);
729 }
730
731 static void
732 gen9_render_color_calc_state(VADriverContextP ctx)
733 {
734     struct i965_driver_data *i965 = i965_driver_data(ctx);
735     struct i965_render_state *render_state = &i965->render_state;
736     struct gen6_color_calc_state *color_calc_state;
737     unsigned char *cc_ptr;
738
739     dri_bo_map(render_state->dynamic_state.bo, 1);
740     assert(render_state->dynamic_state.bo->virtual);
741
742     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
743                         render_state->cc_state_offset;
744
745     color_calc_state = (struct gen6_color_calc_state *) cc_ptr;
746
747     memset(color_calc_state, 0, sizeof(*color_calc_state));
748     color_calc_state->constant_r = 1.0;
749     color_calc_state->constant_g = 0.0;
750     color_calc_state->constant_b = 1.0;
751     color_calc_state->constant_a = 1.0;
752     dri_bo_unmap(render_state->dynamic_state.bo);
753 }
754
#define PI  3.1415926

/*
 * Upload the pixel-shader constant (CURBE) buffer.  Layout as written
 * below:
 *   ushort[0]: source sampling mode — 2 for Y800 (luma only),
 *              1 for interleaved-chroma formats (NV12/P010),
 *              0 for other planar formats
 *   ushort[1]: 1 to skip the color-balance transform when every
 *              attribute is at its default, else 0
 *   float[4..7]: color-balance coefficients (contrast, brightness,
 *                cos/sin hue terms scaled by contrast * saturation)
 *   float[8..]:  YUV->RGB coefficient block for the color standard
 *                selected by the VA_SRC_COLOR_MASK bits of @flags
 */
static void
gen9_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    unsigned char *cc_ptr;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;
    const float* yuv_coefs;
    size_t coefs_length;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->curbe_offset;

    constant_buffer = (unsigned short *) cc_ptr;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0'));

        *constant_buffer = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2') ||
            obj_surface->fourcc == VA_FOURCC('P', '0', '1', '0'))
            *constant_buffer = 1;
        else
            *constant_buffer = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    /* Color-balance coefficients start at float index 4 (byte 16). */
    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    color_flag = flags & VA_SRC_COLOR_MASK;
    /* YUV->RGB coefficients start at float index 8 (byte 32). */
    yuv_to_rgb = (float *)constant_buffer + 8;

    yuv_coefs = i915_color_standard_to_coefs(i915_filter_to_color_standard(color_flag),
                                             &coefs_length);
    memcpy(yuv_to_rgb, yuv_coefs, coefs_length);

    dri_bo_unmap(render_state->dynamic_state.bo);
}
819
/*
 * Prepare all render states for a video-surface draw: destination bound
 * at binding index 0, source planes from index 1 up, then samplers
 * (which depend on the sampler count accumulated while binding the
 * sources), viewport, CC, blend, shader constants and vertex data.
 */
static void
gen9_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    gen9_render_dest_surface_state(ctx, 0);
    gen9_render_src_surfaces_state(ctx, obj_surface, flags);
    gen9_render_sampler(ctx);
    gen9_render_cc_viewport(ctx);
    gen9_render_color_calc_state(ctx);
    gen9_render_blend_state(ctx);
    gen9_render_upload_constants(ctx, obj_surface, flags);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}
838
/*
 * Emit the 19-dword gen9 STATE_BASE_ADDRESS command.  Surface state is
 * based at the binding-table BO, dynamic state at dynamic_state.bo and
 * instructions (shader kernels) at instruction_state.bo; the remaining
 * bases stay zero with BASE_ADDRESS_MODIFY set.  Upper bounds are
 * programmed to 0xFFFF0000 | BASE_ADDRESS_MODIFY.
 */
static void
gen9_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 19);
    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (19 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
        OUT_BATCH(batch, 0);
        OUT_BATCH(batch, 0);
        /*DW4 */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
        OUT_BATCH(batch, 0);

        /*DW6*/
    /* Dynamic state base address */
    OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
                0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

        /*DW8*/
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, 0);

        /*DW10 */
    /* Instruction base address */
    OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

        /*DW12 */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */

    /* the bindless surface state address (gen9 addition: DW16-18) */
    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0xFFFF0000);
    ADVANCE_BATCH(batch);
}
882
/*
 * Point the pipeline at the color-calc and blend states previously
 * written into the dynamic state buffer.  The "+ 1" sets bit 0 of
 * each offset -- presumably the "pointer valid" enable bit (the
 * offsets themselves are aligned, so bit 0 is free); NOTE(review):
 * confirm against the PRM.
 */
static void
gen9_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->cc_state_offset + 1));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->blend_state_offset + 1));
    ADVANCE_BATCH(batch);

}
901
902 static void
903 gen9_emit_vertices(VADriverContextP ctx)
904 {
905     struct i965_driver_data *i965 = i965_driver_data(ctx);
906     struct intel_batchbuffer *batch = i965->batch;
907     struct i965_render_state *render_state = &i965->render_state;
908
909     BEGIN_BATCH(batch, 5);
910     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
911     OUT_BATCH(batch,
912               (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) |
913               (0 << GEN8_VB0_MOCS_SHIFT) |
914               GEN7_VB0_ADDRESS_MODIFYENABLE |
915               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
916     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
917     OUT_BATCH(batch, 0);
918     OUT_BATCH(batch, 12 * 4);
919     ADVANCE_BATCH(batch);
920
921     /* Topology in 3D primitive is overrided by VF_TOPOLOGY command */
922     BEGIN_BATCH(batch, 2);
923     OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
924     OUT_BATCH(batch,
925               _3DPRIM_RECTLIST);
926     ADVANCE_BATCH(batch);
927
928     OUT_BATCH(batch, GEN7_3DSTATE_VF | (2 - 2));
929     OUT_BATCH(batch, 0);
930
931     OUT_BATCH(batch, GEN8_3DSTATE_VF_INSTANCING | (3 - 2));
932     OUT_BATCH(batch, 0);
933     OUT_BATCH(batch, 0);
934
935     OUT_BATCH(batch, GEN8_3DSTATE_VF_SGVS | (2 - 2));
936     OUT_BATCH(batch, 0);
937
938     BEGIN_BATCH(batch, 7);
939     OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
940     OUT_BATCH(batch,
941               GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
942     OUT_BATCH(batch, 3); /* vertex count per instance */
943     OUT_BATCH(batch, 0); /* start vertex offset */
944     OUT_BATCH(batch, 1); /* single instance */
945     OUT_BATCH(batch, 0); /* start instance location */
946     OUT_BATCH(batch, 0);
947     ADVANCE_BATCH(batch);
948 }
949
950 static void
951 gen9_emit_vertex_element_state(VADriverContextP ctx)
952 {
953     struct i965_driver_data *i965 = i965_driver_data(ctx);
954     struct intel_batchbuffer *batch = i965->batch;
955     int i;
956
957     /*
958      * The VUE layout
959      * dword 0-3: pad (0, 0, 0. 0)
960      * dword 4-7: position (x, y, 1.0, 1.0),
961      * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0)
962      */
963
964     /* Set up our vertex elements, sourced from the single vertex buffer. */
965     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (7 - 2));
966
967     /* Element state 0. These are 4 dwords of 0 required for the VUE format.
968      * We don't really know or care what they do.
969      */
970
971     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
972               GEN8_VE0_VALID |
973               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
974               (0 << VE0_OFFSET_SHIFT));
975     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
976               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
977               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
978               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
979
980     /* offset 8: X, Y -> {x, y, 1.0, 1.0} */
981     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
982               GEN8_VE0_VALID |
983               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
984               (8 << VE0_OFFSET_SHIFT));
985     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
986               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
987               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
988               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
989
990     /* offset 0: u,v -> {U, V, 1.0, 1.0} */
991     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
992               GEN8_VE0_VALID |
993               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
994               (0 << VE0_OFFSET_SHIFT));
995     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
996               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
997               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
998               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
999
1000     /* Disable instancing for all vertex elements. */
1001     for (i = 0; i < 3; i++) {
1002         OUT_BATCH(batch, GEN8_3DSTATE_VF_INSTANCING | (3 - 2));
1003         OUT_BATCH(batch, i);
1004         OUT_BATCH(batch, 0);
1005     }
1006 }
1007
/*
 * Disable the vertex shader stage: no constant buffer, a null VS
 * kernel (vertices pass through to SF unmodified), and null binding
 * table / sampler pointers.
 */
static void
gen9_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 0 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 1 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 2 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    /* VS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. VS shader GRF and URB buffer definition */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

}
1058
1059 /*
1060  * URB layout on GEN8
1061  * ----------------------------------------
1062  * | PS Push Constants (8KB) | VS entries |
1063  * ----------------------------------------
1064  */
1065 static void
1066 gen9_emit_urb(VADriverContextP ctx)
1067 {
1068     struct i965_driver_data *i965 = i965_driver_data(ctx);
1069     struct intel_batchbuffer *batch = i965->batch;
1070     unsigned int num_urb_entries = 64;
1071
1072     /* The minimum urb entries is 64 */
1073
1074     BEGIN_BATCH(batch, 2);
1075     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
1076     OUT_BATCH(batch, 0);
1077     ADVANCE_BATCH(batch);
1078
1079     BEGIN_BATCH(batch, 2);
1080     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
1081     OUT_BATCH(batch, 0);
1082     ADVANCE_BATCH(batch);
1083
1084     BEGIN_BATCH(batch, 2);
1085     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
1086     OUT_BATCH(batch, 0);
1087     ADVANCE_BATCH(batch);
1088
1089     BEGIN_BATCH(batch, 2);
1090     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
1091     OUT_BATCH(batch, 0);
1092     ADVANCE_BATCH(batch);
1093
1094     /* Size is 8Kbs and base address is 0Kb */
1095     BEGIN_BATCH(batch, 2);
1096     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
1097     /* Size is 8Kbs and base address is 0Kb */
1098     OUT_BATCH(batch,
1099                 (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) |
1100                 (8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT));
1101     ADVANCE_BATCH(batch);
1102
1103     BEGIN_BATCH(batch, 2);
1104     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
1105     OUT_BATCH(batch,
1106               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
1107               (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
1108               (4 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1109    ADVANCE_BATCH(batch);
1110
1111    BEGIN_BATCH(batch, 2);
1112    OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
1113    OUT_BATCH(batch,
1114              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
1115              (5 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1116    ADVANCE_BATCH(batch);
1117
1118    BEGIN_BATCH(batch, 2);
1119    OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
1120    OUT_BATCH(batch,
1121              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
1122              (6 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1123    ADVANCE_BATCH(batch);
1124
1125    BEGIN_BATCH(batch, 2);
1126    OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
1127    OUT_BATCH(batch,
1128              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
1129              (7 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1130    ADVANCE_BATCH(batch);
1131 }
1132
/*
 * Disable every geometry stage between VS and SF: GS, HS, TE, DS and
 * stream-output are all programmed with null kernels and zeroed
 * dispatch state, so vertices flow straight through to the rasterizer.
 */
static void
gen9_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 10);
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2));
    /* GS shader address */
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    /* DW3. GS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. GS shader GRF and URB offset/length */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2));
    OUT_BATCH(batch, 0);
    /*DW2. HS pass-through */
    OUT_BATCH(batch, 0);
    /*DW3. HS shader address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /*DW5. HS shader flag. URB offset/length and so on */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (11 - 2));
    /* DW1. DS shader pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW3-5. DS shader dispatch flag.*/
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6-7. DS shader pass-through, GRF,URB offset/Length,Thread Number*/
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW8. DS shader output URB */
    OUT_BATCH(batch, 0);

    /* Dual-patch kernel start pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1282
/*
 * One-time pipeline setup for the whole batch: select the 3D pipeline
 * (with the gen9 selection mask bits), program single-sample
 * multisampling, zero the gen8+ sample pattern, set the sample mask to
 * a single sample, and clear the system instruction pointer.
 * (Name typo "invarient" is kept -- it is referenced by
 * gen9_render_emit_states().)
 */
static void
gen9_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D |
                     GEN9_PIPELINE_SELECTION_MASK);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    ADVANCE_BATCH(batch);

    /* Update 3D Multisample pattern */
    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);


    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1326
1327 static void
1328 gen9_emit_clip_state(VADriverContextP ctx)
1329 {
1330     struct i965_driver_data *i965 = i965_driver_data(ctx);
1331     struct intel_batchbuffer *batch = i965->batch;
1332
1333     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
1334     OUT_BATCH(batch, 0);
1335     OUT_BATCH(batch, 0); /* pass-through */
1336     OUT_BATCH(batch, 0);
1337 }
1338
/*
 * Program the rasterizer / setup-backend stages: no culling, a single
 * attribute read from the URB (forced read length/offset), zeroed
 * attribute swizzles, and the SF unit with a tri-fan provoking-vertex
 * selection of 2.
 */
static void
gen9_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2));
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);


    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (6 - 2));
    OUT_BATCH(batch,
              (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) |
              (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) |
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN9_SBE_ACTIVE_COMPONENT_XYZW); /* all 4 components of attribute 0 active */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* SBE for backend setup */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    ADVANCE_BATCH(batch);
}
1390
/*
 * Program the pixel-shader / windower stages for the requested kernel.
 * PS_KERNEL gets a plain writeable render target; PS_SUBPIC_KERNEL
 * additionally enables src-alpha / inv-src-alpha color-buffer blending
 * for subpicture compositing.  Also binds the push-constant buffer
 * (written by the *_upload_constants helpers at curbe_offset) and the
 * selected shader kernel, with thread count two below the device
 * maximum.
 */
static void
gen9_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    unsigned int num_samples = 0;
    unsigned int max_threads;

    max_threads = i965->intel.device_info->max_wm_threads - 2;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2));
    OUT_BATCH(batch,
              (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE));
    ADVANCE_BATCH(batch);

    if (kernel == PS_KERNEL) {
        /* opaque blit: no blending, just a writeable render target */
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                GEN8_PS_BLEND_HAS_WRITEABLE_RT);
        ADVANCE_BATCH(batch);
    } else if (kernel == PS_SUBPIC_KERNEL) {
        /* subpicture: alpha-blend over the destination */
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                (GEN8_PS_BLEND_HAS_WRITEABLE_RT |
                 GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE |
                 (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) |
                 (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) |
                 (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) |
                 (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT)));
        ADVANCE_BATCH(batch);
    }

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2));
    OUT_BATCH(batch,
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2));
    OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
    OUT_BATCH(batch, 0);
    /*DW3-4. Constant buffer 0 */
    OUT_BATCH(batch, render_state->curbe_offset);
    OUT_BATCH(batch, 0);

    /*DW5-10. Constant buffer 1-3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 12);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2));
    /* PS shader address */
    OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset);

    OUT_BATCH(batch, 0);
    /* DW3. PS shader flag .Binding table cnt/sample cnt */
    OUT_BATCH(batch,
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
              GEN7_PS_VECTOR_MASK_ENABLE);
    /* DW4-5. Scratch space */
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch, 0);
    /* DW6. PS shader threads. */
    OUT_BATCH(batch,
              ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    /* DW7. PS shader GRF */
    OUT_BATCH(batch,
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}
1483
/*
 * Program a null depth buffer (surface type NULL, D32_FLOAT format),
 * plus zeroed hierarchical-depth and stencil buffers and zero clear
 * parameters -- this pipeline does no depth/stencil work.
 */
static void
gen9_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    /* DW2-3. Depth Buffer Address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW4-7. Surface structure */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the Hier Depth buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the stencil buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1529
/* Disable depth and stencil testing (all-zero WM_DEPTH_STENCIL). */
static void
gen9_emit_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1542
/* Emit a zeroed 3DSTATE_WM_HZ_OP -- no HiZ/depth-resolve operation. */
static void
gen9_emit_wm_hz_op(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1557
/*
 * Point the CC viewport at the one previously written into the
 * dynamic state buffer (cc_viewport_offset); the SF/CLIP viewport
 * pointer is left null.
 */
static void
gen9_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_BATCH(batch, render_state->cc_viewport_offset);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1575
/*
 * Point the PS sampler state at the samplers previously written into
 * the dynamic state buffer (sampler_offset).
 */
static void
gen9_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, render_state->sampler_offset);
    ADVANCE_BATCH(batch);
}
1588
1589
/* Gen-independent drawing rectangle; delegates to the common helper. */
static void
gen9_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}
1595
/*
 * Emit the complete 3D pipeline command sequence for one draw into the
 * batch, atomically (0x1000 bytes reserved so no flush can split it).
 * The order of the emit helpers is significant and must not be
 * rearranged.  'kernel' selects the pixel shader
 * (PS_KERNEL / PS_SUBPIC_KERNEL).
 */
static void
gen9_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen9_emit_invarient_states(ctx);
    gen9_emit_state_base_address(ctx);
    gen9_emit_viewport_state_pointers(ctx);
    gen9_emit_urb(ctx);
    gen9_emit_cc_state_pointers(ctx);
    gen9_emit_sampler_state_pointers(ctx);
    gen9_emit_wm_hz_op(ctx);
    gen9_emit_bypass_state(ctx);
    gen9_emit_vs_state(ctx);
    gen9_emit_clip_state(ctx);
    gen9_emit_sf_state(ctx);
    gen9_emit_depth_stencil_state(ctx);
    gen9_emit_wm_state(ctx, kernel);
    gen9_emit_depth_buffer_state(ctx);
    gen9_emit_drawing_rectangle(ctx);
    gen9_emit_vertex_element_state(ctx);
    gen9_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
1623
/*
 * Gen9 entry point for rendering a video surface: initialize render
 * state, build all indirect state, clear the destination region,
 * emit the pipeline commands with the normal PS kernel, and flush the
 * batch.
 */
static void
gen9_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen9_render_initialize(ctx);
    gen9_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    gen9_clear_dest_region(ctx);
    gen9_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}
1642
/*
 * Write the blend state for subpicture compositing into the dynamic
 * state buffer at blend_state_offset: a zeroed global blend state
 * followed by a per-render-target state enabling src-alpha /
 * inv-src-alpha ADD blending (for both color and alpha), with pre- and
 * post-blend clamping to [0, 1].
 */
static void
gen9_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_global_blend_state *global_blend_state;
    struct gen8_blend_state_rt *blend_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->blend_state_offset;

    global_blend_state = (struct gen8_global_blend_state*) cc_ptr;

    memset(global_blend_state, 0, render_state->blend_state_size);
    /* Global blend state + blend_state for Render Target */
    blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
    blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.colorbuf_blend = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */

    dri_bo_unmap(render_state->dynamic_state.bo);
}
1676
/*
 * Upload the subpicture pixel-shader constants: a single float at
 * curbe_offset holding the global alpha (1.0 unless the subpicture
 * sets VA_SUBPICTURE_GLOBAL_ALPHA).
 */
static void
gen9_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
    unsigned char *cc_ptr;

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }


    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                                render_state->curbe_offset;

    constant_buffer = (float *) cc_ptr;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->dynamic_state.bo);
}
1705
/*
 * Prepare every piece of GPU state needed to blend one subpicture onto
 * the destination surface: surface states, sampler, viewport, color-calc,
 * blend state, push constants and the vertex data.  The emission order
 * below is deliberate — later setup steps write into offsets established
 * by earlier ones in the shared state buffers.
 *
 * NOTE(review): src_rect is accepted for interface symmetry with the
 * main render path but is not used here — the source coordinates are
 * presumably baked into the vertex data by
 * i965_subpic_render_upload_vertex(); confirm against that helper.
 */
static void
gen9_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    gen9_render_dest_surface_state(ctx, 0);
    gen9_subpic_render_src_surfaces_state(ctx, obj_surface);
    gen9_render_sampler(ctx);
    gen9_render_cc_viewport(ctx);
    gen9_render_color_calc_state(ctx);
    gen9_subpicture_render_blend_state(ctx);
    gen9_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}
1723
/*
 * Render one subpicture of obj_surface (selected by subpic_render_idx)
 * onto the current destination: initialize the render engine, build all
 * required states, emit the batch commands with the subpicture pixel
 * shader, upload the image palette, and flush the batch buffer so the
 * GPU executes the composition.
 */
static void
gen9_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen9_render_initialize(ctx);
    gen9_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen9_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    /* 0xff: upload the full 256-entry palette for paletted subpictures. */
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}
1744
1745 static void
1746 gen9_render_terminate(VADriverContextP ctx)
1747 {
1748     struct i965_driver_data *i965 = i965_driver_data(ctx);
1749     struct i965_render_state *render_state = &i965->render_state;
1750
1751     dri_bo_unreference(render_state->vb.vertex_buffer);
1752     render_state->vb.vertex_buffer = NULL;
1753
1754     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1755     render_state->wm.surface_state_binding_table_bo = NULL;
1756
1757     if (render_state->instruction_state.bo) {
1758         dri_bo_unreference(render_state->instruction_state.bo);
1759         render_state->instruction_state.bo = NULL;
1760     }
1761
1762     if (render_state->dynamic_state.bo) {
1763         dri_bo_unreference(render_state->dynamic_state.bo);
1764         render_state->dynamic_state.bo = NULL;
1765     }
1766
1767     if (render_state->indirect_state.bo) {
1768         dri_bo_unreference(render_state->indirect_state.bo);
1769         render_state->indirect_state.bo = NULL;
1770     }
1771
1772     if (render_state->draw_region) {
1773         dri_bo_unreference(render_state->draw_region->bo);
1774         free(render_state->draw_region);
1775         render_state->draw_region = NULL;
1776     }
1777 }
1778
1779 bool
1780 gen9_render_init(VADriverContextP ctx)
1781 {
1782     struct i965_driver_data *i965 = i965_driver_data(ctx);
1783     struct i965_render_state *render_state = &i965->render_state;
1784     int i, kernel_size;
1785     unsigned int kernel_offset, end_offset;
1786     unsigned char *kernel_ptr;
1787     struct i965_kernel *kernel;
1788
1789     render_state->render_put_surface = gen9_render_put_surface;
1790     render_state->render_put_subpicture = gen9_render_put_subpicture;
1791     render_state->render_terminate = gen9_render_terminate;
1792
1793     memcpy(render_state->render_kernels, render_kernels_gen9,
1794                         sizeof(render_state->render_kernels));
1795
1796     kernel_size = 4096;
1797
1798     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
1799         kernel = &render_state->render_kernels[i];
1800
1801         if (!kernel->size)
1802             continue;
1803
1804         kernel_size += ALIGN(kernel->size, ALIGNMENT);
1805     }
1806
1807     render_state->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
1808                                   "kernel shader",
1809                                   kernel_size,
1810                                   0x1000);
1811     if (render_state->instruction_state.bo == NULL) {
1812         WARN_ONCE("failure to allocate the buffer space for kernel shader\n");
1813         return false;
1814     }
1815
1816     assert(render_state->instruction_state.bo);
1817
1818     render_state->instruction_state.bo_size = kernel_size;
1819     render_state->instruction_state.end_offset = 0;
1820     end_offset = 0;
1821
1822     dri_bo_map(render_state->instruction_state.bo, 1);
1823     kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual);
1824     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
1825         kernel = &render_state->render_kernels[i];
1826         kernel_offset = end_offset;
1827         kernel->kernel_offset = kernel_offset;
1828
1829         if (!kernel->size)
1830             continue;
1831
1832         memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
1833
1834         end_offset += ALIGN(kernel->size, ALIGNMENT);
1835     }
1836
1837     render_state->instruction_state.end_offset = end_offset;
1838
1839     dri_bo_unmap(render_state->instruction_state.bo);
1840
1841     return true;
1842 }