OSDN Git Service

Unify the coding style in the driver
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_render.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Keith Packard <keithp@keithp.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *    Zhao Yakui <yakui.zhao@intel.com>
28  *
29  */
30
31 /*
32  * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c
33  */
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <assert.h>
39 #include <math.h>
40
41 #include <va/va_drmcommon.h>
42
43 #include "intel_batchbuffer.h"
44 #include "intel_driver.h"
45 #include "i965_defines.h"
46 #include "i965_drv_video.h"
47 #include "i965_structs.h"
48 #include "i965_yuv_coefs.h"
49
50 #include "i965_render.h"
51
/* Per-kernel GRF register allocation and hardware thread limits */
#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32
57
/* Programs for Gen8 */
/* SF (strip/fan) kernel is empty on Gen8 — presumably no setup program
 * is required; vertex data passes through. TODO confirm. */
static const uint32_t sf_kernel_static_gen8[][4] = {

};
/* Pixel shader for video: affine source sampling of the planar YUV
 * surface, optional color-balance transform, YUV->RGB conversion, write. */
static const uint32_t ps_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_planar.g8b"
#include "shaders/render/exa_wm_yuv_color_balance.g8b"
#include "shaders/render/exa_wm_yuv_rgb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};

/* Pixel shader for subpictures: sample the ARGB image and write it out. */
static const uint32_t ps_subpic_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_argb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};
75
76
/* Surface states are padded to the Gen8 entry size; the binding table
 * (one 32-bit offset per surface) follows the surface state array. */
#define SURFACE_STATE_PADDED_SIZE       SURFACE_STATE_PADDED_SIZE_GEN8

#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

/* Indices into render_kernels_gen8[] */
enum {
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};
87
/* Kernel table: {name, id, binary, size in bytes, bo}. The final NULL is
 * the buffer object, presumably filled in when the kernels are uploaded
 * to the GPU — confirm against struct i965_kernel. */
static struct i965_kernel render_kernels_gen8[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen8,
        sizeof(sf_kernel_static_gen8),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen8,
        sizeof(ps_kernel_static_gen8),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen8,
        sizeof(ps_subpic_kernel_static_gen8),
        NULL
    }
};
112
/* URB (unified return buffer) partitioning for the fixed-function
 * stages; GS and CLIP get no entries (those stages are not used). */
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4
127
128 static void
129 gen8_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling)
130 {
131     switch (tiling) {
132     case I915_TILING_NONE:
133         ss->ss0.tiled_surface = 0;
134         ss->ss0.tile_walk = 0;
135         break;
136     case I915_TILING_X:
137         ss->ss0.tiled_surface = 1;
138         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
139         break;
140     case I915_TILING_Y:
141         ss->ss0.tiled_surface = 1;
142         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
143         break;
144     }
145 }
146
/* Set "Shader Channel Select" for GEN8+ */
void
gen8_render_set_surface_scs(struct gen8_surface_state *ss)
{
    /* Identity mapping: each shader channel reads its own surface
     * channel. (The "chanel" spelling follows the field names of the
     * project's gen8_surface_state struct.) */
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}
156
157 static void
158 gen8_render_set_surface_state(
159     struct gen8_surface_state *ss,
160     dri_bo                    *bo,
161     unsigned long              offset,
162     int                        width,
163     int                        height,
164     int                        pitch,
165     int                        format,
166     unsigned int               flags
167 )
168 {
169     unsigned int tiling;
170     unsigned int swizzle;
171
172     memset(ss, 0, sizeof(*ss));
173
174     switch (flags & (VA_TOP_FIELD | VA_BOTTOM_FIELD)) {
175     case VA_BOTTOM_FIELD:
176         ss->ss0.vert_line_stride_ofs = 1;
177         /* fall-through */
178     case VA_TOP_FIELD:
179         ss->ss0.vert_line_stride = 1;
180         height /= 2;
181         break;
182     }
183
184     ss->ss0.surface_type = I965_SURFACE_2D;
185     ss->ss0.surface_format = format;
186
187     ss->ss8.base_addr = bo->offset + offset;
188
189     ss->ss2.width = width - 1;
190     ss->ss2.height = height - 1;
191
192     ss->ss3.pitch = pitch - 1;
193
194     /* Always set 1(align 4 mode) per B-spec */
195     ss->ss0.vertical_alignment = 1;
196     ss->ss0.horizontal_alignment = 1;
197
198     dri_bo_get_tiling(bo, &tiling, &swizzle);
199     gen8_render_set_surface_tiling(ss, tiling);
200 }
201
/*
 * Write one source-plane surface state (and its binding table slot) into
 * the shared surface-state/binding-table buffer, then bump the sampler
 * count so gen8_render_sampler() allocates a sampler for it.
 *
 * index:  binding table slot (slot 0 is used for the destination)
 * region: buffer object holding the pixel data
 * offset: byte offset of this plane within the region
 */
static void
gen8_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    gen8_render_set_surface_state(ss,
                                  region, offset,
                                  w, h,
                                  pitch, format, flags);
    gen8_render_set_surface_scs(ss);
    /* Relocation for the 64-bit base address field (ss8) so the kernel
     * patches in the region's final GPU address. */
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      offset,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                      region);

    /* Point the binding table slot at the surface state just written. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}
241
242 static void
243 gen8_render_src_surfaces_state(
244     VADriverContextP ctx,
245     struct object_surface *obj_surface,
246     unsigned int     flags
247 )
248 {
249     int region_pitch;
250     int rw, rh;
251     dri_bo *region;
252
253     region_pitch = obj_surface->width;
254     rw = obj_surface->orig_width;
255     rh = obj_surface->orig_height;
256     region = obj_surface->bo;
257
258     gen8_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
259     gen8_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);
260
261     if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */
262         return;
263
264     if (obj_surface->fourcc == VA_FOURCC_NV12) {
265         gen8_render_src_surface_state(ctx, 3, region,
266                                       region_pitch * obj_surface->y_cb_offset,
267                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
268                                       I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
269         gen8_render_src_surface_state(ctx, 4, region,
270                                       region_pitch * obj_surface->y_cb_offset,
271                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
272                                       I965_SURFACEFORMAT_R8G8_UNORM, flags);
273     } else {
274         gen8_render_src_surface_state(ctx, 3, region,
275                                       region_pitch * obj_surface->y_cb_offset,
276                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
277                                       I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
278         gen8_render_src_surface_state(ctx, 4, region,
279                                       region_pitch * obj_surface->y_cb_offset,
280                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
281                                       I965_SURFACEFORMAT_R8_UNORM, flags);
282         gen8_render_src_surface_state(ctx, 5, region,
283                                       region_pitch * obj_surface->y_cr_offset,
284                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
285                                       I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
286         gen8_render_src_surface_state(ctx, 6, region,
287                                       region_pitch * obj_surface->y_cr_offset,
288                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
289                                       I965_SURFACEFORMAT_R8_UNORM, flags);
290     }
291 }
292
293 static void
294 gen8_subpic_render_src_surfaces_state(VADriverContextP ctx,
295                                       struct object_surface *obj_surface)
296 {
297     dri_bo *subpic_region;
298     unsigned int index = obj_surface->subpic_render_idx;
299     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
300     struct object_image *obj_image = obj_subpic->obj_image;
301
302     assert(obj_surface);
303     assert(obj_surface->bo);
304     subpic_region = obj_image->bo;
305     /*subpicture surface*/
306     gen8_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
307     gen8_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
308 }
309
/*
 * Write the surface state for the render target (the destination
 * drawable region) into binding table slot `index`; the render path
 * uses slot 0 for this.
 */
static void
gen8_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;
    assert(index < MAX_RENDER_SURFACES);

    /* Pick the surface format from the drawable's bytes per pixel. */
    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    gen8_render_set_surface_state(ss,
                                  dest_region->bo, 0,
                                  dest_region->width, dest_region->height,
                                  dest_region->pitch, format, 0);
    gen8_render_set_surface_scs(ss);
    /* Relocation for the 64-bit base address field (ss8). */
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                      dest_region->bo);

    /* Point the binding table slot at this surface state. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}
345
346 static void
347 i965_fill_vertex_buffer(
348     VADriverContextP ctx,
349     float tex_coords[4], /* [(u1,v1);(u2,v2)] */
350     float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
351 )
352 {
353     struct i965_driver_data * const i965 = i965_driver_data(ctx);
354     float vb[12];
355
356     enum { X1, Y1, X2, Y2 };
357
358     static const unsigned int g_rotation_indices[][6] = {
359         [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
360         [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
361         [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
362         [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
363     };
364
365     const unsigned int * const rotation_indices =
366         g_rotation_indices[i965->rotation_attrib->value];
367
368     vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
369     vb[1]  = tex_coords[rotation_indices[1]];
370     vb[2]  = vid_coords[X2];
371     vb[3]  = vid_coords[Y2];
372
373     vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
374     vb[5]  = tex_coords[rotation_indices[3]];
375     vb[6]  = vid_coords[X1];
376     vb[7]  = vid_coords[Y2];
377
378     vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
379     vb[9]  = tex_coords[rotation_indices[5]];
380     vb[10] = vid_coords[X1];
381     vb[11] = vid_coords[Y1];
382
383     dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
384 }
385
/*
 * Upload the vertex data for blending one subpicture onto the output.
 * The destination rect is either taken verbatim (screen coordinates)
 * or scaled from surface space into the output rectangle.
 */
static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        /* Scale the subpicture's dst rect from surface space into the
         * output rect. NOTE(review): VARectangle fields are integers, so
         * the scaled float values are truncated here — confirm intended. */
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    /* Normalized texture coordinates of the subpicture source rect. */
    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    /* Destination corners in screen coordinates. */
    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}
419
420 static void
421 i965_render_upload_vertex(
422     VADriverContextP   ctx,
423     struct object_surface *obj_surface,
424     const VARectangle *src_rect,
425     const VARectangle *dst_rect
426 )
427 {
428     struct i965_driver_data *i965 = i965_driver_data(ctx);
429     struct i965_render_state *render_state = &i965->render_state;
430     struct intel_region *dest_region = render_state->draw_region;
431     float tex_coords[4], vid_coords[4];
432     int width, height;
433
434     width  = obj_surface->orig_width;
435     height = obj_surface->orig_height;
436
437     tex_coords[0] = (float)src_rect->x / width;
438     tex_coords[1] = (float)src_rect->y / height;
439     tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
440     tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;
441
442     vid_coords[0] = dest_region->x + dst_rect->x;
443     vid_coords[1] = dest_region->y + dst_rect->y;
444     vid_coords[2] = vid_coords[0] + dst_rect->width;
445     vid_coords[3] = vid_coords[1] + dst_rect->height;
446
447     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
448 }
449
/*
 * Emit 3DSTATE_DRAWING_RECTANGLE covering the whole destination region.
 */
static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);   /* top-left corner: (0, 0) */
    /* bottom-right corner, inclusive: x in bits 15:0, y in bits 31:16 */
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);   /* drawing rectangle origin */
    ADVANCE_BATCH(batch);
}
465
466 static void
467 i965_render_upload_image_palette(
468     VADriverContextP ctx,
469     struct object_image *obj_image,
470     unsigned int     alpha
471 )
472 {
473     struct i965_driver_data *i965 = i965_driver_data(ctx);
474     struct intel_batchbuffer *batch = i965->batch;
475     unsigned int i;
476
477     assert(obj_image);
478
479     if (!obj_image)
480         return;
481
482     if (obj_image->image.num_palette_entries == 0)
483         return;
484
485     BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
486     OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
487     /*fill palette*/
488     //int32_t out[16]; //0-23:color 23-31:alpha
489     for (i = 0; i < obj_image->image.num_palette_entries; i++)
490         OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
491     ADVANCE_BATCH(batch);
492 }
493
/*
 * Clear the destination region to black with the 2D blitter
 * (XY_COLOR_BLT).
 */
static void
gen8_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = GEN8_XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;          /* raster operation field (ROP 0xf0) */
    pitch = dest_region->pitch;

    /* Select the color depth from the drawable's bytes per pixel. */
    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    /* Tiled destinations program the pitch in dwords, not bytes. */
    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    intel_batchbuffer_start_atomic_blt(batch, 24);
    BEGIN_BLT_BATCH(batch, 7);

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    /* Destination rectangle: top-left then bottom-right (y:16 | x). */
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0);      /* fill color: black */
    OUT_BATCH(batch, 0x0);
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}
539
540
/*
 * for GEN8
 */
#define ALIGNMENT       64      /* alignment of each dynamic_state sub-allocation */
545
546 static void
547 gen8_render_initialize(VADriverContextP ctx)
548 {
549     struct i965_driver_data *i965 = i965_driver_data(ctx);
550     struct i965_render_state *render_state = &i965->render_state;
551     dri_bo *bo;
552     int size;
553     unsigned int end_offset;
554
555     /* VERTEX BUFFER */
556     dri_bo_unreference(render_state->vb.vertex_buffer);
557     bo = dri_bo_alloc(i965->intel.bufmgr,
558                       "vertex buffer",
559                       4096,
560                       4096);
561     assert(bo);
562     render_state->vb.vertex_buffer = bo;
563
564     /* WM */
565     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
566     bo = dri_bo_alloc(i965->intel.bufmgr,
567                       "surface state & binding table",
568                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
569                       4096);
570     assert(bo);
571     render_state->wm.surface_state_binding_table_bo = bo;
572
573     render_state->curbe_size = 256;
574
575     render_state->wm.sampler_count = 0;
576
577     render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state);
578
579     render_state->cc_state_size = sizeof(struct gen6_color_calc_state);
580
581     render_state->cc_viewport_size = sizeof(struct i965_cc_viewport);
582
583     render_state->blend_state_size = sizeof(struct gen8_global_blend_state) +
584                                      16 * sizeof(struct gen8_blend_state_rt);
585
586     render_state->sf_clip_size = 1024;
587
588     render_state->scissor_size = 1024;
589
590     size = ALIGN(render_state->curbe_size, ALIGNMENT) +
591            ALIGN(render_state->sampler_size, ALIGNMENT) +
592            ALIGN(render_state->cc_viewport_size, ALIGNMENT) +
593            ALIGN(render_state->cc_state_size, ALIGNMENT) +
594            ALIGN(render_state->blend_state_size, ALIGNMENT) +
595            ALIGN(render_state->sf_clip_size, ALIGNMENT) +
596            ALIGN(render_state->scissor_size, ALIGNMENT);
597
598     dri_bo_unreference(render_state->dynamic_state.bo);
599     bo = dri_bo_alloc(i965->intel.bufmgr,
600                       "dynamic_state",
601                       size,
602                       4096);
603
604     render_state->dynamic_state.bo = bo;
605
606     end_offset = 0;
607     render_state->dynamic_state.end_offset = 0;
608
609     /* Constant buffer offset */
610     render_state->curbe_offset = end_offset;
611     end_offset += ALIGN(render_state->curbe_size, ALIGNMENT);
612
613     /* Sampler_state  */
614     render_state->sampler_offset = end_offset;
615     end_offset += ALIGN(render_state->sampler_size, ALIGNMENT);
616
617     /* CC_VIEWPORT_state  */
618     render_state->cc_viewport_offset = end_offset;
619     end_offset += ALIGN(render_state->cc_viewport_size, ALIGNMENT);
620
621     /* CC_STATE_state  */
622     render_state->cc_state_offset = end_offset;
623     end_offset += ALIGN(render_state->cc_state_size, ALIGNMENT);
624
625     /* Blend_state  */
626     render_state->blend_state_offset = end_offset;
627     end_offset += ALIGN(render_state->blend_state_size, ALIGNMENT);
628
629     /* SF_CLIP_state  */
630     render_state->sf_clip_offset = end_offset;
631     end_offset += ALIGN(render_state->sf_clip_size, ALIGNMENT);
632
633     /* SCISSOR_state  */
634     render_state->scissor_offset = end_offset;
635     end_offset += ALIGN(render_state->scissor_size, ALIGNMENT);
636
637     /* update the end offset of dynamic_state */
638     render_state->dynamic_state.end_offset = end_offset;
639
640 }
641
642 static void
643 gen8_render_sampler(VADriverContextP ctx)
644 {
645     struct i965_driver_data *i965 = i965_driver_data(ctx);
646     struct i965_render_state *render_state = &i965->render_state;
647     struct gen8_sampler_state *sampler_state;
648     int i;
649     unsigned char *cc_ptr;
650
651     assert(render_state->wm.sampler_count > 0);
652     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
653
654     dri_bo_map(render_state->dynamic_state.bo, 1);
655     assert(render_state->dynamic_state.bo->virtual);
656
657     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
658              render_state->sampler_offset;
659
660     sampler_state = (struct gen8_sampler_state *) cc_ptr;
661
662     for (i = 0; i < render_state->wm.sampler_count; i++) {
663         memset(sampler_state, 0, sizeof(*sampler_state));
664         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
665         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
666         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
667         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
668         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
669         sampler_state++;
670     }
671
672     dri_bo_unmap(render_state->dynamic_state.bo);
673 }
674
/*
 * Write the global blend state plus the blend state for the single
 * render target into the dynamic state buffer. Blending is not enabled;
 * a logic op writes the shader output to the target instead.
 */
static void
gen8_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_global_blend_state *global_blend_state;
    struct gen8_blend_state_rt *blend_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
             render_state->blend_state_offset;

    global_blend_state = (struct gen8_global_blend_state*) cc_ptr;

    memset(global_blend_state, 0, render_state->blend_state_size);
    /* Global blend state + blend_state for Render Target */
    blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc; /* 0xc = COPY (source) */
    blend_state->blend1.pre_blend_clamp_enable = 1;

    dri_bo_unmap(render_state->dynamic_state.bo);
}
701
702
703 static void
704 gen8_render_cc_viewport(VADriverContextP ctx)
705 {
706     struct i965_driver_data *i965 = i965_driver_data(ctx);
707     struct i965_render_state *render_state = &i965->render_state;
708     struct i965_cc_viewport *cc_viewport;
709     unsigned char *cc_ptr;
710
711     dri_bo_map(render_state->dynamic_state.bo, 1);
712     assert(render_state->dynamic_state.bo->virtual);
713
714     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
715              render_state->cc_viewport_offset;
716
717     cc_viewport = (struct i965_cc_viewport *) cc_ptr;
718
719     memset(cc_viewport, 0, sizeof(*cc_viewport));
720
721     cc_viewport->min_depth = -1.e35;
722     cc_viewport->max_depth = 1.e35;
723
724     dri_bo_unmap(render_state->dynamic_state.bo);
725 }
726
/*
 * Write the color calculator state into the dynamic state buffer,
 * setting a fixed constant blend color (R,G,B,A).
 */
static void
gen8_render_color_calc_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_color_calc_state *color_calc_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
             render_state->cc_state_offset;

    color_calc_state = (struct gen6_color_calc_state *) cc_ptr;

    /* Constant color (R=1, G=0, B=1, A=1). */
    memset(color_calc_state, 0, sizeof(*color_calc_state));
    color_calc_state->constant_r = 1.0;
    color_calc_state->constant_g = 0.0;
    color_calc_state->constant_b = 1.0;
    color_calc_state->constant_a = 1.0;
    dri_bo_unmap(render_state->dynamic_state.bo);
}
750
#define PI  3.1415926   /* degrees -> radians conversion for the hue angle */

/*
 * Fill the pixel shader constant buffer (CURBE) in the dynamic state
 * buffer.
 *
 * Layout, as written below (NOTE(review): must match what the exa_wm_*
 * shaders expect — confirm against the shader sources):
 *   ushort 0     : source sampling mode (2 = Y8 grayscale, 1 = NV12,
 *                  0 = planar YUV with separate U/V planes)
 *   ushort 1     : 1 to skip the color-balance transform, 0 to apply it
 *   float 4..7   : contrast, brightness, cos/sin hue terms
 *   float 8..    : YUV -> RGB conversion coefficients
 */
static void
gen8_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    unsigned char *cc_ptr;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;
    const float* yuv_coefs;
    size_t coefs_length;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
             render_state->curbe_offset;

    constant_buffer = (unsigned short *) cc_ptr;

    /* Source sampling mode selector. */
    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC_Y800);

        *constant_buffer = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC_NV12)
            *constant_buffer = 1;
        else
            *constant_buffer = 0;
    }

    /* With all attributes at their defaults the color-balance transform
     * is an identity, so let the shader skip it. */
    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    /* YUV->RGB matrix chosen by the source color standard flags. */
    color_flag = flags & VA_SRC_COLOR_MASK;
    yuv_to_rgb = (float *)constant_buffer + 8;

    yuv_coefs = i915_color_standard_to_coefs(i915_filter_to_color_standard(color_flag),
                                             &coefs_length);
    memcpy(yuv_to_rgb, yuv_coefs, coefs_length);

    dri_bo_unmap(render_state->dynamic_state.bo);
}
814
/*
 * Build all state needed to draw obj_surface into the destination:
 * surfaces, samplers, viewport, color calc, blend, shader constants and
 * vertex data. Ordering matters: the source surfaces must be bound
 * before gen8_render_sampler() runs, since each bound source plane
 * increments wm.sampler_count.
 */
static void
gen8_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    gen8_render_dest_surface_state(ctx, 0);
    gen8_render_src_surfaces_state(ctx, obj_surface, flags);
    gen8_render_sampler(ctx);
    gen8_render_cc_viewport(ctx);
    gen8_render_color_calc_state(ctx);
    gen8_render_blend_state(ctx);
    gen8_render_upload_constants(ctx, obj_surface, flags);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}
833
/*
 * Emit STATE_BASE_ADDRESS, pointing the surface state, dynamic state
 * and instruction bases at the driver's buffer objects; the remaining
 * bases stay at zero with their "modify" bits set.
 */
static void
gen8_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 16);
    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /*DW4 */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, 0);

    /*DW6*/
    /* Dynamic state base address */
    OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
              0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

    /*DW8*/
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, 0);

    /*DW10 */
    /* Instruction base address */
    OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

    /*DW12 */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
    ADVANCE_BATCH(batch);
}
872
/*
 * Point the hardware at the color-calc and blend state previously
 * written into the dynamic-state BO.  The "+ 1" sets the low "valid"
 * bit of each pointer dword.
 */
static void
gen8_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->cc_state_offset + 1));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->blend_state_offset + 1));
    ADVANCE_BATCH(batch);

}
891
/*
 * Emit the vertex buffer (16-byte stride, i.e. 4 floats per vertex;
 * total size 48 bytes = 3 vertices), set the topology to RECTLIST and
 * issue a single 3-vertex 3DPRIMITIVE draw.
 */
static void
gen8_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) |
              (0 << GEN8_VB0_MOCS_SHIFT) |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 12 * 4);
    ADVANCE_BATCH(batch);

    /* Topology in 3D primitive is overridden by the VF_TOPOLOGY command */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
929
/*
 * Describe how the 16-byte vertices are expanded into the VUE:
 * a padding element, the position at byte offset 8, and the texture
 * coordinate at byte offset 0.  Instancing and system-generated
 * values are disabled.
 *
 * NOTE(review): these OUT_BATCH calls are not wrapped in
 * BEGIN_BATCH/ADVANCE_BATCH; presumably they rely on the space
 * reserved by intel_batchbuffer_start_atomic() in
 * gen8_render_emit_states() -- confirm against the batchbuffer API.
 */
static void
gen8_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int i;

    /*
     * The VUE layout
     * dword 0-3: pad (0, 0, 0. 0)
     * dword 4-7: position (x, y, 1.0, 1.0),
     * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0)
     */

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (7 - 2));

    /* Element state 0. These are 4 dwords of 0 required for the VUE format.
     * We don't really know or care what they do.
     */

    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));

    /* offset 8: X, Y -> {x, y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));

    /* offset 0: u,v -> {U, V, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));

    /* Disable instancing for all vertex elements. */
    for (i = 0; i < 3; i++) {
        OUT_BATCH(batch, GEN8_3DSTATE_VF_INSTANCING | (3 - 2));
        OUT_BATCH(batch, i);
        OUT_BATCH(batch, 0);
    }

    /* Disable system-generated values. */
    OUT_BATCH(batch, GEN8_3DSTATE_VF_SGVS | (2 - 2));
    OUT_BATCH(batch, 0);
}
991
/*
 * Put the vertex-shader stage into pass-through: null VS constant
 * buffers, no VS kernel, and cleared binding-table and sampler
 * pointers for the stage.
 */
static void
gen8_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 0 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 1 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 2 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    /* VS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. VS shader GRF and URB buffer definition */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

}
1042
1043 /*
1044  * URB layout on GEN8
1045  * ----------------------------------------
1046  * | PS Push Constants (8KB) | VS entries |
1047  * ----------------------------------------
1048  */
/*
 * Partition the URB: 8KB of push constants for the PS (all other
 * stages get zero-sized allocations), VS entries after the push
 * constant region, and empty GS/HS/DS URB allocations.
 */
static void
gen8_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int num_urb_entries = 64;

    /* The minimum urb entries is 64 */

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Size is 8Kbs and base address is 0Kb */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
    /* Size is 8Kbs and base address is 0Kb */
    OUT_BATCH(batch,
              (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) |
              (8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
    OUT_BATCH(batch,
              (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
              (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
              (4 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (5 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (6 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (7 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);
}
1116
/*
 * Disable every optional geometry stage -- GS, HS, tessellator (TE),
 * DS -- and the stream-output unit, so the pipeline runs VS
 * pass-through straight to the rasterizer.  Each stage gets a null
 * constant buffer, a null kernel, and cleared binding-table/sampler
 * pointers.
 */
static void
gen8_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 10);
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2));
    /* GS shader address */
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    /* DW3. GS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. GS shader GRF and URB offset/length */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2));
    OUT_BATCH(batch, 0);
    /*DW2. HS pass-through */
    OUT_BATCH(batch, 0);
    /*DW3. HS shader address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /*DW5. HS shader flag. URB offset/length and so on */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (9 - 2));
    /* DW1. DS shader pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW3-5. DS shader dispatch flag.*/
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6-7. DS shader pass-through, GRF,URB offset/Length,Thread Number*/
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW8. DS shader output URB */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1262
/*
 * Emit state that stays constant for the whole render pass: select the
 * 3D pipeline, program single-sample multisampling, a zeroed sample
 * pattern, the sample mask, and a null system instruction pointer.
 * (The function name keeps the historical "invarient" spelling.)
 */
static void
gen8_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    ADVANCE_BATCH(batch);

    /* Update 3D Multisample pattern */
    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);


    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1305
/*
 * Disable the clip stage (pass-through).
 *
 * NOTE(review): emitted without BEGIN_BATCH/ADVANCE_BATCH; presumably
 * relies on the space reserved by intel_batchbuffer_start_atomic() in
 * gen8_render_emit_states() -- confirm against the batchbuffer API.
 */
static void
gen8_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}
1317
/*
 * Program the rasterizer/setup stages: no culling, SBE configured to
 * forward one attribute (the texture coordinate) from the VUE to the
 * pixel shader, a zeroed attribute swizzle table, and default SF state.
 */
static void
gen8_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2));
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);


    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (4 - 2));
    OUT_BATCH(batch,
              (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) |
              (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) |
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* SBE for backend setup */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    ADVANCE_BATCH(batch);
}
1367
/*
 * Program the pixel-shader/WM stages for the given kernel.
 * PS_KERNEL renders opaque; PS_SUBPIC_KERNEL enables src-alpha /
 * inv-src-alpha blending for subpicture compositing.  Also binds the
 * PS constant buffer (CURBE) and the binding table, and sets the
 * thread count from the per-device max_wm_threads.
 */
static void
gen8_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    unsigned int num_samples = 0;
    unsigned int max_threads;

    /* Leave a little headroom below the device maximum */
    max_threads = i965->intel.device_info->max_wm_threads - 2;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2));
    OUT_BATCH(batch,
              (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE));
    ADVANCE_BATCH(batch);

    if (kernel == PS_KERNEL) {
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                  GEN8_PS_BLEND_HAS_WRITEABLE_RT);
        ADVANCE_BATCH(batch);
    } else if (kernel == PS_SUBPIC_KERNEL) {
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                  (GEN8_PS_BLEND_HAS_WRITEABLE_RT |
                   GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE |
                   (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) |
                   (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) |
                   (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) |
                   (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT)));
        ADVANCE_BATCH(batch);
    }

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2));
    OUT_BATCH(batch,
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2));
    OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
    OUT_BATCH(batch, 0);
    /*DW3-4. Constant buffer 0 */
    OUT_BATCH(batch, render_state->curbe_offset);
    OUT_BATCH(batch, 0);

    /*DW5-10. Constant buffer 1-3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 12);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2));
    /* PS shader address */
    OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset);

    OUT_BATCH(batch, 0);
    /* DW3. PS shader flag .Binding table cnt/sample cnt */
    OUT_BATCH(batch,
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
              GEN7_PS_VECTOR_MASK_ENABLE);
    /* DW4-5. Scatch space */
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch, 0);
    /* DW6. PS shader threads. */
    OUT_BATCH(batch,
              ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    /* DW7. PS shader GRF */
    OUT_BATCH(batch,
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}
1460
/*
 * Declare a null depth buffer and clear the hierarchical-depth,
 * stencil and clear-params state -- depth/stencil testing is unused
 * for 2D video rendering.
 */
static void
gen8_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    /* DW2-3. Depth Buffer Address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW4-7. Surface structure */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the Hier Depth buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the stencil buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1506
/* Disable depth and stencil testing in the WM stage (all-zero state). */
static void
gen8_emit_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1519
/* Emit a no-op WM_HZ_OP (no HiZ/depth-clear operations requested). */
static void
gen8_emit_wm_hz_op(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1534
/*
 * Point the hardware at the CC viewport written into the dynamic-state
 * BO; the SF/CLIP viewport pointer is left null (unused).
 */
static void
gen8_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_BATCH(batch, render_state->cc_viewport_offset);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1552
/* Bind the PS sampler state written into the dynamic-state BO. */
static void
gen8_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, render_state->sampler_offset);
    ADVANCE_BATCH(batch);
}
1565
1566
/* Thin wrapper: the drawing-rectangle command is unchanged since GEN7,
 * so reuse the common i965 implementation. */
static void
gen7_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}
1572
/*
 * Emit the full GEN8 3D pipeline programming for one draw, using the
 * PS kernel selected by @kernel.  The batch is built atomically (space
 * for 0x1000 bytes reserved up front) so the whole sequence lands in a
 * single batchbuffer; the emission order follows the hardware's
 * required state-setup sequence and must not be rearranged casually.
 */
static void
gen8_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_emit_invarient_states(ctx);
    gen8_emit_state_base_address(ctx);
    gen8_emit_viewport_state_pointers(ctx);
    gen8_emit_urb(ctx);
    gen8_emit_cc_state_pointers(ctx);
    gen8_emit_sampler_state_pointers(ctx);
    gen8_emit_wm_hz_op(ctx);
    gen8_emit_bypass_state(ctx);
    gen8_emit_vs_state(ctx);
    gen8_emit_clip_state(ctx);
    gen8_emit_sf_state(ctx);
    gen8_emit_depth_stencil_state(ctx);
    gen8_emit_wm_state(ctx, kernel);
    gen8_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen8_emit_vertex_element_state(ctx);
    gen8_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
1600
/*
 * Render @obj_surface's src_rect region onto dst_rect of the current
 * drawing region: initialize render resources, build all state, clear
 * the destination region, emit the draw and flush the batch.
 */
static void
gen8_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen8_render_initialize(ctx);
    gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    gen8_clear_dest_region(ctx);
    gen8_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}
1619
/*
 * Write the blend state used for subpicture compositing into the
 * dynamic-state BO: standard src-alpha / inv-src-alpha "over" blending
 * on render target 0, with pre- and post-blend clamping to [0, 1].
 */
static void
gen8_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_global_blend_state *global_blend_state;
    struct gen8_blend_state_rt *blend_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
             render_state->blend_state_offset;

    global_blend_state = (struct gen8_global_blend_state*) cc_ptr;

    memset(global_blend_state, 0, render_state->blend_state_size);
    /* Global blend state + blend_state for Render Target */
    blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
    blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.colorbuf_blend = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */

    dri_bo_unmap(render_state->dynamic_state.bo);
}
1653
1654 static void
1655 gen8_subpic_render_upload_constants(VADriverContextP ctx,
1656                                     struct object_surface *obj_surface)
1657 {
1658     struct i965_driver_data *i965 = i965_driver_data(ctx);
1659     struct i965_render_state *render_state = &i965->render_state;
1660     float *constant_buffer;
1661     float global_alpha = 1.0;
1662     unsigned int index = obj_surface->subpic_render_idx;
1663     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1664     unsigned char *cc_ptr;
1665
1666     if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
1667         global_alpha = obj_subpic->global_alpha;
1668     }
1669
1670
1671     dri_bo_map(render_state->dynamic_state.bo, 1);
1672     assert(render_state->dynamic_state.bo->virtual);
1673
1674     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
1675              render_state->curbe_offset;
1676
1677     constant_buffer = (float *) cc_ptr;
1678     *constant_buffer = global_alpha;
1679
1680     dri_bo_unmap(render_state->dynamic_state.bo);
1681 }
1682
/*
 * Build all pipeline state for blending a subpicture over the drawing
 * region: destination and subpicture source surfaces, sampler, CC
 * viewport, color-calc and alpha-blend state, the global-alpha
 * constant, and the vertex data.  Note: src_rect is not referenced
 * here; the vertex upload derives coordinates from dst_rect and the
 * subpicture object itself.
 */
static void
gen8_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    gen8_render_dest_surface_state(ctx, 0);
    gen8_subpic_render_src_surfaces_state(ctx, obj_surface);
    gen8_render_sampler(ctx);
    gen8_render_cc_viewport(ctx);
    gen8_render_color_calc_state(ctx);
    gen8_subpicture_render_blend_state(ctx);
    gen8_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}
1700
/*
 * Composite the subpicture currently selected on @obj_surface
 * (subpic_render_idx) over the drawing region using the subpicture PS
 * kernel, upload its image palette, and flush the batch.
 */
static void
gen8_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen8_render_initialize(ctx);
    gen8_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen8_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}
1721
1722 static void
1723 gen8_render_terminate(VADriverContextP ctx)
1724 {
1725     struct i965_driver_data *i965 = i965_driver_data(ctx);
1726     struct i965_render_state *render_state = &i965->render_state;
1727
1728     dri_bo_unreference(render_state->vb.vertex_buffer);
1729     render_state->vb.vertex_buffer = NULL;
1730
1731     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1732     render_state->wm.surface_state_binding_table_bo = NULL;
1733
1734     if (render_state->instruction_state.bo) {
1735         dri_bo_unreference(render_state->instruction_state.bo);
1736         render_state->instruction_state.bo = NULL;
1737     }
1738
1739     if (render_state->dynamic_state.bo) {
1740         dri_bo_unreference(render_state->dynamic_state.bo);
1741         render_state->dynamic_state.bo = NULL;
1742     }
1743
1744     if (render_state->indirect_state.bo) {
1745         dri_bo_unreference(render_state->indirect_state.bo);
1746         render_state->indirect_state.bo = NULL;
1747     }
1748
1749     if (render_state->draw_region) {
1750         dri_bo_unreference(render_state->draw_region->bo);
1751         free(render_state->draw_region);
1752         render_state->draw_region = NULL;
1753     }
1754 }
1755
1756 bool
1757 gen8_render_init(VADriverContextP ctx)
1758 {
1759     struct i965_driver_data *i965 = i965_driver_data(ctx);
1760     struct i965_render_state *render_state = &i965->render_state;
1761     int i, kernel_size;
1762     unsigned int kernel_offset, end_offset;
1763     unsigned char *kernel_ptr;
1764     struct i965_kernel *kernel;
1765
1766     render_state->render_put_surface = gen8_render_put_surface;
1767     render_state->render_put_subpicture = gen8_render_put_subpicture;
1768     render_state->render_terminate = gen8_render_terminate;
1769
1770     memcpy(render_state->render_kernels, render_kernels_gen8,
1771            sizeof(render_state->render_kernels));
1772
1773     kernel_size = 4096;
1774
1775     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
1776         kernel = &render_state->render_kernels[i];
1777
1778         if (!kernel->size)
1779             continue;
1780
1781         kernel_size += kernel->size;
1782     }
1783
1784     render_state->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
1785                                                       "kernel shader",
1786                                                       kernel_size,
1787                                                       0x1000);
1788     if (render_state->instruction_state.bo == NULL) {
1789         WARN_ONCE("failure to allocate the buffer space for kernel shader\n");
1790         return false;
1791     }
1792
1793     assert(render_state->instruction_state.bo);
1794
1795     render_state->instruction_state.bo_size = kernel_size;
1796     render_state->instruction_state.end_offset = 0;
1797     end_offset = 0;
1798
1799     dri_bo_map(render_state->instruction_state.bo, 1);
1800     kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual);
1801     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
1802         kernel = &render_state->render_kernels[i];
1803         kernel_offset = end_offset;
1804         kernel->kernel_offset = kernel_offset;
1805
1806         if (!kernel->size)
1807             continue;
1808
1809         memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
1810
1811         end_offset += ALIGN(kernel->size, ALIGNMENT);
1812     }
1813
1814     render_state->instruction_state.end_offset = end_offset;
1815
1816     dri_bo_unmap(render_state->instruction_state.bo);
1817
1818     return true;
1819 }