OSDN Git Service

render: clear background using 3D pipeline on GEN8+
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_render.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Keith Packard <keithp@keithp.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *    Zhao Yakui <yakui.zhao@intel.com>
28  *
29  */
30
31 /*
32  * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c
33  */
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <assert.h>
39 #include <math.h>
40
41 #include <va/va_drmcommon.h>
42
43 #include "intel_batchbuffer.h"
44 #include "intel_driver.h"
45 #include "i965_defines.h"
46 #include "i965_drv_video.h"
47 #include "i965_structs.h"
48 #include "i965_yuv_coefs.h"
49
50 #include "i965_render.h"
51
/* Shader programs for Gen9 */
53 static const uint32_t sf_kernel_static_gen9[][4] = {
54
55 };
56 static const uint32_t ps_kernel_static_gen9[][4] = {
57 #include "shaders/render/exa_wm_src_affine.g9b"
58 #include "shaders/render/exa_wm_src_sample_planar.g9b"
59 #include "shaders/render/exa_wm_yuv_color_balance.g9b"
60 #include "shaders/render/exa_wm_yuv_rgb.g9b"
61 #include "shaders/render/exa_wm_write.g9b"
62 };
63
64 static const uint32_t ps_subpic_kernel_static_gen9[][4] = {
65 #include "shaders/render/exa_wm_src_affine.g9b"
66 #include "shaders/render/exa_wm_src_sample_argb.g9b"
67 #include "shaders/render/exa_wm_write.g9b"
68 };
69
70 static const uint32_t ps_clear_kernel_static_gen9[][4] = {
71 #include "shaders/render/exa_wm_src_affine.g9b"
72 #include "shaders/render/exa_wm_clear.g9b"
73 };
74
75 #define SURFACE_STATE_PADDED_SIZE       SURFACE_STATE_PADDED_SIZE_GEN8
76
77 #define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
78 #define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
79
80 enum {
81     SF_KERNEL = 0,
82     PS_KERNEL,
83     PS_SUBPIC_KERNEL,
84     PS_CLEAR_KERNEL
85 };
86
87 static struct i965_kernel render_kernels_gen9[] = {
88     {
89         "SF",
90         SF_KERNEL,
91         sf_kernel_static_gen9,
92         sizeof(sf_kernel_static_gen9),
93         NULL
94     },
95     {
96         "PS",
97         PS_KERNEL,
98         ps_kernel_static_gen9,
99         sizeof(ps_kernel_static_gen9),
100         NULL
101     },
102
103     {
104         "PS_SUBPIC",
105         PS_SUBPIC_KERNEL,
106         ps_subpic_kernel_static_gen9,
107         sizeof(ps_subpic_kernel_static_gen9),
108         NULL
109     },
110
111     {
112         "PS_CLEAR",
113         PS_CLEAR_KERNEL,
114         ps_clear_kernel_static_gen9,
115         sizeof(ps_clear_kernel_static_gen9),
116         NULL
117     }
118 };
119
120 #define URB_VS_ENTRIES        8
121 #define URB_VS_ENTRY_SIZE     1
122
123 #define URB_GS_ENTRIES        0
124 #define URB_GS_ENTRY_SIZE     0
125
126 #define URB_CLIP_ENTRIES      0
127 #define URB_CLIP_ENTRY_SIZE   0
128
129 #define URB_SF_ENTRIES        1
130 #define URB_SF_ENTRY_SIZE     2
131
132 #define URB_CS_ENTRIES        4
133 #define URB_CS_ENTRY_SIZE     4
134
135 static void
136 gen9_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling)
137 {
138     switch (tiling) {
139     case I915_TILING_NONE:
140         ss->ss0.tiled_surface = 0;
141         ss->ss0.tile_walk = 0;
142         break;
143     case I915_TILING_X:
144         ss->ss0.tiled_surface = 1;
145         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
146         break;
147     case I915_TILING_Y:
148         ss->ss0.tiled_surface = 1;
149         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
150         break;
151     }
152 }
153
/*
 * Set "Shader Channel Select" for GEN9+: program an identity mapping so
 * each shader channel (R/G/B/A) reads from the corresponding surface
 * channel.  On GEN9 these fields must be programmed explicitly.
 * (The "chanel" spelling comes from the gen8_surface_state definition.)
 */
static void
gen9_render_set_surface_scs(struct gen8_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}
163
/*
 * Fill in a GEN8+ SURFACE_STATE for a 2D surface backed by @bo.
 *
 * @ss:     state structure to fill (zeroed first)
 * @bo:     backing buffer object
 * @offset: byte offset of the surface within @bo
 * @width/@height/@pitch: surface dimensions in pixels / bytes
 * @format: I965_SURFACEFORMAT_* value
 * @flags:  VA_TOP_FIELD / VA_BOTTOM_FIELD select single-field access
 *
 * Note: the caller must still emit a relocation for ss8.base_addr;
 * the value written here (bo->offset + offset) is only a presumed GPU
 * address that the kernel may patch.
 */
static void
gen9_render_set_surface_state(
    struct gen8_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    /* Field rendering: sample every other line, halving the height.
     * VA_BOTTOM_FIELD additionally starts one line down. */
    switch (flags & (VA_TOP_FIELD | VA_BOTTOM_FIELD)) {
    case VA_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case VA_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss8.base_addr = bo->offset + offset;

    /* Hardware expects dimension fields to hold (size - 1). */
    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    /* Always set 1(align 4 mode) per B-spec */
    ss->ss0.vertical_alignment = 1;
    ss->ss0.horizontal_alignment = 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen9_render_set_surface_tiling(ss, tiling);
}
208
/*
 * Write the SURFACE_STATE for one source (sampler) surface into the
 * shared surface-state/binding-table BO at binding slot @index, emit a
 * relocation for its base address, and point the binding table entry at
 * it.  Also bumps wm.sampler_count — one sampler is allocated per bound
 * source surface.
 */
static void
gen9_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    /* Map writable (second argument is write_enable). */
    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    gen9_render_set_surface_state(ss,
                                  region, offset,
                                  w, h,
                                  pitch, format, flags);
    gen9_render_set_surface_scs(ss);
    /* Let the kernel patch ss8 (the 64-bit base address) at exec time. */
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      offset,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                      region);

    /* Binding table entry @index -> this surface state's offset. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}
248
/*
 * Bind the source video surface's planes as sampler surfaces.
 *
 * Binding-slot layout (each plane is bound twice, presumably so the PS
 * kernel can use separate sampler indices for the same data — TODO
 * confirm against the shader sources):
 *   slots 1,2: Y plane
 *   slots 3,4: UV plane (NV12/P010) or U plane (planar YUV)
 *   slots 5,6: V plane (planar YUV only)
 * P010 uses 16-bit surface formats; everything else uses 8-bit.
 */
static void
gen9_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    if (obj_surface->fourcc == VA_FOURCC('P', '0', '1', '0')) {
        gen9_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R16_UNORM, flags);     /* Y */
        gen9_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R16_UNORM, flags);
        gen9_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R16G16_UNORM, flags); /* UV */
        gen9_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R16G16_UNORM, flags);
    } else {
        gen9_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
        gen9_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
            /* NV12: interleaved chroma, bound as a two-channel surface. */
            gen9_render_src_surface_state(ctx, 3, region,
                                          region_pitch * obj_surface->y_cb_offset,
                                          obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                          I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
            gen9_render_src_surface_state(ctx, 4, region,
                                          region_pitch * obj_surface->y_cb_offset,
                                          obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                          I965_SURFACEFORMAT_R8G8_UNORM, flags);
        } else {
            /* Planar YUV: U and V live in separate planes. */
            gen9_render_src_surface_state(ctx, 3, region,
                                          region_pitch * obj_surface->y_cb_offset,
                                          obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                          I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
            gen9_render_src_surface_state(ctx, 4, region,
                                          region_pitch * obj_surface->y_cb_offset,
                                          obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                          I965_SURFACEFORMAT_R8_UNORM, flags);
            gen9_render_src_surface_state(ctx, 5, region,
                                          region_pitch * obj_surface->y_cr_offset,
                                          obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                          I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
            gen9_render_src_surface_state(ctx, 6, region,
                                          region_pitch * obj_surface->y_cr_offset,
                                          obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                          I965_SURFACEFORMAT_R8_UNORM, flags);
        }
    }
}
309
310 static void
311 gen9_subpic_render_src_surfaces_state(VADriverContextP ctx,
312                                       struct object_surface *obj_surface)
313 {
314     dri_bo *subpic_region;
315     unsigned int index = obj_surface->subpic_render_idx;
316     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
317     struct object_image *obj_image = obj_subpic->obj_image;
318
319     assert(obj_surface);
320     assert(obj_surface->bo);
321     subpic_region = obj_image->bo;
322     /*subpicture surface*/
323     gen9_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
324     gen9_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
325 }
326
/*
 * Write the SURFACE_STATE for the render target (the drawable region)
 * at binding slot @index.  Format is chosen from the destination's
 * bytes-per-pixel: 16bpp -> B5G6R5, otherwise BGRA8888.
 */
static void
gen9_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;
    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    /* Map writable (second argument is write_enable). */
    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    gen9_render_set_surface_state(ss,
                                  dest_region->bo, 0,
                                  dest_region->width, dest_region->height,
                                  dest_region->pitch, format, 0);
    gen9_render_set_surface_scs(ss);
    /* Render target is both read and written by the render domain. */
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                      dest_region->bo);

    /* Binding table entry @index -> this surface state's offset. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}
362
/*
 * Upload one textured rectangle (3 vertices, the other two corners are
 * derived by the hardware/SF setup) into the vertex buffer at @offset.
 *
 * Each vertex is 4 floats: (tex_u, tex_v, screen_x, screen_y).  Screen
 * coordinates are always the unrotated rectangle; rotation is applied
 * by permuting which texture corners map to which screen corners, via
 * the per-rotation index table below.
 */
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4], /* [(x1,y1);(x2,y2)] */
    int offset
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    /* For each VA rotation, the 6 tex-coord component indices used by
     * the bottom-right, bottom-left and top-left vertices, in order. */
    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, offset, sizeof(vb), vb);
}
403
/*
 * Build and upload the vertex data for blending the current subpicture.
 * The destination rectangle is either taken verbatim (screen coords) or
 * scaled from surface coordinates into @output_rect space; the texture
 * rectangle is the subpicture's src_rect normalized to [0,1].
 */
static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        /* Scale the subpicture dst_rect from surface space to the
         * output rectangle's space. */
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    /* Normalized texture coordinates of the source sub-rectangle. */
    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords, 0);
}
437
/*
 * Upload two rectangles worth of vertex data:
 *   - offset 0:  the video rectangle (src_rect of the surface scaled
 *     into dst_rect of the destination region), drawn with the video
 *     PS kernel;
 *   - offset 48: the whole destination region, drawn first with the
 *     clear PS kernel to give the background a solid color.
 */
static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    /* Normalized texture coordinates of the source sub-rectangle. */
    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords, 0);

    /*
     * The vertices below are for the background, and always set
     * tex-coordinates although the tex-coordinates are not used in the
     * corresponding PS kernel.
     */
    tex_coords[0] = 0.0F;
    tex_coords[1] = 0.0F;
    tex_coords[2] = 1.0F;
    tex_coords[3] = 1.0F;

    vid_coords[0] = dest_region->x;
    vid_coords[1] = dest_region->y;
    vid_coords[2] = vid_coords[0] + dest_region->width;
    vid_coords[3] = vid_coords[1] + dest_region->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords, 12 * sizeof(float));
}
484
485 static void
486 i965_render_drawing_rectangle(VADriverContextP ctx)
487 {
488     struct i965_driver_data *i965 = i965_driver_data(ctx);
489     struct intel_batchbuffer *batch = i965->batch;
490     struct i965_render_state *render_state = &i965->render_state;
491     struct intel_region *dest_region = render_state->draw_region;
492
493     BEGIN_BATCH(batch, 4);
494     OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
495     OUT_BATCH(batch, 0x00000000);
496     OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
497     OUT_BATCH(batch, 0x00000000);
498     ADVANCE_BATCH(batch);
499 }
500
/*
 * Load the subpicture image's palette into the sampler palette via
 * CMD_SAMPLER_PALETTE_LOAD, merging the given @alpha into the top byte
 * of each ARGB entry.  No-op for images without a palette.
 */
static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    struct object_image *obj_image,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    assert(obj_image);

    /* Defensive double-check for release builds (assert compiles out). */
    if (!obj_image)
        return;

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /*fill palette*/
    //int32_t out[16]; //0-23:color 23-31:alpha
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}
528
/*
 * for GEN9
 */
532 #define ALIGNMENT       64
533
534 static void
535 gen9_render_initialize(VADriverContextP ctx)
536 {
537     struct i965_driver_data *i965 = i965_driver_data(ctx);
538     struct i965_render_state *render_state = &i965->render_state;
539     dri_bo *bo;
540     int size;
541     unsigned int end_offset;
542
543     /* VERTEX BUFFER */
544     dri_bo_unreference(render_state->vb.vertex_buffer);
545     bo = dri_bo_alloc(i965->intel.bufmgr,
546                       "vertex buffer",
547                       4096,
548                       4096);
549     assert(bo);
550     render_state->vb.vertex_buffer = bo;
551
552     /* WM */
553     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
554     bo = dri_bo_alloc(i965->intel.bufmgr,
555                       "surface state & binding table",
556                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
557                       4096);
558     assert(bo);
559     render_state->wm.surface_state_binding_table_bo = bo;
560
561     render_state->curbe_size = 256;
562
563     render_state->wm.sampler_count = 0;
564
565     render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state);
566
567     render_state->cc_state_size = sizeof(struct gen6_color_calc_state);
568
569     render_state->cc_viewport_size = sizeof(struct i965_cc_viewport);
570
571     render_state->blend_state_size = sizeof(struct gen8_global_blend_state) +
572                                      16 * sizeof(struct gen8_blend_state_rt);
573
574     render_state->sf_clip_size = 1024;
575
576     render_state->scissor_size = 1024;
577
578     size = ALIGN(render_state->curbe_size, ALIGNMENT) +
579            ALIGN(render_state->sampler_size, ALIGNMENT) +
580            ALIGN(render_state->cc_viewport_size, ALIGNMENT) +
581            ALIGN(render_state->cc_state_size, ALIGNMENT) +
582            ALIGN(render_state->blend_state_size, ALIGNMENT) +
583            ALIGN(render_state->sf_clip_size, ALIGNMENT) +
584            ALIGN(render_state->scissor_size, ALIGNMENT);
585
586     dri_bo_unreference(render_state->dynamic_state.bo);
587     bo = dri_bo_alloc(i965->intel.bufmgr,
588                       "dynamic_state",
589                       size,
590                       4096);
591
592     render_state->dynamic_state.bo = bo;
593
594     end_offset = 0;
595     render_state->dynamic_state.end_offset = 0;
596
597     /* Constant buffer offset */
598     render_state->curbe_offset = end_offset;
599     end_offset += ALIGN(render_state->curbe_size, ALIGNMENT);
600
601     /* Sampler_state  */
602     render_state->sampler_offset = end_offset;
603     end_offset += ALIGN(render_state->sampler_size, ALIGNMENT);
604
605     /* CC_VIEWPORT_state  */
606     render_state->cc_viewport_offset = end_offset;
607     end_offset += ALIGN(render_state->cc_viewport_size, ALIGNMENT);
608
609     /* CC_STATE_state  */
610     render_state->cc_state_offset = end_offset;
611     end_offset += ALIGN(render_state->cc_state_size, ALIGNMENT);
612
613     /* Blend_state  */
614     render_state->blend_state_offset = end_offset;
615     end_offset += ALIGN(render_state->blend_state_size, ALIGNMENT);
616
617     /* SF_CLIP_state  */
618     render_state->sf_clip_offset = end_offset;
619     end_offset += ALIGN(render_state->sf_clip_size, ALIGNMENT);
620
621     /* SCISSOR_state  */
622     render_state->scissor_offset = end_offset;
623     end_offset += ALIGN(render_state->scissor_size, ALIGNMENT);
624
625     /* update the end offset of dynamic_state */
626     render_state->dynamic_state.end_offset = end_offset;
627
628 }
629
/*
 * Write one gen8_sampler_state per bound source surface into the
 * dynamic-state BO: bilinear min/mag filtering, clamp-to-edge wrap on
 * all three coordinates.  wm.sampler_count must already have been set
 * by the surface-state setup.
 */
static void
gen9_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_sampler_state *sampler_state;
    int i;
    unsigned char *cc_ptr;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    /* Map writable (second argument is write_enable). */
    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
             render_state->sampler_offset;

    sampler_state = (struct gen8_sampler_state *) cc_ptr;

    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->dynamic_state.bo);
}
662
/*
 * Write the global blend state plus the render-target blend state into
 * the dynamic-state BO.  Blending proper is disabled; instead the
 * logic op is enabled with function 0xc (source COPY), with pre-blend
 * clamping on.
 */
static void
gen9_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_global_blend_state *global_blend_state;
    struct gen8_blend_state_rt *blend_state;
    unsigned char *cc_ptr;

    /* Map writable (second argument is write_enable). */
    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
             render_state->blend_state_offset;

    global_blend_state = (struct gen8_global_blend_state*) cc_ptr;

    memset(global_blend_state, 0, render_state->blend_state_size);
    /* Global blend state + blend_state for Render Target */
    blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc;
    blend_state->blend1.pre_blend_clamp_enable = 1;

    dri_bo_unmap(render_state->dynamic_state.bo);
}
689
690
/*
 * Write the CC viewport into the dynamic-state BO with a depth range of
 * [-1e35, 1e35], i.e. wide enough that depth clamping never triggers
 * for this 2D blit path.
 */
static void
gen9_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;
    unsigned char *cc_ptr;

    /* Map writable (second argument is write_enable). */
    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
             render_state->cc_viewport_offset;

    cc_viewport = (struct i965_cc_viewport *) cc_ptr;

    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->dynamic_state.bo);
}
714
/*
 * Write the color-calc state into the dynamic-state BO.  The constant
 * blend color is set to opaque magenta (1, 0, 1, 1) — presumably a
 * debug sentinel, since blending is replaced by a COPY logic op; TODO
 * confirm it is never visible in output.
 */
static void
gen9_render_color_calc_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_color_calc_state *color_calc_state;
    unsigned char *cc_ptr;

    /* Map writable (second argument is write_enable). */
    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
             render_state->cc_state_offset;

    color_calc_state = (struct gen6_color_calc_state *) cc_ptr;

    memset(color_calc_state, 0, sizeof(*color_calc_state));
    color_calc_state->constant_r = 1.0;
    color_calc_state->constant_g = 0.0;
    color_calc_state->constant_b = 1.0;
    color_calc_state->constant_a = 1.0;
    dri_bo_unmap(render_state->dynamic_state.bo);
}
738
739 #define PI  3.1415926
740
/*
 * Fill the CURBE (push-constant buffer) consumed by the video PS
 * kernel.  Layout, as written here:
 *   ushort[0]: plane layout — 2 = Y-only (Y800), 1 = interleaved
 *              chroma (NV12/P010), 0 = separate U/V planes
 *   ushort[1]: 1 = all color-balance attributes at defaults, kernel
 *              may skip the color-balance transform
 *   float[4..7]:  contrast, brightness, cos/sin hue terms (scaled by
 *                 contrast * saturation)
 *   float[8..]:   YUV->RGB coefficient matrix for the selected color
 *                 standard
 */
static void
gen9_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    unsigned char *cc_ptr;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI; /* degrees -> radians */
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;
    const float* yuv_coefs;
    size_t coefs_length;

    /* Map writable (second argument is write_enable). */
    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
             render_state->curbe_offset;

    constant_buffer = (unsigned short *) cc_ptr;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0'));

        *constant_buffer = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2') ||
            obj_surface->fourcc == VA_FOURCC('P', '0', '1', '0'))
            *constant_buffer = 1;
        else
            *constant_buffer = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    /* Color-balance terms start at float offset 4 (byte offset 16). */
    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    color_flag = flags & VA_SRC_COLOR_MASK;
    yuv_to_rgb = (float *)constant_buffer + 8;

    /* Coefficients for the requested color standard (BT.601/709/...). */
    yuv_coefs = i915_color_standard_to_coefs(i915_filter_to_color_standard(color_flag),
                                             &coefs_length);
    memcpy(yuv_to_rgb, yuv_coefs, coefs_length);

    dri_bo_unmap(render_state->dynamic_state.bo);
}
803
/*
 * Prepare all CPU-side state (surface states, samplers, viewport, CC,
 * blend, constants, vertices) before emitting the batch.
 *
 * Order matters in two places: the destination surface must occupy
 * binding slot 0 before the source surfaces fill slots 1+, and the
 * source-surface setup must run before gen9_render_sampler(), which
 * reads the wm.sampler_count it accumulates.
 */
static void
gen9_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    gen9_render_dest_surface_state(ctx, 0);
    gen9_render_src_surfaces_state(ctx, obj_surface, flags);
    gen9_render_sampler(ctx);
    gen9_render_cc_viewport(ctx);
    gen9_render_color_calc_state(ctx);
    gen9_render_blend_state(ctx);
    gen9_render_upload_constants(ctx, obj_surface, flags);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}
822
/*
 * Emit the 19-DWord Gen9 STATE_BASE_ADDRESS packet.  Surface state,
 * dynamic state and instruction bases are relocated to their BOs; the
 * general and indirect-object bases stay zero, and all upper bounds are
 * set to the 0xFFFF0000 "unbounded" pattern with the modify bit.
 */
static void
gen9_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 19);
    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (19 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /*DW4 */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, 0);

    /*DW6*/
    /* Dynamic state base address */
    OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
              0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

    /*DW8*/
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, 0);

    /*DW10 */
    /* Instruction base address */
    OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

    /*DW12 */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */

    /* the bindless surface state address */
    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0xFFFF0000);
    ADVANCE_BATCH(batch);
}
866
/*
 * Point the hardware at the color-calc and blend states previously
 * written into the dynamic state buffer.  The "+ 1" sets the low bit
 * of each pointer, which flags the pointer as valid in the
 * 3DSTATE_*_STATE_POINTERS command layout.
 */
static void
gen9_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->cc_state_offset + 1));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->blend_state_offset + 1));
    ADVANCE_BATCH(batch);

}
885
/*
 * Bind the vertex buffer and kick one RECTLIST draw of 3 vertices.
 *
 * @offset is the byte offset into the shared vertex buffer at which the
 * 3 rectangle vertices start: 0 selects the image quad, and
 * 12 * sizeof(float) selects the background-clear quad (see
 * gen9_render_emit_states()).  Each vertex is 4 floats (16 bytes).
 */
static void
gen9_emit_vertices(VADriverContextP ctx, int offset)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) |
              (0 << GEN8_VB0_MOCS_SHIFT) |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); /* 16-byte vertex stride */
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, offset);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 12 * 4); /* 48 bytes = 3 vertices * 16-byte stride */
    ADVANCE_BATCH(batch);

    /* Topology in 3D primitive is overrided by VF_TOPOLOGY command */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST);
    ADVANCE_BATCH(batch);

    /* NOTE(review): the next three commands are emitted without a
     * BEGIN_BATCH/ADVANCE_BATCH bracket; presumably this relies on the
     * space reserved by intel_batchbuffer_start_atomic() in the caller
     * -- confirm against intel_batchbuffer semantics. */
    OUT_BATCH(batch, GEN7_3DSTATE_VF | (2 - 2));
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN8_3DSTATE_VF_INSTANCING | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN8_3DSTATE_VF_SGVS | (2 - 2));
    OUT_BATCH(batch, 0);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
933
/*
 * Describe the per-vertex data layout for the vertex fetcher and
 * disable instancing on every element.  All three elements read from
 * vertex buffer 0 (u, v at byte offset 0; x, y at byte offset 8);
 * element 0 ignores its fetch and stores constant zeros to pad the VUE.
 *
 * NOTE(review): this whole function emits dwords without
 * BEGIN_BATCH/ADVANCE_BATCH bracketing; presumably it relies on the
 * atomic batch space reserved by the caller -- confirm.
 */
static void
gen9_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int i;

    /*
     * The VUE layout
     * dword 0-3: pad (0, 0, 0. 0)
     * dword 4-7: position (x, y, 1.0, 1.0),
     * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0)
     */

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (7 - 2));

    /* Element state 0. These are 4 dwords of 0 required for the VUE format.
     * We don't really know or care what they do.
     */

    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));

    /* offset 8: X, Y -> {x, y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));

    /* offset 0: u,v -> {U, V, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));

    /* Disable instancing for all vertex elements. */
    for (i = 0; i < 3; i++) {
        OUT_BATCH(batch, GEN8_3DSTATE_VF_INSTANCING | (3 - 2));
        OUT_BATCH(batch, i);
        OUT_BATCH(batch, 0);
    }
}
991
/*
 * Program the vertex-shader stage as pass-through: no VS kernel, no
 * constant buffers, and null binding-table/sampler pointers.  The
 * fetched (x, y, u, v) vertices are forwarded unmodified.
 */
static void
gen9_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 0 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 1 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 2 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    /* VS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. VS shader GRF and URB buffer definition */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

}
1042
1043 /*
1044  * URB layout on GEN8
1045  * ----------------------------------------
1046  * | PS Push Constants (8KB) | VS entries |
1047  * ----------------------------------------
1048  */
1049 static void
1050 gen9_emit_urb(VADriverContextP ctx)
1051 {
1052     struct i965_driver_data *i965 = i965_driver_data(ctx);
1053     struct intel_batchbuffer *batch = i965->batch;
1054     unsigned int num_urb_entries = 64;
1055
1056     /* The minimum urb entries is 64 */
1057
1058     BEGIN_BATCH(batch, 2);
1059     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
1060     OUT_BATCH(batch, 0);
1061     ADVANCE_BATCH(batch);
1062
1063     BEGIN_BATCH(batch, 2);
1064     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
1065     OUT_BATCH(batch, 0);
1066     ADVANCE_BATCH(batch);
1067
1068     BEGIN_BATCH(batch, 2);
1069     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
1070     OUT_BATCH(batch, 0);
1071     ADVANCE_BATCH(batch);
1072
1073     BEGIN_BATCH(batch, 2);
1074     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
1075     OUT_BATCH(batch, 0);
1076     ADVANCE_BATCH(batch);
1077
1078     /* Size is 8Kbs and base address is 0Kb */
1079     BEGIN_BATCH(batch, 2);
1080     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
1081     /* Size is 8Kbs and base address is 0Kb */
1082     OUT_BATCH(batch,
1083               (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) |
1084               (8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT));
1085     ADVANCE_BATCH(batch);
1086
1087     BEGIN_BATCH(batch, 2);
1088     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
1089     OUT_BATCH(batch,
1090               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
1091               (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
1092               (4 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1093     ADVANCE_BATCH(batch);
1094
1095     BEGIN_BATCH(batch, 2);
1096     OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
1097     OUT_BATCH(batch,
1098               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
1099               (5 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1100     ADVANCE_BATCH(batch);
1101
1102     BEGIN_BATCH(batch, 2);
1103     OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
1104     OUT_BATCH(batch,
1105               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
1106               (6 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1107     ADVANCE_BATCH(batch);
1108
1109     BEGIN_BATCH(batch, 2);
1110     OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
1111     OUT_BATCH(batch,
1112               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
1113               (7 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1114     ADVANCE_BATCH(batch);
1115 }
1116
/*
 * Disable every geometry stage between the VS and the rasterizer:
 * GS, HS, the tessellator (TE), DS and stream-out are all programmed
 * with null kernels / zeroed state so that vertices pass straight
 * through the pipeline.
 */
static void
gen9_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 10);
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2));
    /* GS shader address */
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    /* DW3. GS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. GS shader GRF and URB offset/length */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2));
    OUT_BATCH(batch, 0);
    /*DW2. HS pass-through */
    OUT_BATCH(batch, 0);
    /*DW3. HS shader address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /*DW5. HS shader flag. URB offset/length and so on */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (11 - 2));
    /* DW1. DS shader pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW3-5. DS shader dispatch flag.*/
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6-7. DS shader pass-through, GRF,URB offset/Length,Thread Number*/
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW8. DS shader output URB */
    OUT_BATCH(batch, 0);

    /* Dual-patch kernel start pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1266
/*
 * Emit the per-batch invariant pipeline setup: select the 3D pipeline,
 * program single-sample (1x) multisampling with a zeroed sample
 * pattern, a sample mask of 0x1, and a null system instruction
 * pointer (SIP).
 */
static void
gen9_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D |
              GEN9_PIPELINE_SELECTION_MASK);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    ADVANCE_BATCH(batch);

    /* Update 3D Multisample pattern */
    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);


    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1310
/*
 * Program the clip stage as pass-through (all-zero 3DSTATE_CLIP
 * payload -- clipping disabled).
 *
 * NOTE(review): emitted without BEGIN_BATCH/ADVANCE_BATCH bracketing;
 * presumably relies on the atomic batch space reserved by the caller
 * -- confirm.
 */
static void
gen9_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}
1322
/*
 * Program rasterizer/setup-backend state: no culling (3DSTATE_RASTER),
 * SBE forwarding one URB attribute to the PS with all four components
 * active (per the VUE layout this is the texture coordinate), a zeroed
 * SBE swizzle table, and default SF state.
 */
static void
gen9_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2));
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);


    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (6 - 2));
    OUT_BATCH(batch,
              (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) |
              (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) |
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN9_SBE_ACTIVE_COMPONENT_XYZW);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* SBE for backend setup */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    ADVANCE_BATCH(batch);
}
1374
/*
 * Program the pixel-shader/WM stage for the selected kernel:
 *  - PS_KERNEL / PS_CLEAR_KERNEL: writeable render target, blending off
 *  - PS_SUBPIC_KERNEL: src-alpha / inv-src-alpha blending so the
 *    subpicture composites over the render target
 *
 * The PS constant buffer points at the curbe data uploaded earlier
 * (render_state->curbe_offset); the SIMD16 kernel starts its payload
 * at GRF 6, with push constants enabled.
 */
static void
gen9_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    unsigned int num_samples = 0;
    unsigned int max_threads;

    /* NOTE(review): -2 headroom below the hardware maximum; the reason
     * is not documented here -- confirm against the PRM restrictions. */
    max_threads = i965->intel.device_info->max_wm_threads - 2;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2));
    OUT_BATCH(batch,
              (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE));
    ADVANCE_BATCH(batch);

    if (kernel == PS_KERNEL ||
        kernel == PS_CLEAR_KERNEL) {
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                  GEN8_PS_BLEND_HAS_WRITEABLE_RT);
        ADVANCE_BATCH(batch);
    } else if (kernel == PS_SUBPIC_KERNEL) {
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                  (GEN8_PS_BLEND_HAS_WRITEABLE_RT |
                   GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE |
                   (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) |
                   (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) |
                   (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) |
                   (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT)));
        ADVANCE_BATCH(batch);
    }

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2));
    OUT_BATCH(batch,
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2));
    OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
    OUT_BATCH(batch, 0);
    /*DW3-4. Constant buffer 0 */
    OUT_BATCH(batch, render_state->curbe_offset);
    OUT_BATCH(batch, 0);

    /*DW5-10. Constant buffer 1-3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 12);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2));
    /* PS shader address */
    OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset);

    OUT_BATCH(batch, 0);
    /* DW3. PS shader flag .Binding table cnt/sample cnt */
    OUT_BATCH(batch,
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
              GEN7_PS_VECTOR_MASK_ENABLE);
    /* DW4-5. Scatch space */
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch, 0);
    /* DW6. PS shader threads. */
    OUT_BATCH(batch,
              ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    /* DW7. PS shader GRF */
    OUT_BATCH(batch,
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}
1468
/*
 * Emit null depth/stencil buffer state: no depth buffer is bound
 * (surface type NULL, D32_FLOAT format), and the hierarchical-depth,
 * stencil and clear-params commands are all zeroed.
 */
static void
gen9_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    /* DW2-3. Depth Buffer Address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW4-7. Surface structure */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the Hier Depth buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the stencil buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1514
1515 static void
1516 gen9_emit_depth_stencil_state(VADriverContextP ctx)
1517 {
1518     struct i965_driver_data *i965 = i965_driver_data(ctx);
1519     struct intel_batchbuffer *batch = i965->batch;
1520
1521     BEGIN_BATCH(batch, 3);
1522     OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
1523     OUT_BATCH(batch, 0);
1524     OUT_BATCH(batch, 0);
1525     ADVANCE_BATCH(batch);
1526 }
1527
1528 static void
1529 gen9_emit_wm_hz_op(VADriverContextP ctx)
1530 {
1531     struct i965_driver_data *i965 = i965_driver_data(ctx);
1532     struct intel_batchbuffer *batch = i965->batch;
1533
1534     BEGIN_BATCH(batch, 5);
1535     OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
1536     OUT_BATCH(batch, 0);
1537     OUT_BATCH(batch, 0);
1538     OUT_BATCH(batch, 0);
1539     OUT_BATCH(batch, 0);
1540     ADVANCE_BATCH(batch);
1541 }
1542
/*
 * Point the CC viewport at the entry previously written into the
 * dynamic state buffer (render_state->cc_viewport_offset) and leave
 * the SF/CLIP viewport pointer at zero.
 */
static void
gen9_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_BATCH(batch, render_state->cc_viewport_offset);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1560
/*
 * Point the PS sampler state at the sampler table previously written
 * into the dynamic state buffer (render_state->sampler_offset).
 */
static void
gen9_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, render_state->sampler_offset);
    ADVANCE_BATCH(batch);
}
1573
1574
/* Delegate drawing-rectangle emission to the shared i965 helper. */
static void
gen9_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}
1580
/*
 * Emit the full 3D pipeline command sequence for a surface blit.
 *
 * Two primitives are drawn: first the background is cleared with
 * PS_CLEAR_KERNEL using the second set of vertices (byte offset
 * 12 * sizeof(float) into the vertex buffer), then the image is drawn
 * with PS_KERNEL using the first set (offset 0).  The whole sequence
 * is emitted inside one atomic batch region (0x1000 dwords reserved).
 */
static void
gen9_render_emit_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen9_emit_invarient_states(ctx);
    gen9_emit_state_base_address(ctx);
    gen9_emit_viewport_state_pointers(ctx);
    gen9_emit_urb(ctx);
    gen9_emit_cc_state_pointers(ctx);
    gen9_emit_sampler_state_pointers(ctx);
    gen9_emit_wm_hz_op(ctx);
    gen9_emit_bypass_state(ctx);
    gen9_emit_vs_state(ctx);
    gen9_emit_clip_state(ctx);
    gen9_emit_sf_state(ctx);
    gen9_emit_depth_stencil_state(ctx);
    gen9_emit_depth_buffer_state(ctx);
    gen9_emit_drawing_rectangle(ctx);
    gen9_emit_vertex_element_state(ctx);
    /* clear background */
    gen9_emit_wm_state(ctx, PS_CLEAR_KERNEL);
    gen9_emit_vertices(ctx, 12 * sizeof(float));
    /* draw the image */
    gen9_emit_wm_state(ctx, PS_KERNEL);
    gen9_emit_vertices(ctx, 0);
    intel_batchbuffer_end_atomic(batch);
}
1612
/*
 * Same pipeline setup as gen9_render_emit_states(), but using the
 * subpicture pixel shader (PS_SUBPIC_KERNEL) for a single blended
 * draw -- there is no background-clear pass here.
 */
static void
gen9_subpicture_render_emit_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen9_emit_invarient_states(ctx);
    gen9_emit_state_base_address(ctx);
    gen9_emit_viewport_state_pointers(ctx);
    gen9_emit_urb(ctx);
    gen9_emit_cc_state_pointers(ctx);
    gen9_emit_sampler_state_pointers(ctx);
    gen9_emit_wm_hz_op(ctx);
    gen9_emit_bypass_state(ctx);
    gen9_emit_vs_state(ctx);
    gen9_emit_clip_state(ctx);
    gen9_emit_sf_state(ctx);
    gen9_emit_depth_stencil_state(ctx);
    gen9_emit_wm_state(ctx, PS_SUBPIC_KERNEL);
    gen9_emit_depth_buffer_state(ctx);
    gen9_emit_drawing_rectangle(ctx);
    gen9_emit_vertex_element_state(ctx);
    gen9_emit_vertices(ctx, 0);
    intel_batchbuffer_end_atomic(batch);
}
1640
/*
 * Top-level entry point for rendering a surface with the 3D pipeline:
 * (re)initialize render resources, build all indirect states and
 * vertex data for src_rect -> dst_rect, emit the batch commands, and
 * flush the batch buffer for execution.
 */
static void
gen9_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen9_render_initialize(ctx);
    gen9_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    gen9_render_emit_states(ctx);
    intel_batchbuffer_flush(batch);
}
1658
/*
 * Write the global blend state plus one render-target blend state into
 * the dynamic state buffer, enabling src-alpha / inv-src-alpha "over"
 * compositing (for both color and alpha) with pre- and post-blend
 * clamping to [0, 1].  Used by the subpicture path instead of
 * gen9_render_blend_state().
 */
static void
gen9_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_global_blend_state *global_blend_state;
    struct gen8_blend_state_rt *blend_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
             render_state->blend_state_offset;

    global_blend_state = (struct gen8_global_blend_state*) cc_ptr;

    memset(global_blend_state, 0, render_state->blend_state_size);
    /* Global blend state + blend_state for Render Target */
    blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
    blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.colorbuf_blend = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */

    dri_bo_unmap(render_state->dynamic_state.bo);
}
1692
/*
 * Upload the subpicture pixel-shader constant: a single float holding
 * the global alpha (1.0 unless the subpicture has
 * VA_SUBPICTURE_GLOBal_ALPHA set, in which case its global_alpha value
 * is used), written at curbe_offset in the dynamic state buffer.
 */
static void
gen9_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
    unsigned char *cc_ptr;

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }


    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
             render_state->curbe_offset;

    constant_buffer = (float *) cc_ptr;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->dynamic_state.bo);
}
1721
/*
 * Build every GPU state object needed to composite one subpicture layer
 * onto the destination surface: destination/source surface states,
 * sampler, CC viewport, color calc, alpha blend state, push constants
 * (global alpha) and the vertex data for the destination rectangle.
 *
 * NOTE(review): src_rect is not referenced in this body — it is kept
 * for signature symmetry with the main render path; the subpicture
 * source area presumably comes from obj_subpic inside the helpers.
 */
static void
gen9_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    gen9_render_dest_surface_state(ctx, 0);
    gen9_subpic_render_src_surfaces_state(ctx, obj_surface);
    gen9_render_sampler(ctx);
    gen9_render_cc_viewport(ctx);
    gen9_render_color_calc_state(ctx);
    gen9_subpicture_render_blend_state(ctx);
    gen9_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}
1739
/*
 * Render entry point for subpictures on GEN9: set up the pipeline
 * states, emit the 3D batch commands, upload the subpicture palette
 * (for indexed formats) and flush the batch buffer to the GPU.
 * Installed as render_state->render_put_subpicture by gen9_render_init().
 */
static void
gen9_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen9_render_initialize(ctx);
    gen9_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen9_subpicture_render_emit_states(ctx);
    /* 0xff: fully opaque alpha for every palette entry. */
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}
1760
1761 static void
1762 gen9_render_terminate(VADriverContextP ctx)
1763 {
1764     struct i965_driver_data *i965 = i965_driver_data(ctx);
1765     struct i965_render_state *render_state = &i965->render_state;
1766
1767     dri_bo_unreference(render_state->vb.vertex_buffer);
1768     render_state->vb.vertex_buffer = NULL;
1769
1770     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1771     render_state->wm.surface_state_binding_table_bo = NULL;
1772
1773     if (render_state->instruction_state.bo) {
1774         dri_bo_unreference(render_state->instruction_state.bo);
1775         render_state->instruction_state.bo = NULL;
1776     }
1777
1778     if (render_state->dynamic_state.bo) {
1779         dri_bo_unreference(render_state->dynamic_state.bo);
1780         render_state->dynamic_state.bo = NULL;
1781     }
1782
1783     if (render_state->indirect_state.bo) {
1784         dri_bo_unreference(render_state->indirect_state.bo);
1785         render_state->indirect_state.bo = NULL;
1786     }
1787
1788     if (render_state->draw_region) {
1789         dri_bo_unreference(render_state->draw_region->bo);
1790         free(render_state->draw_region);
1791         render_state->draw_region = NULL;
1792     }
1793 }
1794
1795 bool
1796 gen9_render_init(VADriverContextP ctx)
1797 {
1798     struct i965_driver_data *i965 = i965_driver_data(ctx);
1799     struct i965_render_state *render_state = &i965->render_state;
1800     int i, kernel_size;
1801     unsigned int kernel_offset, end_offset;
1802     unsigned char *kernel_ptr;
1803     struct i965_kernel *kernel;
1804
1805     render_state->render_put_surface = gen9_render_put_surface;
1806     render_state->render_put_subpicture = gen9_render_put_subpicture;
1807     render_state->render_terminate = gen9_render_terminate;
1808
1809     memcpy(render_state->render_kernels, render_kernels_gen9,
1810            sizeof(render_state->render_kernels));
1811
1812     kernel_size = 4096;
1813
1814     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
1815         kernel = &render_state->render_kernels[i];
1816
1817         if (!kernel->size)
1818             continue;
1819
1820         kernel_size += ALIGN(kernel->size, ALIGNMENT);
1821     }
1822
1823     render_state->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
1824                                                       "kernel shader",
1825                                                       kernel_size,
1826                                                       0x1000);
1827     if (render_state->instruction_state.bo == NULL) {
1828         WARN_ONCE("failure to allocate the buffer space for kernel shader\n");
1829         return false;
1830     }
1831
1832     assert(render_state->instruction_state.bo);
1833
1834     render_state->instruction_state.bo_size = kernel_size;
1835     render_state->instruction_state.end_offset = 0;
1836     end_offset = 0;
1837
1838     dri_bo_map(render_state->instruction_state.bo, 1);
1839     kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual);
1840     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
1841         kernel = &render_state->render_kernels[i];
1842         kernel_offset = end_offset;
1843         kernel->kernel_offset = kernel_offset;
1844
1845         if (!kernel->size)
1846             continue;
1847
1848         memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
1849
1850         end_offset += ALIGN(kernel->size, ALIGNMENT);
1851     }
1852
1853     render_state->instruction_state.end_offset = end_offset;
1854
1855     dri_bo_unmap(render_state->instruction_state.bo);
1856
1857     return true;
1858 }