render: clear background using 3D pipeline on GEN8+
[android-x86/hardware-intel-common-vaapi.git] src/i965_render.c
/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"
#include "i965_yuv_coefs.h"

#include "i965_render.h"
#include "i965_post_processing.h"

#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

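/* The shader binaries below are generated from the assembly under
 * shaders/render and included as arrays of 128-bit (four uint32_t)
 * instructions. */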
static const uint32_t sf_kernel_static[][4] = {
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

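/* thread0.grf_reg_count encoding: the number of 16-register GRF blocks a
 * kernel needs, minus one. */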
#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

static const uint32_t ps_kernel_static[][4] = {
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_color_balance.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
static const uint32_t ps_subpic_kernel_static[][4] = {
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* Programs for Ironlake */
static const uint32_t sf_kernel_static_gen5[][4] = {
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] = {
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] = {
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* Programs for Sandy Bridge */
static const uint32_t sf_kernel_static_gen6[][4] = {
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_color_balance.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* Programs for Ivy Bridge */
static const uint32_t sf_kernel_static_gen7[][4] = {
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_color_balance.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Programs for Haswell */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

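/*
 * Surface state slots are padded to the larger of the gen6/gen7 sizes so
 * that one buffer layout works on every generation; the binding table is
 * placed immediately after the last surface state slot.
 */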
#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)

#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

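/* Reinterpret the bits of a float as a uint32_t, for emitting float
 * immediates into the batch buffer (see i965_render_constant_color()). */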
static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum {
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL,
    PS_CLEAR_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    },

    // Not used
    {
        "PS_CLEAR",
        PS_CLEAR_KERNEL,
        NULL,
        0,
        0
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    },

    // Not used
    {
        "PS_CLEAR",
        PS_CLEAR_KERNEL,
        NULL,
        0,
        0
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    },

    // Not used
    {
        "PS_CLEAR",
        PS_CLEAR_KERNEL,
        NULL,
        0,
        0
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    },

    // Not used
    {
        "PS_CLEAR",
        PS_CLEAR_KERNEL,
        NULL,
        0,
        0
    }
};

static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    },

    // Not used
    {
        "PS_CLEAR",
        PS_CLEAR_KERNEL,
        NULL,
        0,
        0
    }
};

#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4
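
/*
 * These entry counts/sizes feed i965_render_urb_layout(), which packs the
 * fixed-function stages back to back in the URB: VS at offset 0, then GS,
 * CLIP, SF and CS.  Each fence value programmed there is the end offset of
 * the corresponding stage's region.
 */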

static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_info))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

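/* One sampler state per source surface: bilinear min/mag filtering with
 * all three texture coordinates clamped. */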
static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_info))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_info))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

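/*
 * CC state for subpicture blending: color blend is enabled with
 * src_alpha / inv_src_alpha factors so the subpicture composites over the
 * video by its alpha channel.
 */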
static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0;    /* ALPHATEST_UNORM8: store alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;      /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend  = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dest_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

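    /* For field rendering, sample every other line (vert_line_stride = 1);
     * a bottom field additionally starts one line down (vert_line_stride_ofs
     * = 1).  Either way the effective surface height is halved. */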
    switch (flags & (VA_TOP_FIELD | VA_BOTTOM_FIELD)) {
    case VA_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case VA_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}

static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* Set "Shader Channel Select" */
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (VA_TOP_FIELD | VA_BOTTOM_FIELD)) {
    case VA_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case VA_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}

static void
i965_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_info)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        if (IS_HASWELL(i965->intel.device_info))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

static void
i965_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

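    /* Each plane is bound at two consecutive binding-table slots; the
     * duplicated entries appear to match how the sampling kernels index
     * their binding table (assumption based on the setup below). */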
    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */
        return;

    if (obj_surface->fourcc == VA_FOURCC_NV12) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      struct object_surface *obj_surface)
{
    dri_bo *subpic_region;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
    struct object_image *obj_image = obj_subpic->obj_image;

    assert(obj_surface);
    assert(obj_surface->bo);
    subpic_region = obj_image->bo;
    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_info)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        if (IS_HASWELL(i965->intel.device_info))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

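/*
 * Each vertex is four floats -- texture coordinates then screen
 * coordinates -- matching the 16-byte vertex pitch and the two
 * R32G32_FLOAT elements programmed in i965_render_vertex_elements().
 * Only three vertices are emitted: a RECTLIST infers the fourth corner.
 */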
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}

static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

#define PI  3.1415926

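/*
 * CURBE layout as written below: word 0 selects the source layout
 * (2 = single-plane grayscale, 1 = NV12, 0 = three-plane YUV), word 1
 * skips the color-balance transform when all attributes are at their
 * defaults, floats 4-7 hold the contrast/brightness and hue/saturation
 * cos/sin terms, and floats 8 onward hold the YUV-to-RGB coefficients.
 */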
static void
i965_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    const float *yuv_coefs;
    size_t coefs_length;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC_Y800);

        constant_buffer[0] = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC_NV12)
            constant_buffer[0] = 1;
        else
            constant_buffer[0] = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    yuv_to_rgb = (float *)constant_buffer + 8;
    yuv_coefs = i915_color_standard_to_coefs(i915_filter_to_color_standard(flags & VA_SRC_COLOR_MASK),
                                             &coefs_length);
    memcpy(yuv_to_rgb, yuv_coefs, coefs_length);

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }

    dri_bo_map(render_state->curbe.bo, 1);

    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, obj_surface, flags);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

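/*
 * Ironlake's STATE_BASE_ADDRESS is two DWords longer than the gen4 form
 * (it adds an instruction base address and bound).  Only the surface
 * state base is relocated; all other bases stay at zero.
 */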
1247 static void
1248 i965_render_state_base_address(VADriverContextP ctx)
1249 {
1250     struct i965_driver_data *i965 = i965_driver_data(ctx);
1251     struct intel_batchbuffer *batch = i965->batch;
1252     struct i965_render_state *render_state = &i965->render_state;
1253
1254     if (IS_IRONLAKE(i965->intel.device_info)) {
1255         BEGIN_BATCH(batch, 8);
1256         OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1257         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1258         OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1259         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1260         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1261         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1262         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1263         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1264         ADVANCE_BATCH(batch);
1265     } else {
1266         BEGIN_BATCH(batch, 6);
1267         OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
1268         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1269         OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1270         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1271         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1272         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1273         ADVANCE_BATCH(batch);
1274     }
1275 }
1276
1277 static void
1278 i965_render_binding_table_pointers(VADriverContextP ctx)
1279 {
1280     struct i965_driver_data *i965 = i965_driver_data(ctx);
1281     struct intel_batchbuffer *batch = i965->batch;
1282
1283     BEGIN_BATCH(batch, 6);
1284     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
1285     OUT_BATCH(batch, 0); /* vs */
1286     OUT_BATCH(batch, 0); /* gs */
1287     OUT_BATCH(batch, 0); /* clip */
1288     OUT_BATCH(batch, 0); /* sf */
1289     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
1290     ADVANCE_BATCH(batch);
1291 }
1292
1293 static void
1294 i965_render_constant_color(VADriverContextP ctx)
1295 {
1296     struct i965_driver_data *i965 = i965_driver_data(ctx);
1297     struct intel_batchbuffer *batch = i965->batch;
1298
1299     BEGIN_BATCH(batch, 5);
1300     OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
1301     OUT_BATCH(batch, float_to_uint(1.0));
1302     OUT_BATCH(batch, float_to_uint(0.0));
1303     OUT_BATCH(batch, float_to_uint(1.0));
1304     OUT_BATCH(batch, float_to_uint(1.0));
1305     ADVANCE_BATCH(batch);
1306 }
1307
1308 static void
1309 i965_render_pipelined_pointers(VADriverContextP ctx)
1310 {
1311     struct i965_driver_data *i965 = i965_driver_data(ctx);
1312     struct intel_batchbuffer *batch = i965->batch;
1313     struct i965_render_state *render_state = &i965->render_state;
1314
1315     BEGIN_BATCH(batch, 7);
1316     OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
1317     OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1318     OUT_BATCH(batch, 0);  /* disable GS */
1319     OUT_BATCH(batch, 0);  /* disable CLIP */
1320     OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1321     OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1322     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1323     ADVANCE_BATCH(batch);
1324 }
1325
1326 static void
1327 i965_render_urb_layout(VADriverContextP ctx)
1328 {
1329     struct i965_driver_data *i965 = i965_driver_data(ctx);
1330     struct intel_batchbuffer *batch = i965->batch;
1331     int urb_vs_start, urb_vs_size;
1332     int urb_gs_start, urb_gs_size;
1333     int urb_clip_start, urb_clip_size;
1334     int urb_sf_start, urb_sf_size;
1335     int urb_cs_start, urb_cs_size;
1336
1337     urb_vs_start = 0;
1338     urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
1339     urb_gs_start = urb_vs_start + urb_vs_size;
1340     urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
1341     urb_clip_start = urb_gs_start + urb_gs_size;
1342     urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
1343     urb_sf_start = urb_clip_start + urb_clip_size;
1344     urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
1345     urb_cs_start = urb_sf_start + urb_sf_size;
1346     urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
1347
1348     BEGIN_BATCH(batch, 3);
1349     OUT_BATCH(batch,
1350               CMD_URB_FENCE |
1351               UF0_CS_REALLOC |
1352               UF0_SF_REALLOC |
1353               UF0_CLIP_REALLOC |
1354               UF0_GS_REALLOC |
1355               UF0_VS_REALLOC |
1356               1);
1357     OUT_BATCH(batch,
1358               ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
1359               ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
1360               ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
1361     OUT_BATCH(batch,
1362               ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
1363               ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
1364     ADVANCE_BATCH(batch);
1365 }
1366
1367 static void
1368 i965_render_cs_urb_layout(VADriverContextP ctx)
1369 {
1370     struct i965_driver_data *i965 = i965_driver_data(ctx);
1371     struct intel_batchbuffer *batch = i965->batch;
1372
1373     BEGIN_BATCH(batch, 2);
1374     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1375     OUT_BATCH(batch,
1376               ((URB_CS_ENTRY_SIZE - 1) << 4) |          /* URB Entry Allocation Size */
1377               (URB_CS_ENTRIES << 0));                /* Number of URB Entries */
1378     ADVANCE_BATCH(batch);
1379 }
1380
1381 static void
1382 i965_render_constant_buffer(VADriverContextP ctx)
1383 {
1384     struct i965_driver_data *i965 = i965_driver_data(ctx);
1385     struct intel_batchbuffer *batch = i965->batch;
1386     struct i965_render_state *render_state = &i965->render_state;
1387
1388     BEGIN_BATCH(batch, 2);
1389     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1390     OUT_RELOC(batch, render_state->curbe.bo,
1391               I915_GEM_DOMAIN_INSTRUCTION, 0,
1392               URB_CS_ENTRY_SIZE - 1);
1393     ADVANCE_BATCH(batch);
1394 }
1395
1396 static void
1397 i965_render_drawing_rectangle(VADriverContextP ctx)
1398 {
1399     struct i965_driver_data *i965 = i965_driver_data(ctx);
1400     struct intel_batchbuffer *batch = i965->batch;
1401     struct i965_render_state *render_state = &i965->render_state;
1402     struct intel_region *dest_region = render_state->draw_region;
1403
1404     BEGIN_BATCH(batch, 4);
1405     OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
1406     OUT_BATCH(batch, 0x00000000);
1407     OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
1408     OUT_BATCH(batch, 0x00000000);
1409     ADVANCE_BATCH(batch);
1410 }
1411
1412 static void
1413 i965_render_vertex_elements(VADriverContextP ctx)
1414 {
1415     struct i965_driver_data *i965 = i965_driver_data(ctx);
1416     struct intel_batchbuffer *batch = i965->batch;
1417
1418     if (IS_IRONLAKE(i965->intel.device_info)) {
1419         BEGIN_BATCH(batch, 5);
1420         OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
1421         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
1422         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1423                   VE0_VALID |
1424                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1425                   (0 << VE0_OFFSET_SHIFT));
1426         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1427                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1428                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1429                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1430         /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
1431         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1432                   VE0_VALID |
1433                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1434                   (8 << VE0_OFFSET_SHIFT));
1435         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1436                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1437                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1438                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1439         ADVANCE_BATCH(batch);
1440     } else {
1441         BEGIN_BATCH(batch, 5);
1442         OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
1443         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
1444         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1445                   VE0_VALID |
1446                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1447                   (0 << VE0_OFFSET_SHIFT));
1448         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1449                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1450                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1451                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
1452                   (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
1453         /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
1454         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1455                   VE0_VALID |
1456                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1457                   (8 << VE0_OFFSET_SHIFT));
1458         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1459                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1460                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1461                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
1462                   (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
1463         ADVANCE_BATCH(batch);
1464     }
1465 }
1466
1467 static void
1468 i965_render_upload_image_palette(
1469     VADriverContextP ctx,
1470     struct object_image *obj_image,
1471     unsigned int     alpha
1472 )
1473 {
1474     struct i965_driver_data *i965 = i965_driver_data(ctx);
1475     struct intel_batchbuffer *batch = i965->batch;
1476     unsigned int i;
1477
1478     assert(obj_image);
1479
1480     if (!obj_image)
1481         return;
1482
1483     if (obj_image->image.num_palette_entries == 0)
1484         return;
1485
1486     BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
1487     OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
1488     /* fill palette entries: color in bits 0..23, */
1489     /* alpha (from the caller) in bits 24..31 */
1490     for (i = 0; i < obj_image->image.num_palette_entries; i++)
1491         OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
1492     ADVANCE_BATCH(batch);
1493 }
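/*
 * The palette payload above packs one entry per dword.  A minimal sketch
 * of that packing, assuming 8-bit channels (the helper is illustrative
 * only and unused by the driver):
 */
static inline uint32_t
example_pack_palette_entry(uint32_t rgb, uint32_t alpha)
{
    /* color in bits 0..23, alpha in bits 24..31, as emitted above */
    return ((alpha & 0xffu) << 24) | (rgb & 0xffffffu);
}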
1494
1495 static void
1496 i965_render_startup(VADriverContextP ctx)
1497 {
1498     struct i965_driver_data *i965 = i965_driver_data(ctx);
1499     struct intel_batchbuffer *batch = i965->batch;
1500     struct i965_render_state *render_state = &i965->render_state;
1501
1502     BEGIN_BATCH(batch, 11);
1503     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
1504     OUT_BATCH(batch,
1505               (0 << VB0_BUFFER_INDEX_SHIFT) |
1506               VB0_VERTEXDATA |
1507               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
1508     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
1509
1510     if (IS_IRONLAKE(i965->intel.device_info))
1511         OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
1512     else
1513         OUT_BATCH(batch, 3); /* max vertex index on pre-ILK */
1514
1515     OUT_BATCH(batch, 0);
1516
1517     OUT_BATCH(batch,
1518               CMD_3DPRIMITIVE |
1519               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
1520               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
1521               (0 << 9) |
1522               4);
1523     OUT_BATCH(batch, 3); /* vertex count per instance */
1524     OUT_BATCH(batch, 0); /* start vertex offset */
1525     OUT_BATCH(batch, 1); /* single instance */
1526     OUT_BATCH(batch, 0); /* start instance location */
1527     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
1528     ADVANCE_BATCH(batch);
1529 }
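/*
 * Note on the 3DPRIMITIVE above: _3DPRIM_RECTLIST draws a screen-aligned
 * rectangle from only three vertices -- the hardware derives the fourth
 * corner -- which is why the vertex count per instance is 3 even though a
 * full quad ends up rasterized.
 */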
1530
1531 static void
1532 i965_clear_dest_region(VADriverContextP ctx)
1533 {
1534     struct i965_driver_data *i965 = i965_driver_data(ctx);
1535     struct intel_batchbuffer *batch = i965->batch;
1536     struct i965_render_state *render_state = &i965->render_state;
1537     struct intel_region *dest_region = render_state->draw_region;
1538     unsigned int blt_cmd, br13;
1539     int pitch;
1540
1541     blt_cmd = XY_COLOR_BLT_CMD;
1542     br13 = 0xf0 << 16; /* raster operation: PATCOPY */
1543     pitch = dest_region->pitch;
1544
1545     if (dest_region->cpp == 4) {
1546         br13 |= BR13_8888;
1547         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
1548     } else {
1549         assert(dest_region->cpp == 2);
1550         br13 |= BR13_565;
1551     }
1552
1553     if (dest_region->tiling != I915_TILING_NONE) {
1554         blt_cmd |= XY_COLOR_BLT_DST_TILED;
1555         pitch /= 4; /* tiled pitch is programmed in dwords */
1556     }
1557
1558     br13 |= pitch;
1559
1560     if (IS_GEN6(i965->intel.device_info) ||
1561         IS_GEN7(i965->intel.device_info)) {
1562         intel_batchbuffer_start_atomic_blt(batch, 24);
1563         BEGIN_BLT_BATCH(batch, 6);
1564     } else {
1565         intel_batchbuffer_start_atomic(batch, 24);
1566         BEGIN_BATCH(batch, 6);
1567     }
1568
1569     OUT_BATCH(batch, blt_cmd);
1570     OUT_BATCH(batch, br13);
1571     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
1572     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
1573               (dest_region->x + dest_region->width));
1574     OUT_RELOC(batch, dest_region->bo,
1575               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1576               0);
1577     OUT_BATCH(batch, 0x0); /* solid fill color */
1578     ADVANCE_BATCH(batch);
1579     intel_batchbuffer_end_atomic(batch);
1580 }
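/*
 * BR13 layout used above, as a reader's aid (bit ranges per the usual
 * i915-class blitter documentation): bits 0..15 hold the destination
 * pitch (in dwords once DST_TILED is set), bits 16..23 the raster
 * operation (0xf0 == PATCOPY), and bits 24..25 the color depth selected
 * by BR13_565 / BR13_8888.
 */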
1581
1582 static void
1583 i965_surface_render_pipeline_setup(VADriverContextP ctx)
1584 {
1585     struct i965_driver_data *i965 = i965_driver_data(ctx);
1586     struct intel_batchbuffer *batch = i965->batch;
1587
1588     i965_clear_dest_region(ctx);
1589     intel_batchbuffer_start_atomic(batch, 0x1000);
1590     intel_batchbuffer_emit_mi_flush(batch);
1591     i965_render_pipeline_select(ctx);
1592     i965_render_state_sip(ctx);
1593     i965_render_state_base_address(ctx);
1594     i965_render_binding_table_pointers(ctx);
1595     i965_render_constant_color(ctx);
1596     i965_render_pipelined_pointers(ctx);
1597     i965_render_urb_layout(ctx);
1598     i965_render_cs_urb_layout(ctx);
1599     i965_render_constant_buffer(ctx);
1600     i965_render_drawing_rectangle(ctx);
1601     i965_render_vertex_elements(ctx);
1602     i965_render_startup(ctx);
1603     intel_batchbuffer_end_atomic(batch);
1604 }
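/*
 * The start_atomic(0x1000)/end_atomic pair brackets the whole state block
 * so it cannot be split across a batchbuffer submission; 0x1000 bytes is
 * the space reserved up front.  The usage pattern, roughly:
 *
 *     intel_batchbuffer_start_atomic(batch, 0x1000);
 *     ...emit all pipeline state and the primitive...
 *     intel_batchbuffer_end_atomic(batch);
 *
 * with the final intel_batchbuffer_flush() left to the caller.
 */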
1605
1606 static void
1607 i965_subpic_render_pipeline_setup(VADriverContextP ctx)
1608 {
1609     struct i965_driver_data *i965 = i965_driver_data(ctx);
1610     struct intel_batchbuffer *batch = i965->batch;
1611
1612     intel_batchbuffer_start_atomic(batch, 0x1000);
1613     intel_batchbuffer_emit_mi_flush(batch);
1614     i965_render_pipeline_select(ctx);
1615     i965_render_state_sip(ctx);
1616     i965_render_state_base_address(ctx);
1617     i965_render_binding_table_pointers(ctx);
1618     i965_render_constant_color(ctx);
1619     i965_render_pipelined_pointers(ctx);
1620     i965_render_urb_layout(ctx);
1621     i965_render_cs_urb_layout(ctx);
1622     i965_render_constant_buffer(ctx);
1623     i965_render_drawing_rectangle(ctx);
1624     i965_render_vertex_elements(ctx);
1625     i965_render_startup(ctx);
1626     intel_batchbuffer_end_atomic(batch);
1627 }
1628
1629
1630 static void
1631 i965_render_initialize(VADriverContextP ctx)
1632 {
1633     struct i965_driver_data *i965 = i965_driver_data(ctx);
1634     struct i965_render_state *render_state = &i965->render_state;
1635     dri_bo *bo;
1636
1637     /* VERTEX BUFFER */
1638     dri_bo_unreference(render_state->vb.vertex_buffer);
1639     bo = dri_bo_alloc(i965->intel.bufmgr,
1640                       "vertex buffer",
1641                       4096,
1642                       4096);
1643     assert(bo);
1644     render_state->vb.vertex_buffer = bo;
1645
1646     /* VS */
1647     dri_bo_unreference(render_state->vs.state);
1648     bo = dri_bo_alloc(i965->intel.bufmgr,
1649                       "vs state",
1650                       sizeof(struct i965_vs_unit_state),
1651                       64);
1652     assert(bo);
1653     render_state->vs.state = bo;
1654
1655     /* GS */
1656     /* CLIP */
1657     /* SF */
1658     dri_bo_unreference(render_state->sf.state);
1659     bo = dri_bo_alloc(i965->intel.bufmgr,
1660                       "sf state",
1661                       sizeof(struct i965_sf_unit_state),
1662                       64);
1663     assert(bo);
1664     render_state->sf.state = bo;
1665
1666     /* WM */
1667     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1668     bo = dri_bo_alloc(i965->intel.bufmgr,
1669                       "surface state & binding table",
1670                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1671                       4096);
1672     assert(bo);
1673     render_state->wm.surface_state_binding_table_bo = bo;
1674
1675     dri_bo_unreference(render_state->wm.sampler);
1676     bo = dri_bo_alloc(i965->intel.bufmgr,
1677                       "sampler state",
1678                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1679                       64);
1680     assert(bo);
1681     render_state->wm.sampler = bo;
1682     render_state->wm.sampler_count = 0;
1683
1684     dri_bo_unreference(render_state->wm.state);
1685     bo = dri_bo_alloc(i965->intel.bufmgr,
1686                       "wm state",
1687                       sizeof(struct i965_wm_unit_state),
1688                       64);
1689     assert(bo);
1690     render_state->wm.state = bo;
1691
1692     /* COLOR CALCULATOR */
1693     dri_bo_unreference(render_state->cc.state);
1694     bo = dri_bo_alloc(i965->intel.bufmgr,
1695                       "color calc state",
1696                       sizeof(struct i965_cc_unit_state),
1697                       64);
1698     assert(bo);
1699     render_state->cc.state = bo;
1700
1701     dri_bo_unreference(render_state->cc.viewport);
1702     bo = dri_bo_alloc(i965->intel.bufmgr,
1703                       "cc viewport",
1704                       sizeof(struct i965_cc_viewport),
1705                       64);
1706     assert(bo);
1707     render_state->cc.viewport = bo;
1708 }
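/*
 * Allocation pattern above: each dri_bo_unreference()/dri_bo_alloc() pair
 * drops the previous frame's state object (dri_bo_unreference() accepts a
 * NULL pointer on first use) and takes a fresh buffer, so CPU-side state
 * writes never race a buffer the GPU may still be reading from the
 * previous frame.
 */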
1709
1710 static void
1711 i965_render_put_surface(
1712     VADriverContextP   ctx,
1713     struct object_surface *obj_surface,
1714     const VARectangle *src_rect,
1715     const VARectangle *dst_rect,
1716     unsigned int       flags
1717 )
1718 {
1719     struct i965_driver_data *i965 = i965_driver_data(ctx);
1720     struct intel_batchbuffer *batch = i965->batch;
1721
1722     i965_render_initialize(ctx);
1723     i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
1724     i965_surface_render_pipeline_setup(ctx);
1725     intel_batchbuffer_flush(batch);
1726 }
1727
1728 static void
1729 i965_render_put_subpicture(
1730     VADriverContextP   ctx,
1731     struct object_surface *obj_surface,
1732     const VARectangle *src_rect,
1733     const VARectangle *dst_rect
1734 )
1735 {
1736     struct i965_driver_data *i965 = i965_driver_data(ctx);
1737     struct intel_batchbuffer *batch = i965->batch;
1738     unsigned int index = obj_surface->subpic_render_idx;
1739     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1740
1741     assert(obj_subpic);
1742
1743     i965_render_initialize(ctx);
1744     i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
1745     i965_subpic_render_pipeline_setup(ctx);
1746     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
1747     intel_batchbuffer_flush(batch);
1748 }
1749
1750 /*
1751  * for GEN6+
1752  */
1753 static void
1754 gen6_render_initialize(VADriverContextP ctx)
1755 {
1756     struct i965_driver_data *i965 = i965_driver_data(ctx);
1757     struct i965_render_state *render_state = &i965->render_state;
1758     dri_bo *bo;
1759
1760     /* VERTEX BUFFER */
1761     dri_bo_unreference(render_state->vb.vertex_buffer);
1762     bo = dri_bo_alloc(i965->intel.bufmgr,
1763                       "vertex buffer",
1764                       4096,
1765                       4096);
1766     assert(bo);
1767     render_state->vb.vertex_buffer = bo;
1768
1769     /* WM */
1770     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1771     bo = dri_bo_alloc(i965->intel.bufmgr,
1772                       "surface state & binding table",
1773                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1774                       4096);
1775     assert(bo);
1776     render_state->wm.surface_state_binding_table_bo = bo;
1777
1778     dri_bo_unreference(render_state->wm.sampler);
1779     bo = dri_bo_alloc(i965->intel.bufmgr,
1780                       "sampler state",
1781                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1782                       4096);
1783     assert(bo);
1784     render_state->wm.sampler = bo;
1785     render_state->wm.sampler_count = 0;
1786
1787     /* COLOR CALCULATOR */
1788     dri_bo_unreference(render_state->cc.state);
1789     bo = dri_bo_alloc(i965->intel.bufmgr,
1790                       "color calc state",
1791                       sizeof(struct gen6_color_calc_state),
1792                       4096);
1793     assert(bo);
1794     render_state->cc.state = bo;
1795
1796     /* CC VIEWPORT */
1797     dri_bo_unreference(render_state->cc.viewport);
1798     bo = dri_bo_alloc(i965->intel.bufmgr,
1799                       "cc viewport",
1800                       sizeof(struct i965_cc_viewport),
1801                       4096);
1802     assert(bo);
1803     render_state->cc.viewport = bo;
1804
1805     /* BLEND STATE */
1806     dri_bo_unreference(render_state->cc.blend);
1807     bo = dri_bo_alloc(i965->intel.bufmgr,
1808                       "blend state",
1809                       sizeof(struct gen6_blend_state),
1810                       4096);
1811     assert(bo);
1812     render_state->cc.blend = bo;
1813
1814     /* DEPTH & STENCIL STATE */
1815     dri_bo_unreference(render_state->cc.depth_stencil);
1816     bo = dri_bo_alloc(i965->intel.bufmgr,
1817                       "depth & stencil state",
1818                       sizeof(struct gen6_depth_stencil_state),
1819                       4096);
1820     assert(bo);
1821     render_state->cc.depth_stencil = bo;
1822 }
1823
1824 static void
1825 gen6_render_color_calc_state(VADriverContextP ctx)
1826 {
1827     struct i965_driver_data *i965 = i965_driver_data(ctx);
1828     struct i965_render_state *render_state = &i965->render_state;
1829     struct gen6_color_calc_state *color_calc_state;
1830
1831     dri_bo_map(render_state->cc.state, 1);
1832     assert(render_state->cc.state->virtual);
1833     color_calc_state = render_state->cc.state->virtual;
1834     memset(color_calc_state, 0, sizeof(*color_calc_state));
1835     color_calc_state->constant_r = 1.0;
1836     color_calc_state->constant_g = 0.0;
1837     color_calc_state->constant_b = 1.0;
1838     color_calc_state->constant_a = 1.0;
1839     dri_bo_unmap(render_state->cc.state);
1840 }
1841
1842 static void
1843 gen6_render_blend_state(VADriverContextP ctx)
1844 {
1845     struct i965_driver_data *i965 = i965_driver_data(ctx);
1846     struct i965_render_state *render_state = &i965->render_state;
1847     struct gen6_blend_state *blend_state;
1848
1849     dri_bo_map(render_state->cc.blend, 1);
1850     assert(render_state->cc.blend->virtual);
1851     blend_state = render_state->cc.blend->virtual;
1852     memset(blend_state, 0, sizeof(*blend_state));
1853     blend_state->blend1.logic_op_enable = 1;
1854     blend_state->blend1.logic_op_func = 0xc;
1855     dri_bo_unmap(render_state->cc.blend);
1856 }
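/*
 * logic_op_func 0xc is the COPY logic op (source overwrites destination),
 * so the main video plane is written without blending; contrast
 * gen6_subpicture_render_blend_state() below, which enables genuine
 * source-over alpha blending for subpictures.
 */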
1857
1858 static void
1859 gen6_render_depth_stencil_state(VADriverContextP ctx)
1860 {
1861     struct i965_driver_data *i965 = i965_driver_data(ctx);
1862     struct i965_render_state *render_state = &i965->render_state;
1863     struct gen6_depth_stencil_state *depth_stencil_state;
1864
1865     dri_bo_map(render_state->cc.depth_stencil, 1);
1866     assert(render_state->cc.depth_stencil->virtual);
1867     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1868     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
1869     dri_bo_unmap(render_state->cc.depth_stencil);
1870 }
1871
1872 static void
1873 gen6_render_setup_states(
1874     VADriverContextP   ctx,
1875     struct object_surface *obj_surface,
1876     const VARectangle *src_rect,
1877     const VARectangle *dst_rect,
1878     unsigned int       flags
1879 )
1880 {
1881     i965_render_dest_surface_state(ctx, 0);
1882     i965_render_src_surfaces_state(ctx, obj_surface, flags);
1883     i965_render_sampler(ctx);
1884     i965_render_cc_viewport(ctx);
1885     gen6_render_color_calc_state(ctx);
1886     gen6_render_blend_state(ctx);
1887     gen6_render_depth_stencil_state(ctx);
1888     i965_render_upload_constants(ctx, obj_surface, flags);
1889     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
1890 }
1891
1892 static void
1893 gen6_emit_invarient_states(VADriverContextP ctx)
1894 {
1895     struct i965_driver_data *i965 = i965_driver_data(ctx);
1896     struct intel_batchbuffer *batch = i965->batch;
1897
1898     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
1899
1900     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
1901     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
1902               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
1903     OUT_BATCH(batch, 0);
1904
1905     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
1906     OUT_BATCH(batch, 1);
1907
1908     /* Set system instruction pointer */
1909     OUT_BATCH(batch, CMD_STATE_SIP | 0);
1910     OUT_BATCH(batch, 0);
1911 }
1912
1913 static void
1914 gen6_emit_state_base_address(VADriverContextP ctx)
1915 {
1916     struct i965_driver_data *i965 = i965_driver_data(ctx);
1917     struct intel_batchbuffer *batch = i965->batch;
1918     struct i965_render_state *render_state = &i965->render_state;
1919
1920     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
1921     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
1922     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
1923     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
1924     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
1925     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
1926     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
1927     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
1928     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
1929     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
1930 }
1931
1932 static void
1933 gen6_emit_viewport_state_pointers(VADriverContextP ctx)
1934 {
1935     struct i965_driver_data *i965 = i965_driver_data(ctx);
1936     struct intel_batchbuffer *batch = i965->batch;
1937     struct i965_render_state *render_state = &i965->render_state;
1938
1939     OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
1940               GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
1941               (4 - 2));
1942     OUT_BATCH(batch, 0);
1943     OUT_BATCH(batch, 0);
1944     OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1945 }
1946
1947 static void
1948 gen6_emit_urb(VADriverContextP ctx)
1949 {
1950     struct i965_driver_data *i965 = i965_driver_data(ctx);
1951     struct intel_batchbuffer *batch = i965->batch;
1952
1953     OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
1954     OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
1955               (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
1956     OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
1957               (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
1958 }
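/*
 * URB sizing sketch for the pass-through VS: the size field is encoded as
 * (n - 1), so ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) requests the
 * smallest VS entry, 24 entries is the documented GEN6 minimum, and the
 * GS is given no entries because no GS kernel is ever dispatched here.
 */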
1959
1960 static void
1961 gen6_emit_cc_state_pointers(VADriverContextP ctx)
1962 {
1963     struct i965_driver_data *i965 = i965_driver_data(ctx);
1964     struct intel_batchbuffer *batch = i965->batch;
1965     struct i965_render_state *render_state = &i965->render_state;
1966
1967     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
1968     OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1969     OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1970     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1971 }
1972
1973 static void
1974 gen6_emit_sampler_state_pointers(VADriverContextP ctx)
1975 {
1976     struct i965_driver_data *i965 = i965_driver_data(ctx);
1977     struct intel_batchbuffer *batch = i965->batch;
1978     struct i965_render_state *render_state = &i965->render_state;
1979
1980     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
1981               GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
1982               (4 - 2));
1983     OUT_BATCH(batch, 0); /* VS */
1984     OUT_BATCH(batch, 0); /* GS */
1985     OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1986 }
1987
1988 static void
1989 gen6_emit_binding_table(VADriverContextP ctx)
1990 {
1991     struct i965_driver_data *i965 = i965_driver_data(ctx);
1992     struct intel_batchbuffer *batch = i965->batch;
1993
1994     /* Binding table pointers */
1995     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
1996               GEN6_BINDING_TABLE_MODIFY_PS |
1997               (4 - 2));
1998     OUT_BATCH(batch, 0);        /* vs */
1999     OUT_BATCH(batch, 0);        /* gs */
2000     /* Only the PS uses the binding table */
2001     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
2002 }
2003
2004 static void
2005 gen6_emit_depth_buffer_state(VADriverContextP ctx)
2006 {
2007     struct i965_driver_data *i965 = i965_driver_data(ctx);
2008     struct intel_batchbuffer *batch = i965->batch;
2009
2010     OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
2011     OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
2012               (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
2013     OUT_BATCH(batch, 0);
2014     OUT_BATCH(batch, 0);
2015     OUT_BATCH(batch, 0);
2016     OUT_BATCH(batch, 0);
2017     OUT_BATCH(batch, 0);
2018
2019     OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
2020     OUT_BATCH(batch, 0);
2021 }
2022
2023 static void
2024 gen6_emit_drawing_rectangle(VADriverContextP ctx)
2025 {
2026     i965_render_drawing_rectangle(ctx);
2027 }
2028
2029 static void
2030 gen6_emit_vs_state(VADriverContextP ctx)
2031 {
2032     struct i965_driver_data *i965 = i965_driver_data(ctx);
2033     struct intel_batchbuffer *batch = i965->batch;
2034
2035     /* disable VS constant buffer */
2036     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
2037     OUT_BATCH(batch, 0);
2038     OUT_BATCH(batch, 0);
2039     OUT_BATCH(batch, 0);
2040     OUT_BATCH(batch, 0);
2041
2042     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2043     OUT_BATCH(batch, 0); /* without VS kernel */
2044     OUT_BATCH(batch, 0);
2045     OUT_BATCH(batch, 0);
2046     OUT_BATCH(batch, 0);
2047     OUT_BATCH(batch, 0); /* pass-through */
2048 }
2049
2050 static void
2051 gen6_emit_gs_state(VADriverContextP ctx)
2052 {
2053     struct i965_driver_data *i965 = i965_driver_data(ctx);
2054     struct intel_batchbuffer *batch = i965->batch;
2055
2056     /* disable GS constant buffer */
2057     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
2058     OUT_BATCH(batch, 0);
2059     OUT_BATCH(batch, 0);
2060     OUT_BATCH(batch, 0);
2061     OUT_BATCH(batch, 0);
2062
2063     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2064     OUT_BATCH(batch, 0); /* without GS kernel */
2065     OUT_BATCH(batch, 0);
2066     OUT_BATCH(batch, 0);
2067     OUT_BATCH(batch, 0);
2068     OUT_BATCH(batch, 0);
2069     OUT_BATCH(batch, 0); /* pass-through */
2070 }
2071
2072 static void
2073 gen6_emit_clip_state(VADriverContextP ctx)
2074 {
2075     struct i965_driver_data *i965 = i965_driver_data(ctx);
2076     struct intel_batchbuffer *batch = i965->batch;
2077
2078     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2079     OUT_BATCH(batch, 0);
2080     OUT_BATCH(batch, 0); /* pass-through */
2081     OUT_BATCH(batch, 0);
2082 }
2083
2084 static void
2085 gen6_emit_sf_state(VADriverContextP ctx)
2086 {
2087     struct i965_driver_data *i965 = i965_driver_data(ctx);
2088     struct intel_batchbuffer *batch = i965->batch;
2089
2090     OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
2091     OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
2092               (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
2093               (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
2094     OUT_BATCH(batch, 0);
2095     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2096     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
2097     OUT_BATCH(batch, 0);
2098     OUT_BATCH(batch, 0);
2099     OUT_BATCH(batch, 0);
2100     OUT_BATCH(batch, 0);
2101     OUT_BATCH(batch, 0); /* DW9 */
2102     OUT_BATCH(batch, 0);
2103     OUT_BATCH(batch, 0);
2104     OUT_BATCH(batch, 0);
2105     OUT_BATCH(batch, 0);
2106     OUT_BATCH(batch, 0); /* DW14 */
2107     OUT_BATCH(batch, 0);
2108     OUT_BATCH(batch, 0);
2109     OUT_BATCH(batch, 0);
2110     OUT_BATCH(batch, 0);
2111     OUT_BATCH(batch, 0); /* DW19 */
2112 }
2113
2114 static void
2115 gen6_emit_wm_state(VADriverContextP ctx, int kernel)
2116 {
2117     struct i965_driver_data *i965 = i965_driver_data(ctx);
2118     struct intel_batchbuffer *batch = i965->batch;
2119     struct i965_render_state *render_state = &i965->render_state;
2120
2121     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
2122               GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
2123               (5 - 2));
2124     OUT_RELOC(batch,
2125               render_state->curbe.bo,
2126               I915_GEM_DOMAIN_INSTRUCTION, 0,
2127               (URB_CS_ENTRY_SIZE - 1));
2128     OUT_BATCH(batch, 0);
2129     OUT_BATCH(batch, 0);
2130     OUT_BATCH(batch, 0);
2131
2132     OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
2133     OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
2134               I915_GEM_DOMAIN_INSTRUCTION, 0,
2135               0);
2136     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
2137               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2138     OUT_BATCH(batch, 0);
2139     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
2140     OUT_BATCH(batch, ((i965->intel.device_info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
2141               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
2142               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
2143     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
2144               GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2145     OUT_BATCH(batch, 0);
2146     OUT_BATCH(batch, 0);
2147 }
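/*
 * Threading note: the WM max-threads field is programmed as (threads - 1)
 * straight from device_info, and SIMD16 dispatch is selected to match the
 * pixel shader kernels referenced through render_kernels[kernel].
 */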
2148
2149 static void
2150 gen6_emit_vertex_element_state(VADriverContextP ctx)
2151 {
2152     struct i965_driver_data *i965 = i965_driver_data(ctx);
2153     struct intel_batchbuffer *batch = i965->batch;
2154
2155     /* Set up our vertex elements, sourced from the single vertex buffer. */
2156     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2157     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2158     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2159               GEN6_VE0_VALID |
2160               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2161               (0 << VE0_OFFSET_SHIFT));
2162     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2163               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2164               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2165               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2166     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2167     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2168               GEN6_VE0_VALID |
2169               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2170               (8 << VE0_OFFSET_SHIFT));
2171     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2172               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2173               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2174               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2175 }
2176
2177 static void
2178 gen6_emit_vertices(VADriverContextP ctx)
2179 {
2180     struct i965_driver_data *i965 = i965_driver_data(ctx);
2181     struct intel_batchbuffer *batch = i965->batch;
2182     struct i965_render_state *render_state = &i965->render_state;
2183
2184     BEGIN_BATCH(batch, 11);
2185     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
2186     OUT_BATCH(batch,
2187               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2188               GEN6_VB0_VERTEXDATA |
2189               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2190     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2191     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2192     OUT_BATCH(batch, 0);
2193
2194     OUT_BATCH(batch,
2195               CMD_3DPRIMITIVE |
2196               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
2197               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
2198               (0 << 9) |
2199               4);
2200     OUT_BATCH(batch, 3); /* vertex count per instance */
2201     OUT_BATCH(batch, 0); /* start vertex offset */
2202     OUT_BATCH(batch, 1); /* single instance */
2203     OUT_BATCH(batch, 0); /* start instance location */
2204     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
2205     ADVANCE_BATCH(batch);
2206 }
2207
2208 static void
2209 gen6_render_emit_states(VADriverContextP ctx, int kernel)
2210 {
2211     struct i965_driver_data *i965 = i965_driver_data(ctx);
2212     struct intel_batchbuffer *batch = i965->batch;
2213
2214     intel_batchbuffer_start_atomic(batch, 0x1000);
2215     intel_batchbuffer_emit_mi_flush(batch);
2216     gen6_emit_invarient_states(ctx);
2217     gen6_emit_state_base_address(ctx);
2218     gen6_emit_viewport_state_pointers(ctx);
2219     gen6_emit_urb(ctx);
2220     gen6_emit_cc_state_pointers(ctx);
2221     gen6_emit_sampler_state_pointers(ctx);
2222     gen6_emit_vs_state(ctx);
2223     gen6_emit_gs_state(ctx);
2224     gen6_emit_clip_state(ctx);
2225     gen6_emit_sf_state(ctx);
2226     gen6_emit_wm_state(ctx, kernel);
2227     gen6_emit_binding_table(ctx);
2228     gen6_emit_depth_buffer_state(ctx);
2229     gen6_emit_drawing_rectangle(ctx);
2230     gen6_emit_vertex_element_state(ctx);
2231     gen6_emit_vertices(ctx);
2232     intel_batchbuffer_end_atomic(batch);
2233 }
2234
2235 static void
2236 gen6_render_put_surface(
2237     VADriverContextP   ctx,
2238     struct object_surface *obj_surface,
2239     const VARectangle *src_rect,
2240     const VARectangle *dst_rect,
2241     unsigned int       flags
2242 )
2243 {
2244     struct i965_driver_data *i965 = i965_driver_data(ctx);
2245     struct intel_batchbuffer *batch = i965->batch;
2246
2247     gen6_render_initialize(ctx);
2248     gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
2249     i965_clear_dest_region(ctx);
2250     gen6_render_emit_states(ctx, PS_KERNEL);
2251     intel_batchbuffer_flush(batch);
2252 }
2253
2254 static void
2255 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2256 {
2257     struct i965_driver_data *i965 = i965_driver_data(ctx);
2258     struct i965_render_state *render_state = &i965->render_state;
2259     struct gen6_blend_state *blend_state;
2260
2261     dri_bo_unmap(render_state->cc.state);
2262     dri_bo_map(render_state->cc.blend, 1);
2263     assert(render_state->cc.blend->virtual);
2264     blend_state = render_state->cc.blend->virtual;
2265     memset(blend_state, 0, sizeof(*blend_state));
2266     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2267     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2268     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2269     blend_state->blend0.blend_enable = 1;
2270     blend_state->blend1.post_blend_clamp_enable = 1;
2271     blend_state->blend1.pre_blend_clamp_enable = 1;
2272     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2273     dri_bo_unmap(render_state->cc.blend);
2274 }
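/*
 * These factors implement classic source-over compositing for the
 * subpicture: out = src * src.alpha + dst * (1 - src.alpha), with
 * pre- and post-blend clamping to [0, 1] enabled.
 */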
2275
2276 static void
2277 gen6_subpicture_render_setup_states(
2278     VADriverContextP   ctx,
2279     struct object_surface *obj_surface,
2280     const VARectangle *src_rect,
2281     const VARectangle *dst_rect
2282 )
2283 {
2284     i965_render_dest_surface_state(ctx, 0);
2285     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
2286     i965_render_sampler(ctx);
2287     i965_render_cc_viewport(ctx);
2288     gen6_render_color_calc_state(ctx);
2289     gen6_subpicture_render_blend_state(ctx);
2290     gen6_render_depth_stencil_state(ctx);
2291     i965_subpic_render_upload_constants(ctx, obj_surface);
2292     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
2293 }
2294
2295 static void
2296 gen6_render_put_subpicture(
2297     VADriverContextP   ctx,
2298     struct object_surface *obj_surface,
2299     const VARectangle *src_rect,
2300     const VARectangle *dst_rect
2301 )
2302 {
2303     struct i965_driver_data *i965 = i965_driver_data(ctx);
2304     struct intel_batchbuffer *batch = i965->batch;
2305     unsigned int index = obj_surface->subpic_render_idx;
2306     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
2307
2308     assert(obj_subpic);
2309     gen6_render_initialize(ctx);
2310     gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
2311     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2312     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
2313     intel_batchbuffer_flush(batch);
2314 }
2315
2316 /*
2317  * for GEN7
2318  */
2319 static void
2320 gen7_render_initialize(VADriverContextP ctx)
2321 {
2322     struct i965_driver_data *i965 = i965_driver_data(ctx);
2323     struct i965_render_state *render_state = &i965->render_state;
2324     dri_bo *bo;
2325
2326     /* VERTEX BUFFER */
2327     dri_bo_unreference(render_state->vb.vertex_buffer);
2328     bo = dri_bo_alloc(i965->intel.bufmgr,
2329                       "vertex buffer",
2330                       4096,
2331                       4096);
2332     assert(bo);
2333     render_state->vb.vertex_buffer = bo;
2334
2335     /* WM */
2336     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2337     bo = dri_bo_alloc(i965->intel.bufmgr,
2338                       "surface state & binding table",
2339                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2340                       4096);
2341     assert(bo);
2342     render_state->wm.surface_state_binding_table_bo = bo;
2343
2344     dri_bo_unreference(render_state->wm.sampler);
2345     bo = dri_bo_alloc(i965->intel.bufmgr,
2346                       "sampler state",
2347                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
2348                       4096);
2349     assert(bo);
2350     render_state->wm.sampler = bo;
2351     render_state->wm.sampler_count = 0;
2352
2353     /* COLOR CALCULATOR */
2354     dri_bo_unreference(render_state->cc.state);
2355     bo = dri_bo_alloc(i965->intel.bufmgr,
2356                       "color calc state",
2357                       sizeof(struct gen6_color_calc_state),
2358                       4096);
2359     assert(bo);
2360     render_state->cc.state = bo;
2361
2362     /* CC VIEWPORT */
2363     dri_bo_unreference(render_state->cc.viewport);
2364     bo = dri_bo_alloc(i965->intel.bufmgr,
2365                       "cc viewport",
2366                       sizeof(struct i965_cc_viewport),
2367                       4096);
2368     assert(bo);
2369     render_state->cc.viewport = bo;
2370
2371     /* BLEND STATE */
2372     dri_bo_unreference(render_state->cc.blend);
2373     bo = dri_bo_alloc(i965->intel.bufmgr,
2374                       "blend state",
2375                       sizeof(struct gen6_blend_state),
2376                       4096);
2377     assert(bo);
2378     render_state->cc.blend = bo;
2379
2380     /* DEPTH & STENCIL STATE */
2381     dri_bo_unreference(render_state->cc.depth_stencil);
2382     bo = dri_bo_alloc(i965->intel.bufmgr,
2383                       "depth & stencil state",
2384                       sizeof(struct gen6_depth_stencil_state),
2385                       4096);
2386     assert(bo);
2387     render_state->cc.depth_stencil = bo;
2388 }
2389
2390 /*
2391  * for GEN8
2392  */
2393 #define ALIGNMENT       64
2394
2395 static void
2396 gen7_render_color_calc_state(VADriverContextP ctx)
2397 {
2398     struct i965_driver_data *i965 = i965_driver_data(ctx);
2399     struct i965_render_state *render_state = &i965->render_state;
2400     struct gen6_color_calc_state *color_calc_state;
2401
2402     dri_bo_map(render_state->cc.state, 1);
2403     assert(render_state->cc.state->virtual);
2404     color_calc_state = render_state->cc.state->virtual;
2405     memset(color_calc_state, 0, sizeof(*color_calc_state));
2406     color_calc_state->constant_r = 1.0;
2407     color_calc_state->constant_g = 0.0;
2408     color_calc_state->constant_b = 1.0;
2409     color_calc_state->constant_a = 1.0;
2410     dri_bo_unmap(render_state->cc.state);
2411 }
2412
2413 static void
2414 gen7_render_blend_state(VADriverContextP ctx)
2415 {
2416     struct i965_driver_data *i965 = i965_driver_data(ctx);
2417     struct i965_render_state *render_state = &i965->render_state;
2418     struct gen6_blend_state *blend_state;
2419
2420     dri_bo_map(render_state->cc.blend, 1);
2421     assert(render_state->cc.blend->virtual);
2422     blend_state = render_state->cc.blend->virtual;
2423     memset(blend_state, 0, sizeof(*blend_state));
2424     blend_state->blend1.logic_op_enable = 1;
2425     blend_state->blend1.logic_op_func = 0xc;
2426     blend_state->blend1.pre_blend_clamp_enable = 1;
2427     dri_bo_unmap(render_state->cc.blend);
2428 }
2429
2430 static void
2431 gen7_render_depth_stencil_state(VADriverContextP ctx)
2432 {
2433     struct i965_driver_data *i965 = i965_driver_data(ctx);
2434     struct i965_render_state *render_state = &i965->render_state;
2435     struct gen6_depth_stencil_state *depth_stencil_state;
2436
2437     dri_bo_map(render_state->cc.depth_stencil, 1);
2438     assert(render_state->cc.depth_stencil->virtual);
2439     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2440     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2441     dri_bo_unmap(render_state->cc.depth_stencil);
2442 }
2443
2444 static void
2445 gen7_render_sampler(VADriverContextP ctx)
2446 {
2447     struct i965_driver_data *i965 = i965_driver_data(ctx);
2448     struct i965_render_state *render_state = &i965->render_state;
2449     struct gen7_sampler_state *sampler_state;
2450     int i;
2451
2452     assert(render_state->wm.sampler_count > 0);
2453     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2454
2455     dri_bo_map(render_state->wm.sampler, 1);
2456     assert(render_state->wm.sampler->virtual);
2457     sampler_state = render_state->wm.sampler->virtual;
2458     for (i = 0; i < render_state->wm.sampler_count; i++) {
2459         memset(sampler_state, 0, sizeof(*sampler_state));
2460         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2461         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2462         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2463         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2464         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2465         sampler_state++;
2466     }
2467
2468     dri_bo_unmap(render_state->wm.sampler);
2469 }
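/*
 * Every sampler is configured identically here: bilinear min/mag
 * filtering with clamp-to-edge on all three texture coordinates, which is
 * what the scaled blit from the source rectangle to the destination
 * rectangle expects.
 */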
2470
2471
2472 static void
2473 gen7_render_setup_states(
2474     VADriverContextP   ctx,
2475     struct object_surface *obj_surface,
2476     const VARectangle *src_rect,
2477     const VARectangle *dst_rect,
2478     unsigned int       flags
2479 )
2480 {
2481     i965_render_dest_surface_state(ctx, 0);
2482     i965_render_src_surfaces_state(ctx, obj_surface, flags);
2483     gen7_render_sampler(ctx);
2484     i965_render_cc_viewport(ctx);
2485     gen7_render_color_calc_state(ctx);
2486     gen7_render_blend_state(ctx);
2487     gen7_render_depth_stencil_state(ctx);
2488     i965_render_upload_constants(ctx, obj_surface, flags);
2489     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
2490 }
2491
2492
2493 static void
2494 gen7_emit_invarient_states(VADriverContextP ctx)
2495 {
2496     struct i965_driver_data *i965 = i965_driver_data(ctx);
2497     struct intel_batchbuffer *batch = i965->batch;
2498
2499     BEGIN_BATCH(batch, 1);
2500     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
2501     ADVANCE_BATCH(batch);
2502
2503     BEGIN_BATCH(batch, 4);
2504     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
2505     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
2506               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
2507     OUT_BATCH(batch, 0);
2508     OUT_BATCH(batch, 0);
2509     ADVANCE_BATCH(batch);
2510
2511     BEGIN_BATCH(batch, 2);
2512     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
2513     OUT_BATCH(batch, 1);
2514     ADVANCE_BATCH(batch);
2515
2516     /* Set system instruction pointer */
2517     BEGIN_BATCH(batch, 2);
2518     OUT_BATCH(batch, CMD_STATE_SIP | 0);
2519     OUT_BATCH(batch, 0);
2520     ADVANCE_BATCH(batch);
2521 }
2522
2523 static void
2524 gen7_emit_state_base_address(VADriverContextP ctx)
2525 {
2526     struct i965_driver_data *i965 = i965_driver_data(ctx);
2527     struct intel_batchbuffer *batch = i965->batch;
2528     struct i965_render_state *render_state = &i965->render_state;
2529
2530     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2531     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2532     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2533     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2534     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2535     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2536     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
2537     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2538     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2539     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2540 }
2541
2542 static void
2543 gen7_emit_viewport_state_pointers(VADriverContextP ctx)
2544 {
2545     struct i965_driver_data *i965 = i965_driver_data(ctx);
2546     struct intel_batchbuffer *batch = i965->batch;
2547     struct i965_render_state *render_state = &i965->render_state;
2548
2549     BEGIN_BATCH(batch, 2);
2550     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
2551     OUT_RELOC(batch,
2552               render_state->cc.viewport,
2553               I915_GEM_DOMAIN_INSTRUCTION, 0,
2554               0);
2555     ADVANCE_BATCH(batch);
2556
2557     BEGIN_BATCH(batch, 2);
2558     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
2559     OUT_BATCH(batch, 0);
2560     ADVANCE_BATCH(batch);
2561 }
2562
2563 /*
2564  * URB layout on GEN7
2565  * ----------------------------------------
2566  * | PS Push Constants (8KB) | VS entries |
2567  * ----------------------------------------
2568  */
2569 static void
2570 gen7_emit_urb(VADriverContextP ctx)
2571 {
2572     struct i965_driver_data *i965 = i965_driver_data(ctx);
2573     struct intel_batchbuffer *batch = i965->batch;
2574     unsigned int num_urb_entries = 32;
2575
2576     if (IS_HASWELL(i965->intel.device_info))
2577         num_urb_entries = 64;
2578
2579     BEGIN_BATCH(batch, 2);
2580     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
2581     OUT_BATCH(batch, 8); /* in 1KBs */
2582     ADVANCE_BATCH(batch);
2583
2584     BEGIN_BATCH(batch, 2);
2585     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
2586     OUT_BATCH(batch,
2587               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
2588               (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
2589               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2590     ADVANCE_BATCH(batch);
2591
2592     BEGIN_BATCH(batch, 2);
2593     OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
2594     OUT_BATCH(batch,
2595               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2596               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2597     ADVANCE_BATCH(batch);
2598
2599     BEGIN_BATCH(batch, 2);
2600     OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
2601     OUT_BATCH(batch,
2602               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2603               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2604     ADVANCE_BATCH(batch);
2605
2606     BEGIN_BATCH(batch, 2);
2607     OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
2608     OUT_BATCH(batch,
2609               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2610               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2611     ADVANCE_BATCH(batch);
2612 }
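/*
 * Tying the diagram above to the programming: the PS push-constant
 * allocation claims the first 8KB of the URB, so the fixed-function
 * sections start at offset 1 or 2 (the starting-address field counts in
 * 8KB units); the VS receives num_urb_entries (32, or 64 on Haswell with
 * its larger URB) and GS/HS/DS receive none.
 */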
2613
2614 static void
2615 gen7_emit_cc_state_pointers(VADriverContextP ctx)
2616 {
2617     struct i965_driver_data *i965 = i965_driver_data(ctx);
2618     struct intel_batchbuffer *batch = i965->batch;
2619     struct i965_render_state *render_state = &i965->render_state;
2620
2621     BEGIN_BATCH(batch, 2);
2622     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
2623     OUT_RELOC(batch,
2624               render_state->cc.state,
2625               I915_GEM_DOMAIN_INSTRUCTION, 0,
2626               1);
2627     ADVANCE_BATCH(batch);
2628
2629     BEGIN_BATCH(batch, 2);
2630     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
2631     OUT_RELOC(batch,
2632               render_state->cc.blend,
2633               I915_GEM_DOMAIN_INSTRUCTION, 0,
2634               1);
2635     ADVANCE_BATCH(batch);
2636
2637     BEGIN_BATCH(batch, 2);
2638     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
2639     OUT_RELOC(batch,
2640               render_state->cc.depth_stencil,
2641               I915_GEM_DOMAIN_INSTRUCTION, 0,
2642               1);
2643     ADVANCE_BATCH(batch);
2644 }
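/*
 * The relocation delta of 1 on each pointer above sets bit 0 of the
 * emitted dword; on GEN7 that low bit serves as the pointer-valid/modify
 * flag of the *_STATE_POINTERS packets, while the state offset itself is
 * 64-byte aligned and occupies the upper bits.
 */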
2645
2646 static void
2647 gen7_emit_sampler_state_pointers(VADriverContextP ctx)
2648 {
2649     struct i965_driver_data *i965 = i965_driver_data(ctx);
2650     struct intel_batchbuffer *batch = i965->batch;
2651     struct i965_render_state *render_state = &i965->render_state;
2652
2653     BEGIN_BATCH(batch, 2);
2654     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
2655     OUT_RELOC(batch,
2656               render_state->wm.sampler,
2657               I915_GEM_DOMAIN_INSTRUCTION, 0,
2658               0);
2659     ADVANCE_BATCH(batch);
2660 }
2661
2662 static void
2663 gen7_emit_binding_table(VADriverContextP ctx)
2664 {
2665     struct i965_driver_data *i965 = i965_driver_data(ctx);
2666     struct intel_batchbuffer *batch = i965->batch;
2667
2668     BEGIN_BATCH(batch, 2);
2669     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
2670     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
2671     ADVANCE_BATCH(batch);
2672 }
2673
2674 static void
2675 gen7_emit_depth_buffer_state(VADriverContextP ctx)
2676 {
2677     struct i965_driver_data *i965 = i965_driver_data(ctx);
2678     struct intel_batchbuffer *batch = i965->batch;
2679
2680     BEGIN_BATCH(batch, 7);
2681     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
2682     OUT_BATCH(batch,
2683               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
2684               (I965_SURFACE_NULL << 29));
2685     OUT_BATCH(batch, 0);
2686     OUT_BATCH(batch, 0);
2687     OUT_BATCH(batch, 0);
2688     OUT_BATCH(batch, 0);
2689     OUT_BATCH(batch, 0);
2690     ADVANCE_BATCH(batch);
2691
2692     BEGIN_BATCH(batch, 3);
2693     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
2694     OUT_BATCH(batch, 0);
2695     OUT_BATCH(batch, 0);
2696     ADVANCE_BATCH(batch);
2697 }
2698
2699 static void
2700 gen7_emit_drawing_rectangle(VADriverContextP ctx)
2701 {
2702     i965_render_drawing_rectangle(ctx);
2703 }
2704
2705 static void
2706 gen7_emit_vs_state(VADriverContextP ctx)
2707 {
2708     struct i965_driver_data *i965 = i965_driver_data(ctx);
2709     struct intel_batchbuffer *batch = i965->batch;
2710
2711     /* disable VS constant buffer */
2712     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
2713     OUT_BATCH(batch, 0);
2714     OUT_BATCH(batch, 0);
2715     OUT_BATCH(batch, 0);
2716     OUT_BATCH(batch, 0);
2717     OUT_BATCH(batch, 0);
2718     OUT_BATCH(batch, 0);
2719
2720     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2721     OUT_BATCH(batch, 0); /* without VS kernel */
2722     OUT_BATCH(batch, 0);
2723     OUT_BATCH(batch, 0);
2724     OUT_BATCH(batch, 0);
2725     OUT_BATCH(batch, 0); /* pass-through */
2726 }
2727
2728 static void
2729 gen7_emit_bypass_state(VADriverContextP ctx)
2730 {
2731     struct i965_driver_data *i965 = i965_driver_data(ctx);
2732     struct intel_batchbuffer *batch = i965->batch;
2733
2734     /* bypass GS */
2735     BEGIN_BATCH(batch, 7);
2736     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
2737     OUT_BATCH(batch, 0);
2738     OUT_BATCH(batch, 0);
2739     OUT_BATCH(batch, 0);
2740     OUT_BATCH(batch, 0);
2741     OUT_BATCH(batch, 0);
2742     OUT_BATCH(batch, 0);
2743     ADVANCE_BATCH(batch);
2744
2745     BEGIN_BATCH(batch, 7);
2746     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2747     OUT_BATCH(batch, 0); /* without GS kernel */
2748     OUT_BATCH(batch, 0);
2749     OUT_BATCH(batch, 0);
2750     OUT_BATCH(batch, 0);
2751     OUT_BATCH(batch, 0);
2752     OUT_BATCH(batch, 0); /* pass-through */
2753     ADVANCE_BATCH(batch);
2754
2755     BEGIN_BATCH(batch, 2);
2756     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
2757     OUT_BATCH(batch, 0);
2758     ADVANCE_BATCH(batch);
2759
2760     /* disable HS */
2761     BEGIN_BATCH(batch, 7);
2762     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
2763     OUT_BATCH(batch, 0);
2764     OUT_BATCH(batch, 0);
2765     OUT_BATCH(batch, 0);
2766     OUT_BATCH(batch, 0);
2767     OUT_BATCH(batch, 0);
2768     OUT_BATCH(batch, 0);
2769     ADVANCE_BATCH(batch);
2770
2771     BEGIN_BATCH(batch, 7);
2772     OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
2773     OUT_BATCH(batch, 0);
2774     OUT_BATCH(batch, 0);
2775     OUT_BATCH(batch, 0);
2776     OUT_BATCH(batch, 0);
2777     OUT_BATCH(batch, 0);
2778     OUT_BATCH(batch, 0);
2779     ADVANCE_BATCH(batch);
2780
2781     BEGIN_BATCH(batch, 2);
2782     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
2783     OUT_BATCH(batch, 0);
2784     ADVANCE_BATCH(batch);
2785
2786     /* Disable TE */
2787     BEGIN_BATCH(batch, 4);
2788     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
2789     OUT_BATCH(batch, 0);
2790     OUT_BATCH(batch, 0);
2791     OUT_BATCH(batch, 0);
2792     ADVANCE_BATCH(batch);
2793
2794     /* Disable DS */
2795     BEGIN_BATCH(batch, 7);
2796     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
2797     OUT_BATCH(batch, 0);
2798     OUT_BATCH(batch, 0);
2799     OUT_BATCH(batch, 0);
2800     OUT_BATCH(batch, 0);
2801     OUT_BATCH(batch, 0);
2802     OUT_BATCH(batch, 0);
2803     ADVANCE_BATCH(batch);
2804
2805     BEGIN_BATCH(batch, 6);
2806     OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
2807     OUT_BATCH(batch, 0);
2808     OUT_BATCH(batch, 0);
2809     OUT_BATCH(batch, 0);
2810     OUT_BATCH(batch, 0);
2811     OUT_BATCH(batch, 0);
2812     ADVANCE_BATCH(batch);
2813
2814     BEGIN_BATCH(batch, 2);
2815     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
2816     OUT_BATCH(batch, 0);
2817     ADVANCE_BATCH(batch);
2818
2819     /* Disable STREAMOUT */
2820     BEGIN_BATCH(batch, 3);
2821     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
2822     OUT_BATCH(batch, 0);
2823     OUT_BATCH(batch, 0);
2824     ADVANCE_BATCH(batch);
2825 }
2826
2827 static void
2828 gen7_emit_clip_state(VADriverContextP ctx)
2829 {
2830     struct i965_driver_data *i965 = i965_driver_data(ctx);
2831     struct intel_batchbuffer *batch = i965->batch;
2832
2833     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2834     OUT_BATCH(batch, 0);
2835     OUT_BATCH(batch, 0); /* pass-through */
2836     OUT_BATCH(batch, 0);
2837 }
2838
2839 static void
2840 gen7_emit_sf_state(VADriverContextP ctx)
2841 {
2842     struct i965_driver_data *i965 = i965_driver_data(ctx);
2843     struct intel_batchbuffer *batch = i965->batch;
2844
2845     BEGIN_BATCH(batch, 14);
2846     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
2847     OUT_BATCH(batch,
2848               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
2849               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
2850               (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
2851     OUT_BATCH(batch, 0);
2852     OUT_BATCH(batch, 0);
2853     OUT_BATCH(batch, 0); /* DW4 */
2854     OUT_BATCH(batch, 0);
2855     OUT_BATCH(batch, 0);
2856     OUT_BATCH(batch, 0);
2857     OUT_BATCH(batch, 0);
2858     OUT_BATCH(batch, 0); /* DW9 */
2859     OUT_BATCH(batch, 0);
2860     OUT_BATCH(batch, 0);
2861     OUT_BATCH(batch, 0);
2862     OUT_BATCH(batch, 0);
2863     ADVANCE_BATCH(batch);
2864
2865     BEGIN_BATCH(batch, 7);
2866     OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
2867     OUT_BATCH(batch, 0);
2868     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2869     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
2870     OUT_BATCH(batch, 0);
2871     OUT_BATCH(batch, 0);
2872     OUT_BATCH(batch, 0);
2873     ADVANCE_BATCH(batch);
2874 }
2875
2876 static void
2877 gen7_emit_wm_state(VADriverContextP ctx, int kernel)
2878 {
2879     struct i965_driver_data *i965 = i965_driver_data(ctx);
2880     struct intel_batchbuffer *batch = i965->batch;
2881     struct i965_render_state *render_state = &i965->render_state;
2882     unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
2883     unsigned int num_samples = 0;
2884
2885     if (IS_HASWELL(i965->intel.device_info)) {
2886         max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
2887         num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
2888     }
2889
2890     BEGIN_BATCH(batch, 3);
2891     OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
2892     OUT_BATCH(batch,
2893               GEN7_WM_DISPATCH_ENABLE |
2894               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2895     OUT_BATCH(batch, 0);
2896     ADVANCE_BATCH(batch);
2897
2898     BEGIN_BATCH(batch, 7);
2899     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
2900     OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
2901     OUT_BATCH(batch, 0);
2902     OUT_RELOC(batch,
2903               render_state->curbe.bo,
2904               I915_GEM_DOMAIN_INSTRUCTION, 0,
2905               0);
2906     OUT_BATCH(batch, 0);
2907     OUT_BATCH(batch, 0);
2908     OUT_BATCH(batch, 0);
2909     ADVANCE_BATCH(batch);
2910
2911     BEGIN_BATCH(batch, 8);
2912     OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
2913     OUT_RELOC(batch,
2914               render_state->render_kernels[kernel].bo,
2915               I915_GEM_DOMAIN_INSTRUCTION, 0,
2916               0);
2917     OUT_BATCH(batch,
2918               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
2919               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2920     OUT_BATCH(batch, 0); /* scratch space base offset */
2921     OUT_BATCH(batch,
2922               ((i965->intel.device_info->max_wm_threads - 1) << max_threads_shift) | num_samples |
2923               GEN7_PS_PUSH_CONSTANT_ENABLE |
2924               GEN7_PS_ATTRIBUTE_ENABLE |
2925               GEN7_PS_16_DISPATCH_ENABLE);
2926     OUT_BATCH(batch,
2927               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
2928     OUT_BATCH(batch, 0); /* kernel 1 pointer */
2929     OUT_BATCH(batch, 0); /* kernel 2 pointer */
2930     ADVANCE_BATCH(batch);
2931 }
2932
2933 static void
2934 gen7_emit_vertex_element_state(VADriverContextP ctx)
2935 {
2936     struct i965_driver_data *i965 = i965_driver_data(ctx);
2937     struct intel_batchbuffer *batch = i965->batch;
2938
2939     /* Set up our vertex elements, sourced from the single vertex buffer. */
2940     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2941     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2942     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2943               GEN6_VE0_VALID |
2944               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2945               (0 << VE0_OFFSET_SHIFT));
2946     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2947               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2948               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2949               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2950     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2951     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2952               GEN6_VE0_VALID |
2953               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2954               (8 << VE0_OFFSET_SHIFT));
2955     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2956               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2957               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2958               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2959 }
2960
static void
gen7_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST |
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

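/*
 * Emit the full 3D pipeline state for one rendering pass into the batch
 * buffer, followed by the vertices that kick off the draw.  'kernel'
 * selects the pixel shader (PS_KERNEL or PS_SUBPIC_KERNEL).
 */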
static void
gen7_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_emit_invarient_states(ctx);
    gen7_emit_state_base_address(ctx);
    gen7_emit_viewport_state_pointers(ctx);
    gen7_emit_urb(ctx);
    gen7_emit_cc_state_pointers(ctx);
    gen7_emit_sampler_state_pointers(ctx);
    gen7_emit_bypass_state(ctx);
    gen7_emit_vs_state(ctx);
    gen7_emit_clip_state(ctx);
    gen7_emit_sf_state(ctx);
    gen7_emit_wm_state(ctx, kernel);
    gen7_emit_binding_table(ctx);
    gen7_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen7_emit_vertex_element_state(ctx);
    gen7_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}


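/* GEN7 implementation of the render_put_surface() hook. */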
static void
gen7_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen7_render_initialize(ctx);
    gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_clear_dest_region(ctx);
    gen7_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}


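/*
 * Subpictures are blended over the video frame: source-over blending
 * with SRC_ALPHA/INV_SRC_ALPHA factors, clamped to [0, 1].
 */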
static void
gen7_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_unmap(render_state->cc.state);
    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}

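/*
 * Like gen7_render_setup_states(), but sources from the subpicture
 * image and installs the alpha-blend state above.
 */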
static void
gen7_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_subpicture_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

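/* GEN7 implementation of the render_put_subpicture() hook. */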
static void
gen7_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen7_render_initialize(ctx);
    gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}


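/*
 * Generation-independent entry point for putting a surface on screen.
 * A minimal sketch of the client-side call that eventually reaches this
 * function (assuming a VADisplay 'va_dpy', a VASurfaceID 'surface' and a
 * Drawable 'draw' created through the usual libva setup; the names are
 * illustrative only):
 *
 *     vaPutSurface(va_dpy, surface, draw,
 *                  0, 0, src_w, src_h,    // source rectangle
 *                  0, 0, dst_w, dst_h,    // destination rectangle
 *                  NULL, 0,               // no clip rects
 *                  VA_FRAME_PICTURE);
 *
 * Post-processing runs first and may hand back a scaled intermediate
 * surface; rendering then goes through the per-generation hook.
 */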
void
intel_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int has_done_scaling = 0;
    VARectangle calibrated_rect;
    VASurfaceID out_surface_id = i965_post_processing(ctx,
                                                      obj_surface,
                                                      src_rect,
                                                      dst_rect,
                                                      flags,
                                                      &has_done_scaling,
                                                      &calibrated_rect);

    assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));

    if (out_surface_id != VA_INVALID_ID) {
        struct object_surface *new_obj_surface = SURFACE(out_surface_id);

        /* Render from the post-processed surface instead. */
        if (new_obj_surface && new_obj_surface->bo)
            obj_surface = new_obj_surface;

        /* Scaling was already done by post-processing; sample the
         * calibrated rectangle from the intermediate surface. */
        if (has_done_scaling)
            src_rect = &calibrated_rect;
    }

    render_state->render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);

    if (out_surface_id != VA_INVALID_ID)
        i965_DestroySurfaces(ctx, &out_surface_id, 1);
}

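/* Generation-independent entry point for subpicture rendering. */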
void
intel_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    render_state->render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
}

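/* Drop every bo owned by the render state and reset the pointers. */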
static void
genx_render_terminate(VADriverContextP ctx)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    dri_bo_unreference(render_state->curbe.bo);
    render_state->curbe.bo = NULL;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;
    dri_bo_unreference(render_state->vs.state);
    render_state->vs.state = NULL;
    dri_bo_unreference(render_state->sf.state);
    render_state->sf.state = NULL;
    dri_bo_unreference(render_state->wm.sampler);
    render_state->wm.sampler = NULL;
    dri_bo_unreference(render_state->wm.state);
    render_state->wm.state = NULL;
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL;
    dri_bo_unreference(render_state->cc.viewport);
    render_state->cc.viewport = NULL;
    dri_bo_unreference(render_state->cc.state);
    render_state->cc.state = NULL;
    dri_bo_unreference(render_state->cc.blend);
    render_state->cc.blend = NULL;
    dri_bo_unreference(render_state->cc.depth_stencil);
    render_state->cc.depth_stencil = NULL;

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }
}

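/*
 * Select the per-generation kernels and put_surface/put_subpicture hooks
 * (GEN4 through GEN7), upload the kernel binaries into bos and allocate
 * the CURBE constant buffer.
 */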
bool
genx_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;

    /* kernel */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
                                 sizeof(render_kernels_gen5[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
                                 sizeof(render_kernels_gen6[0])));

    if (IS_GEN7(i965->intel.device_info)) {
        memcpy(render_state->render_kernels,
               (IS_HASWELL(i965->intel.device_info) ? render_kernels_gen7_haswell : render_kernels_gen7),
               sizeof(render_state->render_kernels));
        render_state->render_put_surface = gen7_render_put_surface;
        render_state->render_put_subpicture = gen7_render_put_subpicture;
    } else if (IS_GEN6(i965->intel.device_info)) {
        memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
        render_state->render_put_surface = gen6_render_put_surface;
        render_state->render_put_subpicture = gen6_render_put_subpicture;
    } else if (IS_IRONLAKE(i965->intel.device_info)) {
        memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
        render_state->render_put_surface = i965_render_put_surface;
        render_state->render_put_subpicture = i965_render_put_subpicture;
    } else {
        memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
        render_state->render_put_surface = i965_render_put_surface;
        render_state->render_put_subpicture = i965_render_put_subpicture;
    }

    render_state->render_terminate = genx_render_terminate;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size, 0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* constant buffer */
    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
                                          "constant buffer",
                                          4096, 64);
    assert(render_state->curbe.bo);

    return true;
}

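/* Dispatch to the render_init implementation chosen for this GPU at
 * driver initialization (e.g. genx_render_init() above). */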
bool
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    return i965->codec_info->render_init(ctx);
}

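/* Tear down through the hook installed by render_init. */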
void
i965_render_terminate(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    render_state->render_terminate(ctx);
}