/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"

#include "i965_render.h"

#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

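/*
 * I965_GRF_BLOCKS() encodes a GRF register count the way the fixed-function
 * unit state expects it: the number of 16-register blocks, minus one.
 * E.g. PS_KERNEL_NUM_GRF = 48 -> (48 + 15) / 16 - 1 = 2.
 */
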
static const uint32_t ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_color_balance.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
static const uint32_t ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IRONLAKE */
static const uint32_t sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] =
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_color_balance.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] =
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_color_balance.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Programs for Haswell */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* TODO: Modify the shaders for GEN8.
 * For now it reuses the gen7/Haswell shaders.
 */
/* Programs for Gen8 */
static const uint32_t sf_kernel_static_gen8[][4] =
{
};
static const uint32_t ps_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_planar.g8b"
#include "shaders/render/exa_wm_yuv_color_balance.g8b"
#include "shaders/render/exa_wm_yuv_rgb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};

static const uint32_t ps_subpic_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_argb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};

#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_GEN8, \
                                            MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))

#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

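/*
 * All surface states live in one BO: MAX_RENDER_SURFACES entries, each
 * padded to the largest per-generation surface-state size so the same
 * layout works on every generation, followed by the binding table, whose
 * entries simply point back at those per-surface offsets (see the
 * BINDING_TABLE_OFFSET writes in the surface-state helpers below).
 */
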
static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

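/*
 * Type-punning through a union is the usual C way to recover the raw
 * IEEE-754 bit pattern of a float so it can be written into the batch
 * buffer as a DWORD; e.g. float_to_uint(1.0f) == 0x3f800000.
 */
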
enum {
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen8[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen8,
        sizeof(sf_kernel_static_gen8),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen8,
        sizeof(ps_kernel_static_gen8),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen8,
        sizeof(ps_subpic_kernel_static_gen8),
        NULL
    }
};

#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4

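/*
 * URB allocation for the fixed-function pipeline.  Only VS, SF and CS
 * (constant/CURBE) space is allocated; GS and CLIP are disabled, so they
 * get zero entries.  These values are turned into URB fences in
 * i965_render_urb_layout() below.
 */
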
static float yuv_to_rgb_bt601[3][4] = {
    {1.164,  0,      1.596,   -0.06275,},
    {1.164, -0.392, -0.813,   -0.50196,},
    {1.164,  2.017,  0,       -0.50196,},
};

static float yuv_to_rgb_bt709[3][4] = {
    {1.164,  0,      1.793,   -0.06275,},
    {1.164, -0.213, -0.533,   -0.50196,},
    {1.164,  2.112,  0,       -0.50196,},
};

static float yuv_to_rgb_smpte_240[3][4] = {
    {1.164,  0,      1.794,   -0.06275,},
    {1.164, -0.258, -0.5425,  -0.50196,},
    {1.164,  2.078,  0,       -0.50196,},
};

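/*
 * In the tables above, the 3x3 part gives the Y/Cb/Cr weights for the
 * R, G and B outputs, and the fourth column carries the video-range
 * input offsets applied before the multiply: -0.06275 = -16/255 (luma)
 * and -0.50196 = -128/255 (chroma).  For BT.601, for example:
 *     R = 1.164 * (Y - 16/255) + 1.596 * (Cr - 128/255)
 * The BT.709 and SMPTE 240M variants differ only in the chroma weights.
 */
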
static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_id))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

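/*
 * Note the dri_bo_emit_reloc() pattern used throughout this file: the
 * kernel start pointer is written as bo->offset >> 6 and a relocation is
 * emitted for the same DWORD, so the kernel BO's final GPU address is
 * patched in at execbuffer time; the delta (grf_reg_count << 1) re-adds
 * the low bits that share the DWORD with the pointer field.
 */
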
static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;   /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dst_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

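/*
 * The blend factors above implement standard "source over" compositing
 * for the subpicture: final = subpic * alpha + frame * (1 - alpha).
 */
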

static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD | I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}

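/*
 * Field rendering: to sample a single field, vert_line_stride = 1 makes
 * the sampler step two lines at a time while the surface height is
 * halved; vert_line_stride_ofs = 1 starts on the odd (bottom-field) line.
 */
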
static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen8_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* Set "Shader Channel Select" */
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

/* Set "Shader Channel Select" for GEN8+ */
void
gen8_render_set_surface_scs(struct gen8_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD | I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}

static void
gen8_render_set_surface_state(
    struct gen8_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD | I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss8.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    /* Always set 1 (align-4 mode) per B-spec */
    ss->ss0.vertical_alignment = 1;
    ss->ss0.horizontal_alignment = 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen8_render_set_surface_tiling(ss, tiling);
}

static void
i965_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN8(i965->intel.device_id)) {
        gen8_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        gen8_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                          region);
    } else if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

static void
i965_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      struct object_surface *obj_surface)
{
    dri_bo *subpic_region;
    unsigned int index;
    struct object_subpic *obj_subpic;
    struct object_image *obj_image;

    /* check the surface before dereferencing it */
    assert(obj_surface);
    assert(obj_surface->bo);

    index = obj_surface->subpic_render_idx;
    obj_subpic = obj_surface->obj_subpic[index];
    obj_image = obj_subpic->obj_image;
    subpic_region = obj_image->bo;
    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN8(i965->intel.device_id)) {
        gen8_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        gen8_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                          dest_region->bo);
    } else if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}

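/*
 * Only three corners are uploaded per quad (12 floats: texture s/t plus
 * screen x/y for each vertex); the quad is drawn as a rectangle-list
 * primitive, so the hardware derives the fourth corner.  Rotation is
 * handled purely by permuting which texture-coordinate corners are bound
 * to the fixed screen-space corners.
 */
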
static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

#define PI  3.1415926

static void
i965_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0'));

        constant_buffer[0] = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
            constant_buffer[0] = 1;
        else
            constant_buffer[0] = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    color_flag = flags & VA_SRC_COLOR_MASK;
    yuv_to_rgb = (float *)constant_buffer + 8;
    if (color_flag == VA_SRC_BT709)
        memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
    else if (color_flag == VA_SRC_SMPTE_240)
        memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
    else
        memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));

    dri_bo_unmap(render_state->curbe.bo);
}

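/*
 * CURBE layout consumed by the planar-sampling pixel shader, as filled
 * in above:
 *   ushort[0]   - source layout: 0 = 3-plane YUV, 1 = NV12, 2 = Y800
 *   ushort[1]   - 1 to skip the color-balance transform entirely
 *   float[4..7] - contrast, brightness, cos(hue)*c*s, sin(hue)*c*s
 *   float[8..]  - the 3x4 YUV-to-RGB matrix selected from the tables above
 */
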
static void
i965_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }

    dri_bo_map(render_state->curbe.bo, 1);

    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, obj_surface, flags);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

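/*
 * Ironlake's STATE_BASE_ADDRESS is two DWORDs longer than gen4's (8 vs. 6).
 * In both cases only the surface-state base points at a real buffer (the
 * combined surface-state/binding-table BO); every other base is left at
 * zero with BASE_ADDRESS_MODIFY set.
 */
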
static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}

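/*
 * With the URB_* defines above, the fence values come out as:
 *   VS:    0 .. 8   (8 entries x size 1)
 *   GS:    8 .. 8   (disabled)
 *   CLIP:  8 .. 8   (disabled)
 *   SF:    8 .. 10  (1 entry x size 2)
 *   CS:   10 .. 26  (4 entries x size 4)
 */
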
1489 static void 
1490 i965_render_cs_urb_layout(VADriverContextP ctx)
1491 {
1492     struct i965_driver_data *i965 = i965_driver_data(ctx);
1493     struct intel_batchbuffer *batch = i965->batch;
1494
1495     BEGIN_BATCH(batch, 2);
1496     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1497     OUT_BATCH(batch,
1498               ((URB_CS_ENTRY_SIZE - 1) << 4) |          /* URB Entry Allocation Size */
1499               (URB_CS_ENTRIES << 0));                /* Number of URB Entries */
1500     ADVANCE_BATCH(batch);
1501 }
1502
1503 static void
1504 i965_render_constant_buffer(VADriverContextP ctx)
1505 {
1506     struct i965_driver_data *i965 = i965_driver_data(ctx);
1507     struct intel_batchbuffer *batch = i965->batch;
1508     struct i965_render_state *render_state = &i965->render_state;
1509
1510     BEGIN_BATCH(batch, 2);
1511     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1512     OUT_RELOC(batch, render_state->curbe.bo,
1513               I915_GEM_DOMAIN_INSTRUCTION, 0,
1514               URB_CS_ENTRY_SIZE - 1);
1515     ADVANCE_BATCH(batch);    
1516 }
1517
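/*
 * The drawing rectangle clips all rendering to the destination region.
 * The min and max corners are each packed as (y << 16) | x, and the max
 * corner is inclusive, hence the width/height - 1 below.
 */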
1518 static void
1519 i965_render_drawing_rectangle(VADriverContextP ctx)
1520 {
1521     struct i965_driver_data *i965 = i965_driver_data(ctx);
1522     struct intel_batchbuffer *batch = i965->batch;
1523     struct i965_render_state *render_state = &i965->render_state;
1524     struct intel_region *dest_region = render_state->draw_region;
1525
1526     BEGIN_BATCH(batch, 4);
1527     OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
1528     OUT_BATCH(batch, 0x00000000);
1529     OUT_BATCH(batch, (dest_region->width - 1) | ((dest_region->height - 1) << 16));
1530     OUT_BATCH(batch, 0x00000000);         
1531     ADVANCE_BATCH(batch);
1532 }
1533
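/*
 * Each vertex in the buffer is 16 bytes: two floats of position (x, y)
 * followed by two floats of texture coordinates (s, t).  The two vertex
 * elements expand those pairs to {x, y, 1.0, 1.0} and {s, t, 1.0, 1.0}.
 * Ironlake removed the per-element destination offset field, hence the
 * two code paths.
 */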
1534 static void
1535 i965_render_vertex_elements(VADriverContextP ctx)
1536 {
1537     struct i965_driver_data *i965 = i965_driver_data(ctx);
1538     struct intel_batchbuffer *batch = i965->batch;
1539
1540     if (IS_IRONLAKE(i965->intel.device_id)) {
1541         BEGIN_BATCH(batch, 5);
1542         OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
1543         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
1544         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1545                   VE0_VALID |
1546                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1547                   (0 << VE0_OFFSET_SHIFT));
1548         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1549                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1550                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1551                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1552         /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
1553         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1554                   VE0_VALID |
1555                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1556                   (8 << VE0_OFFSET_SHIFT));
1557         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1558                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1559                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1560                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1561         ADVANCE_BATCH(batch);
1562     } else {
1563         BEGIN_BATCH(batch, 5);
1564         OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
1565         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
1566         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1567                   VE0_VALID |
1568                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1569                   (0 << VE0_OFFSET_SHIFT));
1570         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1571                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1572                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1573                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
1574                   (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
1575         /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
1576         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1577                   VE0_VALID |
1578                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1579                   (8 << VE0_OFFSET_SHIFT));
1580         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1581                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1582                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1583                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
1584                   (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
1585         ADVANCE_BATCH(batch);
1586     }
1587 }
1588
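/*
 * Upload the subpicture palette into the sampler: each entry packs the
 * caller-supplied alpha in bits 24-31 above the 24-bit color taken from
 * the VAImage palette.
 */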
1589 static void
1590 i965_render_upload_image_palette(
1591     VADriverContextP ctx,
1592     struct object_image *obj_image,
1593     unsigned int     alpha
1594 )
1595 {
1596     struct i965_driver_data *i965 = i965_driver_data(ctx);
1597     struct intel_batchbuffer *batch = i965->batch;
1598     unsigned int i;
1599
1600     assert(obj_image);
1601
1602     if (!obj_image)
1603         return;
1604
1605     if (obj_image->image.num_palette_entries == 0)
1606         return;
1607
1608     BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
1609     OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
1610     /* fill the palette */
1611     /* each entry: bits 0-23 color, bits 24-31 alpha */
1612     for (i = 0; i < obj_image->image.num_palette_entries; i++)
1613         OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
1614     ADVANCE_BATCH(batch);
1615 }
1616
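/*
 * Bind the vertex buffer (16 bytes per vertex) and kick off a RECTLIST
 * primitive: three vertices define the rectangle and the hardware
 * derives the fourth corner.  Ironlake programs a buffer end address in
 * the third VERTEX_BUFFERS dword where older parts take a max vertex
 * index instead.
 */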
1617 static void
1618 i965_render_startup(VADriverContextP ctx)
1619 {
1620     struct i965_driver_data *i965 = i965_driver_data(ctx);
1621     struct intel_batchbuffer *batch = i965->batch;
1622     struct i965_render_state *render_state = &i965->render_state;
1623
1624     BEGIN_BATCH(batch, 11);
1625     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
1626     OUT_BATCH(batch, 
1627               (0 << VB0_BUFFER_INDEX_SHIFT) |
1628               VB0_VERTEXDATA |
1629               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
1630     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
1631
1632     if (IS_IRONLAKE(i965->intel.device_id))
1633         OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
1634     else
1635         OUT_BATCH(batch, 3);
1636
1637     OUT_BATCH(batch, 0);
1638
1639     OUT_BATCH(batch, 
1640               CMD_3DPRIMITIVE |
1641               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
1642               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
1643               (0 << 9) |
1644               4);
1645     OUT_BATCH(batch, 3); /* vertex count per instance */
1646     OUT_BATCH(batch, 0); /* start vertex offset */
1647     OUT_BATCH(batch, 1); /* single instance */
1648     OUT_BATCH(batch, 0); /* start instance location */
1649     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
1650     ADVANCE_BATCH(batch);
1651 }
1652
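/*
 * Clear the destination with a solid-color blit.  BR13 packs the raster
 * operation 0xf0 (PATCOPY, i.e. fill with the pattern/solid color) in
 * bits 16-23, the color depth, and the pitch; tiled destinations take
 * the pitch in dwords, hence the divide by 4.  On GEN6 and later the
 * blit must be issued on the BLT ring.
 */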
1653 static void 
1654 i965_clear_dest_region(VADriverContextP ctx)
1655 {
1656     struct i965_driver_data *i965 = i965_driver_data(ctx);
1657     struct intel_batchbuffer *batch = i965->batch;
1658     struct i965_render_state *render_state = &i965->render_state;
1659     struct intel_region *dest_region = render_state->draw_region;
1660     unsigned int blt_cmd, br13;
1661     int pitch;
1662
1663     blt_cmd = XY_COLOR_BLT_CMD;
1664     br13 = 0xf0 << 16;
1665     pitch = dest_region->pitch;
1666
1667     if (dest_region->cpp == 4) {
1668         br13 |= BR13_8888;
1669         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
1670     } else {
1671         assert(dest_region->cpp == 2);
1672         br13 |= BR13_565;
1673     }
1674
1675     if (dest_region->tiling != I915_TILING_NONE) {
1676         blt_cmd |= XY_COLOR_BLT_DST_TILED;
1677         pitch /= 4;
1678     }
1679
1680     br13 |= pitch;
1681
1682     if (IS_GEN6(i965->intel.device_id) ||
1683         IS_GEN7(i965->intel.device_id) ||
1684         IS_GEN8(i965->intel.device_id)) {
1685         intel_batchbuffer_start_atomic_blt(batch, 24);
1686         BEGIN_BLT_BATCH(batch, 6);
1687     } else {
1688         intel_batchbuffer_start_atomic(batch, 24);
1689         BEGIN_BATCH(batch, 6);
1690     }
1691
1692     OUT_BATCH(batch, blt_cmd);
1693     OUT_BATCH(batch, br13);
1694     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
1695     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
1696               (dest_region->x + dest_region->width));
1697     OUT_RELOC(batch, dest_region->bo, 
1698               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1699               0);
1700     OUT_BATCH(batch, 0x0);
1701     ADVANCE_BATCH(batch);
1702     intel_batchbuffer_end_atomic(batch);
1703 }
1704
1705 static void 
1706 gen8_clear_dest_region(VADriverContextP ctx)
1707 {
1708     struct i965_driver_data *i965 = i965_driver_data(ctx);
1709     struct intel_batchbuffer *batch = i965->batch;
1710     struct i965_render_state *render_state = &i965->render_state;
1711     struct intel_region *dest_region = render_state->draw_region;
1712     unsigned int blt_cmd, br13;
1713     int pitch;
1714
1715     blt_cmd = GEN8_XY_COLOR_BLT_CMD;
1716     br13 = 0xf0 << 16;
1717     pitch = dest_region->pitch;
1718
1719     if (dest_region->cpp == 4) {
1720         br13 |= BR13_8888;
1721         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
1722     } else {
1723         assert(dest_region->cpp == 2);
1724         br13 |= BR13_565;
1725     }
1726
1727     if (dest_region->tiling != I915_TILING_NONE) {
1728         blt_cmd |= XY_COLOR_BLT_DST_TILED;
1729         pitch /= 4;
1730     }
1731
1732     br13 |= pitch;
1733
1734     intel_batchbuffer_start_atomic_blt(batch, 24);
1735     BEGIN_BLT_BATCH(batch, 7);
1736
1737     OUT_BATCH(batch, blt_cmd);
1738     OUT_BATCH(batch, br13);
1739     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
1740     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
1741               (dest_region->x + dest_region->width));
1742     OUT_RELOC(batch, dest_region->bo, 
1743               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1744               0);
1745     OUT_BATCH(batch, 0x0);
1746     OUT_BATCH(batch, 0x0);
1747     ADVANCE_BATCH(batch);
1748     intel_batchbuffer_end_atomic(batch);
1749 }
1750
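/*
 * Program the full GEN4/GEN5 3D pipeline for one rectangle: clear the
 * destination first, then pipeline select and SIP, base addresses and
 * the unit/URB state, and finally the vertex setup and the 3DPRIMITIVE
 * in i965_render_startup().
 */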
1751 static void
1752 i965_surface_render_pipeline_setup(VADriverContextP ctx)
1753 {
1754     struct i965_driver_data *i965 = i965_driver_data(ctx);
1755     struct intel_batchbuffer *batch = i965->batch;
1756
1757     i965_clear_dest_region(ctx);
1758     intel_batchbuffer_start_atomic(batch, 0x1000);
1759     intel_batchbuffer_emit_mi_flush(batch);
1760     i965_render_pipeline_select(ctx);
1761     i965_render_state_sip(ctx);
1762     i965_render_state_base_address(ctx);
1763     i965_render_binding_table_pointers(ctx);
1764     i965_render_constant_color(ctx);
1765     i965_render_pipelined_pointers(ctx);
1766     i965_render_urb_layout(ctx);
1767     i965_render_cs_urb_layout(ctx);
1768     i965_render_constant_buffer(ctx);
1769     i965_render_drawing_rectangle(ctx);
1770     i965_render_vertex_elements(ctx);
1771     i965_render_startup(ctx);
1772     intel_batchbuffer_end_atomic(batch);
1773 }
1774
1775 static void
1776 i965_subpic_render_pipeline_setup(VADriverContextP ctx)
1777 {
1778     struct i965_driver_data *i965 = i965_driver_data(ctx);
1779     struct intel_batchbuffer *batch = i965->batch;
1780
1781     intel_batchbuffer_start_atomic(batch, 0x1000);
1782     intel_batchbuffer_emit_mi_flush(batch);
1783     i965_render_pipeline_select(ctx);
1784     i965_render_state_sip(ctx);
1785     i965_render_state_base_address(ctx);
1786     i965_render_binding_table_pointers(ctx);
1787     i965_render_constant_color(ctx);
1788     i965_render_pipelined_pointers(ctx);
1789     i965_render_urb_layout(ctx);
1790     i965_render_cs_urb_layout(ctx);
1791     i965_render_constant_buffer(ctx);
1792     i965_render_drawing_rectangle(ctx);
1793     i965_render_vertex_elements(ctx);
1794     i965_render_startup(ctx);
1795     intel_batchbuffer_end_atomic(batch);
1796 }
1797
1799 static void 
1800 i965_render_initialize(VADriverContextP ctx)
1801 {
1802     struct i965_driver_data *i965 = i965_driver_data(ctx);
1803     struct i965_render_state *render_state = &i965->render_state;
1804     dri_bo *bo;
1805
1806     /* VERTEX BUFFER */
1807     dri_bo_unreference(render_state->vb.vertex_buffer);
1808     bo = dri_bo_alloc(i965->intel.bufmgr,
1809                       "vertex buffer",
1810                       4096,
1811                       4096);
1812     assert(bo);
1813     render_state->vb.vertex_buffer = bo;
1814
1815     /* VS */
1816     dri_bo_unreference(render_state->vs.state);
1817     bo = dri_bo_alloc(i965->intel.bufmgr,
1818                       "vs state",
1819                       sizeof(struct i965_vs_unit_state),
1820                       64);
1821     assert(bo);
1822     render_state->vs.state = bo;
1823
1824     /* GS */
1825     /* CLIP */
1826     /* SF */
1827     dri_bo_unreference(render_state->sf.state);
1828     bo = dri_bo_alloc(i965->intel.bufmgr,
1829                       "sf state",
1830                       sizeof(struct i965_sf_unit_state),
1831                       64);
1832     assert(bo);
1833     render_state->sf.state = bo;
1834
1835     /* WM */
1836     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1837     bo = dri_bo_alloc(i965->intel.bufmgr,
1838                       "surface state & binding table",
1839                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1840                       4096);
1841     assert(bo);
1842     render_state->wm.surface_state_binding_table_bo = bo;
1843
1844     dri_bo_unreference(render_state->wm.sampler);
1845     bo = dri_bo_alloc(i965->intel.bufmgr,
1846                       "sampler state",
1847                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1848                       64);
1849     assert(bo);
1850     render_state->wm.sampler = bo;
1851     render_state->wm.sampler_count = 0;
1852
1853     dri_bo_unreference(render_state->wm.state);
1854     bo = dri_bo_alloc(i965->intel.bufmgr,
1855                       "wm state",
1856                       sizeof(struct i965_wm_unit_state),
1857                       64);
1858     assert(bo);
1859     render_state->wm.state = bo;
1860
1861     /* COLOR CALCULATOR */
1862     dri_bo_unreference(render_state->cc.state);
1863     bo = dri_bo_alloc(i965->intel.bufmgr,
1864                       "color calc state",
1865                       sizeof(struct i965_cc_unit_state),
1866                       64);
1867     assert(bo);
1868     render_state->cc.state = bo;
1869
1870     dri_bo_unreference(render_state->cc.viewport);
1871     bo = dri_bo_alloc(i965->intel.bufmgr,
1872                       "cc viewport",
1873                       sizeof(struct i965_cc_viewport),
1874                       64);
1875     assert(bo);
1876     render_state->cc.viewport = bo;
1877 }
1878
1879 static void
1880 i965_render_put_surface(
1881     VADriverContextP   ctx,
1882     struct object_surface *obj_surface,
1883     const VARectangle *src_rect,
1884     const VARectangle *dst_rect,
1885     unsigned int       flags
1886 )
1887 {
1888     struct i965_driver_data *i965 = i965_driver_data(ctx);
1889     struct intel_batchbuffer *batch = i965->batch;
1890
1891     i965_render_initialize(ctx);
1892     i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
1893     i965_surface_render_pipeline_setup(ctx);
1894     intel_batchbuffer_flush(batch);
1895 }
1896
1897 static void
1898 i965_render_put_subpicture(
1899     VADriverContextP   ctx,
1900     struct object_surface *obj_surface,
1901     const VARectangle *src_rect,
1902     const VARectangle *dst_rect
1903 )
1904 {
1905     struct i965_driver_data *i965 = i965_driver_data(ctx);
1906     struct intel_batchbuffer *batch = i965->batch;
1907     unsigned int index = obj_surface->subpic_render_idx;
1908     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1909
1910     assert(obj_subpic);
1911
1912     i965_render_initialize(ctx);
1913     i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
1914     i965_subpic_render_pipeline_setup(ctx);
1915     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
1916     intel_batchbuffer_flush(batch);
1917 }
1918
1919 /*
1920  * for GEN6+
1921  */
1922 static void 
1923 gen6_render_initialize(VADriverContextP ctx)
1924 {
1925     struct i965_driver_data *i965 = i965_driver_data(ctx);
1926     struct i965_render_state *render_state = &i965->render_state;
1927     dri_bo *bo;
1928
1929     /* VERTEX BUFFER */
1930     dri_bo_unreference(render_state->vb.vertex_buffer);
1931     bo = dri_bo_alloc(i965->intel.bufmgr,
1932                       "vertex buffer",
1933                       4096,
1934                       4096);
1935     assert(bo);
1936     render_state->vb.vertex_buffer = bo;
1937
1938     /* WM */
1939     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1940     bo = dri_bo_alloc(i965->intel.bufmgr,
1941                       "surface state & binding table",
1942                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1943                       4096);
1944     assert(bo);
1945     render_state->wm.surface_state_binding_table_bo = bo;
1946
1947     dri_bo_unreference(render_state->wm.sampler);
1948     bo = dri_bo_alloc(i965->intel.bufmgr,
1949                       "sampler state",
1950                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1951                       4096);
1952     assert(bo);
1953     render_state->wm.sampler = bo;
1954     render_state->wm.sampler_count = 0;
1955
1956     /* COLOR CALCULATOR */
1957     dri_bo_unreference(render_state->cc.state);
1958     bo = dri_bo_alloc(i965->intel.bufmgr,
1959                       "color calc state",
1960                       sizeof(struct gen6_color_calc_state),
1961                       4096);
1962     assert(bo);
1963     render_state->cc.state = bo;
1964
1965     /* CC VIEWPORT */
1966     dri_bo_unreference(render_state->cc.viewport);
1967     bo = dri_bo_alloc(i965->intel.bufmgr,
1968                       "cc viewport",
1969                       sizeof(struct i965_cc_viewport),
1970                       4096);
1971     assert(bo);
1972     render_state->cc.viewport = bo;
1973
1974     /* BLEND STATE */
1975     dri_bo_unreference(render_state->cc.blend);
1976     bo = dri_bo_alloc(i965->intel.bufmgr,
1977                       "blend state",
1978                       sizeof(struct gen6_blend_state),
1979                       4096);
1980     assert(bo);
1981     render_state->cc.blend = bo;
1982
1983     /* DEPTH & STENCIL STATE */
1984     dri_bo_unreference(render_state->cc.depth_stencil);
1985     bo = dri_bo_alloc(i965->intel.bufmgr,
1986                       "depth & stencil state",
1987                       sizeof(struct gen6_depth_stencil_state),
1988                       4096);
1989     assert(bo);
1990     render_state->cc.depth_stencil = bo;
1991 }
1992
1993 static void
1994 gen6_render_color_calc_state(VADriverContextP ctx)
1995 {
1996     struct i965_driver_data *i965 = i965_driver_data(ctx);
1997     struct i965_render_state *render_state = &i965->render_state;
1998     struct gen6_color_calc_state *color_calc_state;
1999     
2000     dri_bo_map(render_state->cc.state, 1);
2001     assert(render_state->cc.state->virtual);
2002     color_calc_state = render_state->cc.state->virtual;
2003     memset(color_calc_state, 0, sizeof(*color_calc_state));
2004     color_calc_state->constant_r = 1.0;
2005     color_calc_state->constant_g = 0.0;
2006     color_calc_state->constant_b = 1.0;
2007     color_calc_state->constant_a = 1.0;
2008     dri_bo_unmap(render_state->cc.state);
2009 }
2010
2011 static void
2012 gen6_render_blend_state(VADriverContextP ctx)
2013 {
2014     struct i965_driver_data *i965 = i965_driver_data(ctx);
2015     struct i965_render_state *render_state = &i965->render_state;
2016     struct gen6_blend_state *blend_state;
2017     
2018     dri_bo_map(render_state->cc.blend, 1);
2019     assert(render_state->cc.blend->virtual);
2020     blend_state = render_state->cc.blend->virtual;
2021     memset(blend_state, 0, sizeof(*blend_state));
2022     blend_state->blend1.logic_op_enable = 1;
2023     blend_state->blend1.logic_op_func = 0xc; /* COPY: output = src */
2024     dri_bo_unmap(render_state->cc.blend);
2025 }
2026
2027 static void
2028 gen6_render_depth_stencil_state(VADriverContextP ctx)
2029 {
2030     struct i965_driver_data *i965 = i965_driver_data(ctx);
2031     struct i965_render_state *render_state = &i965->render_state;
2032     struct gen6_depth_stencil_state *depth_stencil_state;
2033     
2034     dri_bo_map(render_state->cc.depth_stencil, 1);
2035     assert(render_state->cc.depth_stencil->virtual);
2036     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2037     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2038     dri_bo_unmap(render_state->cc.depth_stencil);
2039 }
2040
2041 static void
2042 gen6_render_setup_states(
2043     VADriverContextP   ctx,
2044     struct object_surface *obj_surface,
2045     const VARectangle *src_rect,
2046     const VARectangle *dst_rect,
2047     unsigned int       flags
2048 )
2049 {
2050     i965_render_dest_surface_state(ctx, 0);
2051     i965_render_src_surfaces_state(ctx, obj_surface, flags);
2052     i965_render_sampler(ctx);
2053     i965_render_cc_viewport(ctx);
2054     gen6_render_color_calc_state(ctx);
2055     gen6_render_blend_state(ctx);
2056     gen6_render_depth_stencil_state(ctx);
2057     i965_render_upload_constants(ctx, obj_surface, flags);
2058     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
2059 }
2060
2061 static void
2062 gen6_emit_invarient_states(VADriverContextP ctx)
2063 {
2064     struct i965_driver_data *i965 = i965_driver_data(ctx);
2065     struct intel_batchbuffer *batch = i965->batch;
2066
2067     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
2068
2069     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
2070     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
2071               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
2072     OUT_BATCH(batch, 0);
2073
2074     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
2075     OUT_BATCH(batch, 1);
2076
2077     /* Set system instruction pointer */
2078     OUT_BATCH(batch, CMD_STATE_SIP | 0);
2079     OUT_BATCH(batch, 0);
2080 }
2081
2082 static void
2083 gen6_emit_state_base_address(VADriverContextP ctx)
2084 {
2085     struct i965_driver_data *i965 = i965_driver_data(ctx);
2086     struct intel_batchbuffer *batch = i965->batch;
2087     struct i965_render_state *render_state = &i965->render_state;
2088
2089     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2090     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2091     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2092     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2093     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2094     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2095     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
2096     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2097     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2098     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2099 }
2100
2101 static void
2102 gen6_emit_viewport_state_pointers(VADriverContextP ctx)
2103 {
2104     struct i965_driver_data *i965 = i965_driver_data(ctx);
2105     struct intel_batchbuffer *batch = i965->batch;
2106     struct i965_render_state *render_state = &i965->render_state;
2107
2108     OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
2109               GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
2110               (4 - 2));
2111     OUT_BATCH(batch, 0);
2112     OUT_BATCH(batch, 0);
2113     OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
2114 }
2115
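/*
 * Minimal GEN6 URB setup: the VS section must be given at least 24
 * entries even though the VS is pass-through, and the GS section gets
 * none since no GS threads are dispatched.
 */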
2116 static void
2117 gen6_emit_urb(VADriverContextP ctx)
2118 {
2119     struct i965_driver_data *i965 = i965_driver_data(ctx);
2120     struct intel_batchbuffer *batch = i965->batch;
2121
2122     OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
2123     OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
2124               (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
2125     OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
2126               (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
2127 }
2128
2129 static void
2130 gen6_emit_cc_state_pointers(VADriverContextP ctx)
2131 {
2132     struct i965_driver_data *i965 = i965_driver_data(ctx);
2133     struct intel_batchbuffer *batch = i965->batch;
2134     struct i965_render_state *render_state = &i965->render_state;
2135
2136     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
2137     OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
2138     OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
2139     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
2140 }
2141
2142 static void
2143 gen6_emit_sampler_state_pointers(VADriverContextP ctx)
2144 {
2145     struct i965_driver_data *i965 = i965_driver_data(ctx);
2146     struct intel_batchbuffer *batch = i965->batch;
2147     struct i965_render_state *render_state = &i965->render_state;
2148
2149     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
2150               GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
2151               (4 - 2));
2152     OUT_BATCH(batch, 0); /* VS */
2153     OUT_BATCH(batch, 0); /* GS */
2154     OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
2155 }
2156
2157 static void
2158 gen6_emit_binding_table(VADriverContextP ctx)
2159 {
2160     struct i965_driver_data *i965 = i965_driver_data(ctx);
2161     struct intel_batchbuffer *batch = i965->batch;
2162
2163     /* Binding table pointers */
2164     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
2165               GEN6_BINDING_TABLE_MODIFY_PS |
2166               (4 - 2));
2167     OUT_BATCH(batch, 0);                /* vs */
2168     OUT_BATCH(batch, 0);                /* gs */
2169     /* Only the PS uses the binding table */
2170     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
2171 }
2172
2173 static void
2174 gen6_emit_depth_buffer_state(VADriverContextP ctx)
2175 {
2176     struct i965_driver_data *i965 = i965_driver_data(ctx);
2177     struct intel_batchbuffer *batch = i965->batch;
2178
2179     OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
2180     OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
2181               (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
2182     OUT_BATCH(batch, 0);
2183     OUT_BATCH(batch, 0);
2184     OUT_BATCH(batch, 0);
2185     OUT_BATCH(batch, 0);
2186     OUT_BATCH(batch, 0);
2187
2188     OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
2189     OUT_BATCH(batch, 0);
2190 }
2191
2192 static void
2193 gen6_emit_drawing_rectangle(VADriverContextP ctx)
2194 {
2195     i965_render_drawing_rectangle(ctx);
2196 }
2197
2198 static void 
2199 gen6_emit_vs_state(VADriverContextP ctx)
2200 {
2201     struct i965_driver_data *i965 = i965_driver_data(ctx);
2202     struct intel_batchbuffer *batch = i965->batch;
2203
2204     /* disable VS constant buffer */
2205     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
2206     OUT_BATCH(batch, 0);
2207     OUT_BATCH(batch, 0);
2208     OUT_BATCH(batch, 0);
2209     OUT_BATCH(batch, 0);
2210         
2211     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2212     OUT_BATCH(batch, 0); /* without VS kernel */
2213     OUT_BATCH(batch, 0);
2214     OUT_BATCH(batch, 0);
2215     OUT_BATCH(batch, 0);
2216     OUT_BATCH(batch, 0); /* pass-through */
2217 }
2218
2219 static void 
2220 gen6_emit_gs_state(VADriverContextP ctx)
2221 {
2222     struct i965_driver_data *i965 = i965_driver_data(ctx);
2223     struct intel_batchbuffer *batch = i965->batch;
2224
2225     /* disable GS constant buffer */
2226     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
2227     OUT_BATCH(batch, 0);
2228     OUT_BATCH(batch, 0);
2229     OUT_BATCH(batch, 0);
2230     OUT_BATCH(batch, 0);
2231         
2232     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2233     OUT_BATCH(batch, 0); /* without GS kernel */
2234     OUT_BATCH(batch, 0);
2235     OUT_BATCH(batch, 0);
2236     OUT_BATCH(batch, 0);
2237     OUT_BATCH(batch, 0);
2238     OUT_BATCH(batch, 0); /* pass-through */
2239 }
2240
2241 static void 
2242 gen6_emit_clip_state(VADriverContextP ctx)
2243 {
2244     struct i965_driver_data *i965 = i965_driver_data(ctx);
2245     struct intel_batchbuffer *batch = i965->batch;
2246
2247     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2248     OUT_BATCH(batch, 0);
2249     OUT_BATCH(batch, 0); /* pass-through */
2250     OUT_BATCH(batch, 0);
2251 }
2252
2253 static void 
2254 gen6_emit_sf_state(VADriverContextP ctx)
2255 {
2256     struct i965_driver_data *i965 = i965_driver_data(ctx);
2257     struct intel_batchbuffer *batch = i965->batch;
2258
2259     OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
2260     OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
2261               (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
2262               (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
2263     OUT_BATCH(batch, 0);
2264     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2265     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
2266     OUT_BATCH(batch, 0);
2267     OUT_BATCH(batch, 0);
2268     OUT_BATCH(batch, 0);
2269     OUT_BATCH(batch, 0);
2270     OUT_BATCH(batch, 0); /* DW9 */
2271     OUT_BATCH(batch, 0);
2272     OUT_BATCH(batch, 0);
2273     OUT_BATCH(batch, 0);
2274     OUT_BATCH(batch, 0);
2275     OUT_BATCH(batch, 0); /* DW14 */
2276     OUT_BATCH(batch, 0);
2277     OUT_BATCH(batch, 0);
2278     OUT_BATCH(batch, 0);
2279     OUT_BATCH(batch, 0);
2280     OUT_BATCH(batch, 0); /* DW19 */
2281 }
2282
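/*
 * Bind the CURBE bo as PS push-constant buffer 0 and point the WM unit
 * at the requested pixel-shader kernel with SIMD16 dispatch enabled.
 * The dispatch start GRF (6 here) matches where the precompiled kernels
 * expect their payload.
 */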
2283 static void 
2284 gen6_emit_wm_state(VADriverContextP ctx, int kernel)
2285 {
2286     struct i965_driver_data *i965 = i965_driver_data(ctx);
2287     struct intel_batchbuffer *batch = i965->batch;
2288     struct i965_render_state *render_state = &i965->render_state;
2289
2290     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
2291               GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
2292               (5 - 2));
2293     OUT_RELOC(batch, 
2294               render_state->curbe.bo,
2295               I915_GEM_DOMAIN_INSTRUCTION, 0,
2296               (URB_CS_ENTRY_SIZE-1));
2297     OUT_BATCH(batch, 0);
2298     OUT_BATCH(batch, 0);
2299     OUT_BATCH(batch, 0);
2300
2301     OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
2302     OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
2303               I915_GEM_DOMAIN_INSTRUCTION, 0,
2304               0);
2305     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
2306               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2307     OUT_BATCH(batch, 0);
2308     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
2309     OUT_BATCH(batch, ((render_state->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
2310               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
2311               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
2312     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
2313               GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2314     OUT_BATCH(batch, 0);
2315     OUT_BATCH(batch, 0);
2316 }
2317
2318 static void
2319 gen6_emit_vertex_element_state(VADriverContextP ctx)
2320 {
2321     struct i965_driver_data *i965 = i965_driver_data(ctx);
2322     struct intel_batchbuffer *batch = i965->batch;
2323
2324     /* Set up our vertex elements, sourced from the single vertex buffer. */
2325     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2326     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2327     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2328               GEN6_VE0_VALID |
2329               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2330               (0 << VE0_OFFSET_SHIFT));
2331     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2332               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2333               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2334               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2335     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2336     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2337               GEN6_VE0_VALID |
2338               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2339               (8 << VE0_OFFSET_SHIFT));
2340     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2341               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2342               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2343               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2344 }
2345
2346 static void
2347 gen6_emit_vertices(VADriverContextP ctx)
2348 {
2349     struct i965_driver_data *i965 = i965_driver_data(ctx);
2350     struct intel_batchbuffer *batch = i965->batch;
2351     struct i965_render_state *render_state = &i965->render_state;
2352
2353     BEGIN_BATCH(batch, 11);
2354     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
2355     OUT_BATCH(batch, 
2356               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2357               GEN6_VB0_VERTEXDATA |
2358               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2359     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2360     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2361     OUT_BATCH(batch, 0);
2362
2363     OUT_BATCH(batch, 
2364               CMD_3DPRIMITIVE |
2365               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
2366               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
2367               (0 << 9) |
2368               4);
2369     OUT_BATCH(batch, 3); /* vertex count per instance */
2370     OUT_BATCH(batch, 0); /* start vertex offset */
2371     OUT_BATCH(batch, 1); /* single instance */
2372     OUT_BATCH(batch, 0); /* start instance location */
2373     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
2374     ADVANCE_BATCH(batch);
2375 }
2376
2377 static void
2378 gen6_render_emit_states(VADriverContextP ctx, int kernel)
2379 {
2380     struct i965_driver_data *i965 = i965_driver_data(ctx);
2381     struct intel_batchbuffer *batch = i965->batch;
2382
2383     intel_batchbuffer_start_atomic(batch, 0x1000);
2384     intel_batchbuffer_emit_mi_flush(batch);
2385     gen6_emit_invarient_states(ctx);
2386     gen6_emit_state_base_address(ctx);
2387     gen6_emit_viewport_state_pointers(ctx);
2388     gen6_emit_urb(ctx);
2389     gen6_emit_cc_state_pointers(ctx);
2390     gen6_emit_sampler_state_pointers(ctx);
2391     gen6_emit_vs_state(ctx);
2392     gen6_emit_gs_state(ctx);
2393     gen6_emit_clip_state(ctx);
2394     gen6_emit_sf_state(ctx);
2395     gen6_emit_wm_state(ctx, kernel);
2396     gen6_emit_binding_table(ctx);
2397     gen6_emit_depth_buffer_state(ctx);
2398     gen6_emit_drawing_rectangle(ctx);
2399     gen6_emit_vertex_element_state(ctx);
2400     gen6_emit_vertices(ctx);
2401     intel_batchbuffer_end_atomic(batch);
2402 }
2403
2404 static void
2405 gen6_render_put_surface(
2406     VADriverContextP   ctx,
2407     struct object_surface *obj_surface,
2408     const VARectangle *src_rect,
2409     const VARectangle *dst_rect,
2410     unsigned int       flags
2411 )
2412 {
2413     struct i965_driver_data *i965 = i965_driver_data(ctx);
2414     struct intel_batchbuffer *batch = i965->batch;
2415
2416     gen6_render_initialize(ctx);
2417     gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
2418     i965_clear_dest_region(ctx);
2419     gen6_render_emit_states(ctx, PS_KERNEL);
2420     intel_batchbuffer_flush(batch);
2421 }
2422
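/*
 * Subpictures are composited with source-over blending:
 * dst = src * src_alpha + dst * (1 - src_alpha), with pre- and
 * post-blend clamping to [0, 1].
 */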
2423 static void
2424 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2425 {
2426     struct i965_driver_data *i965 = i965_driver_data(ctx);
2427     struct i965_render_state *render_state = &i965->render_state;
2428     struct gen6_blend_state *blend_state;
2429
2431     dri_bo_map(render_state->cc.blend, 1);
2432     assert(render_state->cc.blend->virtual);
2433     blend_state = render_state->cc.blend->virtual;
2434     memset(blend_state, 0, sizeof(*blend_state));
2435     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2436     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2437     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2438     blend_state->blend0.blend_enable = 1;
2439     blend_state->blend1.post_blend_clamp_enable = 1;
2440     blend_state->blend1.pre_blend_clamp_enable = 1;
2441     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2442     dri_bo_unmap(render_state->cc.blend);
2443 }
2444
2445 static void
2446 gen6_subpicture_render_setup_states(
2447     VADriverContextP   ctx,
2448     struct object_surface *obj_surface,
2449     const VARectangle *src_rect,
2450     const VARectangle *dst_rect
2451 )
2452 {
2453     i965_render_dest_surface_state(ctx, 0);
2454     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
2455     i965_render_sampler(ctx);
2456     i965_render_cc_viewport(ctx);
2457     gen6_render_color_calc_state(ctx);
2458     gen6_subpicture_render_blend_state(ctx);
2459     gen6_render_depth_stencil_state(ctx);
2460     i965_subpic_render_upload_constants(ctx, obj_surface);
2461     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
2462 }
2463
2464 static void
2465 gen6_render_put_subpicture(
2466     VADriverContextP   ctx,
2467     struct object_surface *obj_surface,
2468     const VARectangle *src_rect,
2469     const VARectangle *dst_rect
2470 )
2471 {
2472     struct i965_driver_data *i965 = i965_driver_data(ctx);
2473     struct intel_batchbuffer *batch = i965->batch;
2474     unsigned int index = obj_surface->subpic_render_idx;
2475     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
2476
2477     assert(obj_subpic);
2478     gen6_render_initialize(ctx);
2479     gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
2480     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2481     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
2482     intel_batchbuffer_flush(batch);
2483 }
2484
2485 /*
2486  * for GEN7
2487  */
2488 static void 
2489 gen7_render_initialize(VADriverContextP ctx)
2490 {
2491     struct i965_driver_data *i965 = i965_driver_data(ctx);
2492     struct i965_render_state *render_state = &i965->render_state;
2493     dri_bo *bo;
2494     int size;
2495
2496     /* VERTEX BUFFER */
2497     dri_bo_unreference(render_state->vb.vertex_buffer);
2498     bo = dri_bo_alloc(i965->intel.bufmgr,
2499                       "vertex buffer",
2500                       4096,
2501                       4096);
2502     assert(bo);
2503     render_state->vb.vertex_buffer = bo;
2504
2505     /* WM */
2506     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2507     bo = dri_bo_alloc(i965->intel.bufmgr,
2508                       "surface state & binding table",
2509                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2510                       4096);
2511     assert(bo);
2512     render_state->wm.surface_state_binding_table_bo = bo;
2513
2514     dri_bo_unreference(render_state->wm.sampler);
2515     bo = dri_bo_alloc(i965->intel.bufmgr,
2516                       "sampler state",
2517                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
2518                       4096);
2519     assert(bo);
2520     render_state->wm.sampler = bo;
2521     render_state->wm.sampler_count = 0;
2522
2523     /* COLOR CALCULATOR */
2524     dri_bo_unreference(render_state->cc.state);
2525     bo = dri_bo_alloc(i965->intel.bufmgr,
2526                       "color calc state",
2527                       sizeof(struct gen6_color_calc_state),
2528                       4096);
2529     assert(bo);
2530     render_state->cc.state = bo;
2531
2532     /* CC VIEWPORT */
2533     dri_bo_unreference(render_state->cc.viewport);
2534     bo = dri_bo_alloc(i965->intel.bufmgr,
2535                       "cc viewport",
2536                       sizeof(struct i965_cc_viewport),
2537                       4096);
2538     assert(bo);
2539     render_state->cc.viewport = bo;
2540
2541     /* BLEND STATE */
2542     dri_bo_unreference(render_state->cc.blend);
2543     size = sizeof(struct gen8_global_blend_state) + 2 * sizeof(struct gen8_blend_state_rt);
2544     bo = dri_bo_alloc(i965->intel.bufmgr,
2545                       "blend state",
2546                       size,
2547                       4096);
2548     assert(bo);
2549     render_state->cc.blend = bo;
2550
2551     /* DEPTH & STENCIL STATE */
2552     dri_bo_unreference(render_state->cc.depth_stencil);
2553     bo = dri_bo_alloc(i965->intel.bufmgr,
2554                       "depth & stencil state",
2555                       sizeof(struct gen6_depth_stencil_state),
2556                       4096);
2557     assert(bo);
2558     render_state->cc.depth_stencil = bo;
2559 }
2560
2561 /*
2562  * for GEN8
2563  */
2564 static void 
2565 gen8_render_initialize(VADriverContextP ctx)
2566 {
2567     struct i965_driver_data *i965 = i965_driver_data(ctx);
2568     struct i965_render_state *render_state = &i965->render_state;
2569     dri_bo *bo;
2570     int size;
2571     unsigned int end_offset;
2572
2573     /* VERTEX BUFFER */
2574     dri_bo_unreference(render_state->vb.vertex_buffer);
2575     bo = dri_bo_alloc(i965->intel.bufmgr,
2576                       "vertex buffer",
2577                       4096,
2578                       4096);
2579     assert(bo);
2580     render_state->vb.vertex_buffer = bo;
2581
2582     /* WM */
2583     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2584     bo = dri_bo_alloc(i965->intel.bufmgr,
2585                       "surface state & binding table",
2586                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2587                       4096);
2588     assert(bo);
2589     render_state->wm.surface_state_binding_table_bo = bo;
2590
2591     render_state->curbe_size = 256;
2592
2593     render_state->wm.sampler_count = 0;
2594
2595     render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state);
2596
2597     render_state->cc_state_size = sizeof(struct gen6_color_calc_state);
2598
2599     render_state->cc_viewport_size = sizeof(struct i965_cc_viewport);
2600
2601     render_state->blend_state_size = sizeof(struct gen8_global_blend_state) +
2602                         16 * sizeof(struct gen8_blend_state_rt);
2603
2604     render_state->sf_clip_size = 1024;
2605
2606     render_state->scissor_size = 1024;
2607
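    /*
     * On GEN8 all of the dynamic state is carved out of a single bo, in
     * this order, with every piece aligned to 64 bytes:
     *   curbe | samplers | cc viewport | cc state | blend | sf_clip | scissor
     * The extra 4096 bytes leave room for the alignment padding.
     */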
2608     size = 4096 + render_state->curbe_size + render_state->sampler_size +
2609                 render_state->cc_state_size + render_state->cc_viewport_size +
2610                 render_state->blend_state_size + render_state->sf_clip_size +
2611                 render_state->scissor_size;
2612
2613     dri_bo_unreference(render_state->dynamic_state.bo);
2614     bo = dri_bo_alloc(i965->intel.bufmgr,
2615                       "dynamic_state",
2616                       size,
2617                       4096);
2618
2619     render_state->dynamic_state.bo = bo;
2620
2621     end_offset = 0;
2622     render_state->dynamic_state.end_offset = 0;
2623
2624     /* Constant buffer offset */
2625     render_state->curbe_offset = ALIGN(end_offset, 64);
2626     end_offset += render_state->curbe_size;
2627
2628     /* Sampler_state  */
2629     render_state->sampler_offset = ALIGN(end_offset, 64);
2630     end_offset += render_state->sampler_size;
2631
2632     /* CC_VIEWPORT_state  */
2633     render_state->cc_viewport_offset = ALIGN(end_offset, 64);
2634     end_offset += render_state->cc_viewport_size;
2635
2636     /* CC_STATE_state  */
2637     render_state->cc_state_offset = ALIGN(end_offset, 64);
2638     end_offset += render_state->cc_state_size;
2639
2640     /* Blend_state  */
2641     render_state->blend_state_offset = ALIGN(end_offset, 64);
2642     end_offset += render_state->blend_state_size;
2643
2644     /* SF_CLIP_state  */
2645     render_state->sf_clip_offset = ALIGN(end_offset, 64);
2646     end_offset += render_state->sf_clip_size;
2647
2648     /* SCISSOR_state  */
2649     render_state->scissor_offset = ALIGN(end_offset, 64);
2650     end_offset += render_state->scissor_size;
2651
2652     /* update the end offset of dynamic_state */
2653     render_state->dynamic_state.end_offset = ALIGN(end_offset, 64);
2654
2655 }
2656
2657 static void
2658 gen7_render_color_calc_state(VADriverContextP ctx)
2659 {
2660     struct i965_driver_data *i965 = i965_driver_data(ctx);
2661     struct i965_render_state *render_state = &i965->render_state;
2662     struct gen6_color_calc_state *color_calc_state;
2663     
2664     dri_bo_map(render_state->cc.state, 1);
2665     assert(render_state->cc.state->virtual);
2666     color_calc_state = render_state->cc.state->virtual;
2667     memset(color_calc_state, 0, sizeof(*color_calc_state));
2668     color_calc_state->constant_r = 1.0;
2669     color_calc_state->constant_g = 0.0;
2670     color_calc_state->constant_b = 1.0;
2671     color_calc_state->constant_a = 1.0;
2672     dri_bo_unmap(render_state->cc.state);
2673 }
2674
2675 static void
2676 gen7_render_blend_state(VADriverContextP ctx)
2677 {
2678     struct i965_driver_data *i965 = i965_driver_data(ctx);
2679     struct i965_render_state *render_state = &i965->render_state;
2680     struct gen6_blend_state *blend_state;
2681     
2682     dri_bo_map(render_state->cc.blend, 1);
2683     assert(render_state->cc.blend->virtual);
2684     blend_state = render_state->cc.blend->virtual;
2685     memset(blend_state, 0, sizeof(*blend_state));
2686     blend_state->blend1.logic_op_enable = 1;
2687     blend_state->blend1.logic_op_func = 0xc; /* COPY: output = src */
2688     blend_state->blend1.pre_blend_clamp_enable = 1;
2689     dri_bo_unmap(render_state->cc.blend);
2690 }
2691
2692 static void
2693 gen7_render_depth_stencil_state(VADriverContextP ctx)
2694 {
2695     struct i965_driver_data *i965 = i965_driver_data(ctx);
2696     struct i965_render_state *render_state = &i965->render_state;
2697     struct gen6_depth_stencil_state *depth_stencil_state;
2698     
2699     dri_bo_map(render_state->cc.depth_stencil, 1);
2700     assert(render_state->cc.depth_stencil->virtual);
2701     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2702     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2703     dri_bo_unmap(render_state->cc.depth_stencil);
2704 }
2705
2706 static void 
2707 gen7_render_sampler(VADriverContextP ctx)
2708 {
2709     struct i965_driver_data *i965 = i965_driver_data(ctx);
2710     struct i965_render_state *render_state = &i965->render_state;
2711     struct gen7_sampler_state *sampler_state;
2712     int i;
2713     
2714     assert(render_state->wm.sampler_count > 0);
2715     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2716
2717     dri_bo_map(render_state->wm.sampler, 1);
2718     assert(render_state->wm.sampler->virtual);
2719     sampler_state = render_state->wm.sampler->virtual;
2720     for (i = 0; i < render_state->wm.sampler_count; i++) {
2721         memset(sampler_state, 0, sizeof(*sampler_state));
2722         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2723         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2724         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2725         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2726         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2727         sampler_state++;
2728     }
2729
2730     dri_bo_unmap(render_state->wm.sampler);
2731 }
2732
2733 static void 
2734 gen8_render_sampler(VADriverContextP ctx)
2735 {
2736     struct i965_driver_data *i965 = i965_driver_data(ctx);
2737     struct i965_render_state *render_state = &i965->render_state;
2738     struct gen8_sampler_state *sampler_state;
2739     int i;
2740     unsigned char *cc_ptr;
2741     
2742     assert(render_state->wm.sampler_count > 0);
2743     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2744
2745     dri_bo_map(render_state->dynamic_state.bo, 1);
2746     assert(render_state->dynamic_state.bo->virtual);
2747
2748     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
2749                         render_state->sampler_offset;
2750
2751     sampler_state = (struct gen8_sampler_state *) cc_ptr;
2752
2753     for (i = 0; i < render_state->wm.sampler_count; i++) {
2754         memset(sampler_state, 0, sizeof(*sampler_state));
2755         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2756         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2757         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2758         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2759         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2760         sampler_state++;
2761     }
2762
2763     dri_bo_unmap(render_state->dynamic_state.bo);
2764 }
2765
2766
2767 static void
2768 gen7_render_setup_states(
2769     VADriverContextP   ctx,
2770     struct object_surface *obj_surface,
2771     const VARectangle *src_rect,
2772     const VARectangle *dst_rect,
2773     unsigned int       flags
2774 )
2775 {
2776     i965_render_dest_surface_state(ctx, 0);
2777     i965_render_src_surfaces_state(ctx, obj_surface, flags);
2778     gen7_render_sampler(ctx);
2779     i965_render_cc_viewport(ctx);
2780     gen7_render_color_calc_state(ctx);
2781     gen7_render_blend_state(ctx);
2782     gen7_render_depth_stencil_state(ctx);
2783     i965_render_upload_constants(ctx, obj_surface, flags);
2784     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
2785 }
2786
2787 static void
2788 gen8_render_blend_state(VADriverContextP ctx)
2789 {
2790     struct i965_driver_data *i965 = i965_driver_data(ctx);
2791     struct i965_render_state *render_state = &i965->render_state;
2792     struct gen8_global_blend_state *global_blend_state;
2793     struct gen8_blend_state_rt *blend_state;
2794     unsigned char *cc_ptr;
2795     
2796     dri_bo_map(render_state->dynamic_state.bo, 1);
2797     assert(render_state->dynamic_state.bo->virtual);
2798
2799     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
2800                         render_state->blend_state_offset;
2801
2802     global_blend_state = (struct gen8_global_blend_state*) cc_ptr;
2803
2804     memset(global_blend_state, 0, sizeof(*global_blend_state));
2805     /* Global blend state + blend_state for Render Target */
2806     blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
2807     blend_state->blend1.logic_op_enable = 1;
2808     blend_state->blend1.logic_op_func = 0xc; /* COPY: output = src */
2809     blend_state->blend1.pre_blend_clamp_enable = 1;
2810
2811     dri_bo_unmap(render_state->dynamic_state.bo);
2812 }
2813
2814
2815 static void 
2816 gen8_render_cc_viewport(VADriverContextP ctx)
2817 {
2818     struct i965_driver_data *i965 = i965_driver_data(ctx);
2819     struct i965_render_state *render_state = &i965->render_state;
2820     struct i965_cc_viewport *cc_viewport;
2821     unsigned char *cc_ptr;
2822
2823     dri_bo_map(render_state->dynamic_state.bo, 1);
2824     assert(render_state->dynamic_state.bo->virtual);
2825
2826     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
2827                         render_state->cc_viewport_offset;
2828
2829     cc_viewport = (struct i965_cc_viewport *) cc_ptr;
2830
2831     memset(cc_viewport, 0, sizeof(*cc_viewport));
2832     
2833     cc_viewport->min_depth = -1.e35; /* effectively disable depth clamping */
2834     cc_viewport->max_depth = 1.e35;
2835
2836     dri_bo_unmap(render_state->dynamic_state.bo);
2837 }
2838
2839 static void
2840 gen8_render_color_calc_state(VADriverContextP ctx)
2841 {
2842     struct i965_driver_data *i965 = i965_driver_data(ctx);
2843     struct i965_render_state *render_state = &i965->render_state;
2844     struct gen6_color_calc_state *color_calc_state;
2845     unsigned char *cc_ptr;
2846
2847     dri_bo_map(render_state->dynamic_state.bo, 1);
2848     assert(render_state->dynamic_state.bo->virtual);
2849
2850     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
2851                         render_state->cc_state_offset;
2852
2853     color_calc_state = (struct gen6_color_calc_state *) cc_ptr;
2854
2855     memset(color_calc_state, 0, sizeof(*color_calc_state));
2856     color_calc_state->constant_r = 1.0;
2857     color_calc_state->constant_g = 0.0;
2858     color_calc_state->constant_b = 1.0;
2859     color_calc_state->constant_a = 1.0;
2860     dri_bo_unmap(render_state->dynamic_state.bo);
2861 }
2862
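/*
 * CURBE layout consumed by the GEN8 pixel shader:
 *   ushort[0]   source format selector (2 = Y800, 1 = NV12, 0 = other planar)
 *   ushort[1]   1 to skip the color-balance transform, 0 to apply it
 *   float[4-7]  color-balance coefficients (contrast, brightness, and the
 *               cos/sin hue terms scaled by contrast * saturation)
 *   float[8-]   YUV -> RGB matrix picked from the VA_SRC_* color standard
 *               flag (BT.601 by default, BT.709, or SMPTE 240M)
 */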
2863 static void
2864 gen8_render_upload_constants(VADriverContextP ctx,
2865                              struct object_surface *obj_surface,
2866                              unsigned int flags)
2867 {
2868     struct i965_driver_data *i965 = i965_driver_data(ctx);
2869     struct i965_render_state *render_state = &i965->render_state;
2870     unsigned short *constant_buffer;
2871     unsigned char *cc_ptr;
2872     float *color_balance_base;
2873     float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
2874     float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
2875     float hue = (float)i965->hue_attrib->value / 180 * PI;
2876     float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
2877     float *yuv_to_rgb;
2878     unsigned int color_flag;
2879
2880     dri_bo_map(render_state->dynamic_state.bo, 1);
2881     assert(render_state->dynamic_state.bo->virtual);
2882
2883     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
2884                         render_state->curbe_offset;
2885
2886     constant_buffer = (unsigned short *) cc_ptr;
2887
2888     if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
2889         assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0'));
2890
2891         *constant_buffer = 2;
2892     } else {
2893         if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
2894             *constant_buffer = 1;
2895         else
2896             *constant_buffer = 0;
2897     }
2898
2899     if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
2900         i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
2901         i965->hue_attrib->value == DEFAULT_HUE &&
2902         i965->saturation_attrib->value == DEFAULT_SATURATION)
2903         constant_buffer[1] = 1; /* skip color balance transformation */
2904     else
2905         constant_buffer[1] = 0;
2906
2907     color_balance_base = (float *)constant_buffer + 4;
2908     *color_balance_base++ = contrast;
2909     *color_balance_base++ = brightness;
2910     *color_balance_base++ = cos(hue) * contrast * saturation;
2911     *color_balance_base++ = sin(hue) * contrast * saturation;
2912
2913     color_flag = flags & VA_SRC_COLOR_MASK;
2914     yuv_to_rgb = (float *)constant_buffer + 8;
2915     if (color_flag == VA_SRC_BT709)
2916         memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
2917     else if (color_flag == VA_SRC_SMPTE_240)
2918         memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
2919     else
2920         memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));
2921
2922     dri_bo_unmap(render_state->dynamic_state.bo);
2923 }
2924
2925 static void
2926 gen8_render_setup_states(
2927     VADriverContextP   ctx,
2928     struct object_surface *obj_surface,
2929     const VARectangle *src_rect,
2930     const VARectangle *dst_rect,
2931     unsigned int       flags
2932 )
2933 {
2934     i965_render_dest_surface_state(ctx, 0);
2935     i965_render_src_surfaces_state(ctx, obj_surface, flags);
2936     gen8_render_sampler(ctx);
2937     gen8_render_cc_viewport(ctx);
2938     gen8_render_color_calc_state(ctx);
2939     gen8_render_blend_state(ctx);
2940     gen8_render_upload_constants(ctx, obj_surface, flags);
2941     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
2942 }
2943
2944 static void
2945 gen7_emit_invarient_states(VADriverContextP ctx)
2946 {
2947     struct i965_driver_data *i965 = i965_driver_data(ctx);
2948     struct intel_batchbuffer *batch = i965->batch;
2949
2950     BEGIN_BATCH(batch, 1);
2951     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
2952     ADVANCE_BATCH(batch);
2953
2954     BEGIN_BATCH(batch, 4);
2955     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
2956     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
2957               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
2958     OUT_BATCH(batch, 0);
2959     OUT_BATCH(batch, 0);
2960     ADVANCE_BATCH(batch);
2961
2962     BEGIN_BATCH(batch, 2);
2963     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
2964     OUT_BATCH(batch, 1);
2965     ADVANCE_BATCH(batch);
2966
2967     /* Set system instruction pointer */
2968     BEGIN_BATCH(batch, 2);
2969     OUT_BATCH(batch, CMD_STATE_SIP | 0);
2970     OUT_BATCH(batch, 0);
2971     ADVANCE_BATCH(batch);
2972 }
2973
2974 static void
2975 gen7_emit_state_base_address(VADriverContextP ctx)
2976 {
2977     struct i965_driver_data *i965 = i965_driver_data(ctx);
2978     struct intel_batchbuffer *batch = i965->batch;
2979     struct i965_render_state *render_state = &i965->render_state;
2980
2981     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2982     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2983     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2984     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2985     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2986     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2987     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
2988     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2989     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2990     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2991 }
2992
2993 static void
2994 gen8_emit_state_base_address(VADriverContextP ctx)
2995 {
2996     struct i965_driver_data *i965 = i965_driver_data(ctx);
2997     struct intel_batchbuffer *batch = i965->batch;
2998     struct i965_render_state *render_state = &i965->render_state;
2999
3000     BEGIN_BATCH(batch, 16);
3001     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));
3002     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
3003     OUT_BATCH(batch, 0);
3004     OUT_BATCH(batch, 0);
3005     /* DW4 */
3006     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
3007     OUT_BATCH(batch, 0);
3008
3009     /* DW6 */
3010     /* Dynamic state base address */
3011     OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
3012               0, BASE_ADDRESS_MODIFY);
3013     OUT_BATCH(batch, 0);
3014
3015     /* DW8 */
3016     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
3017     OUT_BATCH(batch, 0);
3018
3019     /* DW10 */
3020     /* Instruction base address */
3021     OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
3022     OUT_BATCH(batch, 0);
3023
3024     /* DW12 */
3025     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */
3026     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
3027     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
3028     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
3029     ADVANCE_BATCH(batch);
3030 }
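/*
 * STATE_BASE_ADDRESS grows to 16 dwords on GEN8 because every base
 * address is 64-bit (two dwords).  Dynamic state and instruction state
 * point at the driver-managed dynamic_state/instruction_state BOs, so
 * later commands (the CC/blend state pointers, 3DSTATE_PS, the sampler
 * pointers) can pass plain offsets into those buffers instead of
 * per-command relocations.
 */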
3031
3032 static void
3033 gen7_emit_viewport_state_pointers(VADriverContextP ctx)
3034 {
3035     struct i965_driver_data *i965 = i965_driver_data(ctx);
3036     struct intel_batchbuffer *batch = i965->batch;
3037     struct i965_render_state *render_state = &i965->render_state;
3038
3039     BEGIN_BATCH(batch, 2);
3040     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
3041     OUT_RELOC(batch,
3042               render_state->cc.viewport,
3043               I915_GEM_DOMAIN_INSTRUCTION, 0,
3044               0);
3045     ADVANCE_BATCH(batch);
3046
3047     BEGIN_BATCH(batch, 2);
3048     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
3049     OUT_BATCH(batch, 0);
3050     ADVANCE_BATCH(batch);
3051 }
3052
3053 /*
3054  * URB layout on GEN7 
3055  * ----------------------------------------
3056  * | PS Push Constants (8KB) | VS entries |
3057  * ----------------------------------------
3058  */
3059 static void
3060 gen7_emit_urb(VADriverContextP ctx)
3061 {
3062     struct i965_driver_data *i965 = i965_driver_data(ctx);
3063     struct intel_batchbuffer *batch = i965->batch;
3064     unsigned int num_urb_entries = 32;
3065
3066     if (IS_HASWELL(i965->intel.device_id))
3067         num_urb_entries = 64;
3068
3069     BEGIN_BATCH(batch, 2);
3070     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
3071     OUT_BATCH(batch, 8); /* in 1KBs */
3072     ADVANCE_BATCH(batch);
3073
3074     BEGIN_BATCH(batch, 2);
3075     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
3076     OUT_BATCH(batch, 
3077               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
3078               (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
3079               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3080     ADVANCE_BATCH(batch);
3081
3082     BEGIN_BATCH(batch, 2);
3083     OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
3084     OUT_BATCH(batch,
3085               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
3086               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3087     ADVANCE_BATCH(batch);
3088
3089     BEGIN_BATCH(batch, 2);
3090     OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
3091     OUT_BATCH(batch,
3092               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
3093               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3094     ADVANCE_BATCH(batch);
3095
3096     BEGIN_BATCH(batch, 2);
3097     OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
3098     OUT_BATCH(batch,
3099               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
3100               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3101     ADVANCE_BATCH(batch);
3102 }
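/*
 * With the allocation above, the first 8KB of the URB hold the PS push
 * constants and the VS entries start right after them (starting address
 * 1, a field the GEN7 PRM defines in 8KB units).  GS/HS/DS get
 * zero-sized allocations since those stages are bypassed, and Haswell's
 * larger URB allows the bump to 64 VS entries.
 */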
3103
3104 static void
3105 gen7_emit_cc_state_pointers(VADriverContextP ctx)
3106 {
3107     struct i965_driver_data *i965 = i965_driver_data(ctx);
3108     struct intel_batchbuffer *batch = i965->batch;
3109     struct i965_render_state *render_state = &i965->render_state;
3110
3111     BEGIN_BATCH(batch, 2);
3112     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
3113     OUT_RELOC(batch,
3114               render_state->cc.state,
3115               I915_GEM_DOMAIN_INSTRUCTION, 0,
3116               1);
3117     ADVANCE_BATCH(batch);
3118
3119     BEGIN_BATCH(batch, 2);
3120     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
3121     OUT_RELOC(batch,
3122               render_state->cc.blend,
3123               I915_GEM_DOMAIN_INSTRUCTION, 0,
3124               1);
3125     ADVANCE_BATCH(batch);
3126
3127     BEGIN_BATCH(batch, 2);
3128     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
3129     OUT_RELOC(batch,
3130               render_state->cc.depth_stencil,
3131               I915_GEM_DOMAIN_INSTRUCTION, 0, 
3132               1);
3133     ADVANCE_BATCH(batch);
3134 }
3135
3136 static void
3137 gen8_emit_cc_state_pointers(VADriverContextP ctx)
3138 {
3139     struct i965_driver_data *i965 = i965_driver_data(ctx);
3140     struct intel_batchbuffer *batch = i965->batch;
3141     struct i965_render_state *render_state = &i965->render_state;
3142
3143     BEGIN_BATCH(batch, 2);
3144     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
3145     OUT_BATCH(batch, (render_state->cc_state_offset + 1));
3146     ADVANCE_BATCH(batch);
3147
3148     BEGIN_BATCH(batch, 2);
3149     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
3150     OUT_BATCH(batch, (render_state->blend_state_offset + 1));
3151     ADVANCE_BATCH(batch);
3152
3153 }
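/*
 * On GEN8 these pointers are offsets into dynamic_state.bo (programmed
 * as the dynamic state base in STATE_BASE_ADDRESS); the low bit set by
 * "+ 1" is the pointer-valid/enable bit of these commands.
 */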
3154
3155 static void
3156 gen7_emit_sampler_state_pointers(VADriverContextP ctx)
3157 {
3158     struct i965_driver_data *i965 = i965_driver_data(ctx);
3159     struct intel_batchbuffer *batch = i965->batch;
3160     struct i965_render_state *render_state = &i965->render_state;
3161
3162     BEGIN_BATCH(batch, 2);
3163     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
3164     OUT_RELOC(batch,
3165               render_state->wm.sampler,
3166               I915_GEM_DOMAIN_INSTRUCTION, 0,
3167               0);
3168     ADVANCE_BATCH(batch);
3169 }
3170
3171 static void
3172 gen7_emit_binding_table(VADriverContextP ctx)
3173 {
3174     struct i965_driver_data *i965 = i965_driver_data(ctx);
3175     struct intel_batchbuffer *batch = i965->batch;
3176
3177     BEGIN_BATCH(batch, 2);
3178     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
3179     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
3180     ADVANCE_BATCH(batch);
3181 }
3182
3183 static void
3184 gen7_emit_depth_buffer_state(VADriverContextP ctx)
3185 {
3186     struct i965_driver_data *i965 = i965_driver_data(ctx);
3187     struct intel_batchbuffer *batch = i965->batch;
3188
3189     BEGIN_BATCH(batch, 7);
3190     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
3191     OUT_BATCH(batch,
3192               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
3193               (I965_SURFACE_NULL << 29));
3194     OUT_BATCH(batch, 0);
3195     OUT_BATCH(batch, 0);
3196     OUT_BATCH(batch, 0);
3197     OUT_BATCH(batch, 0);
3198     OUT_BATCH(batch, 0);
3199     ADVANCE_BATCH(batch);
3200
3201     BEGIN_BATCH(batch, 3);
3202     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
3203     OUT_BATCH(batch, 0);
3204     OUT_BATCH(batch, 0);
3205     ADVANCE_BATCH(batch);
3206 }
3207
3208 static void
3209 gen7_emit_drawing_rectangle(VADriverContextP ctx)
3210 {
3211     i965_render_drawing_rectangle(ctx);
3212 }
3213
3214 static void 
3215 gen7_emit_vs_state(VADriverContextP ctx)
3216 {
3217     struct i965_driver_data *i965 = i965_driver_data(ctx);
3218     struct intel_batchbuffer *batch = i965->batch;
3219
3220     /* disable VS constant buffer */
3221     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
3222     OUT_BATCH(batch, 0);
3223     OUT_BATCH(batch, 0);
3224     OUT_BATCH(batch, 0);
3225     OUT_BATCH(batch, 0);
3226     OUT_BATCH(batch, 0);
3227     OUT_BATCH(batch, 0);
3228         
3229     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
3230     OUT_BATCH(batch, 0); /* without VS kernel */
3231     OUT_BATCH(batch, 0);
3232     OUT_BATCH(batch, 0);
3233     OUT_BATCH(batch, 0);
3234     OUT_BATCH(batch, 0); /* pass-through */
3235 }
3236
3237 static void 
3238 gen7_emit_bypass_state(VADriverContextP ctx)
3239 {
3240     struct i965_driver_data *i965 = i965_driver_data(ctx);
3241     struct intel_batchbuffer *batch = i965->batch;
3242
3243     /* bypass GS */
3244     BEGIN_BATCH(batch, 7);
3245     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
3246     OUT_BATCH(batch, 0);
3247     OUT_BATCH(batch, 0);
3248     OUT_BATCH(batch, 0);
3249     OUT_BATCH(batch, 0);
3250     OUT_BATCH(batch, 0);
3251     OUT_BATCH(batch, 0);
3252     ADVANCE_BATCH(batch);
3253
3254     BEGIN_BATCH(batch, 7);      
3255     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
3256     OUT_BATCH(batch, 0); /* without GS kernel */
3257     OUT_BATCH(batch, 0);
3258     OUT_BATCH(batch, 0);
3259     OUT_BATCH(batch, 0);
3260     OUT_BATCH(batch, 0);
3261     OUT_BATCH(batch, 0); /* pass-through */
3262     ADVANCE_BATCH(batch);
3263
3264     BEGIN_BATCH(batch, 2);
3265     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
3266     OUT_BATCH(batch, 0);
3267     ADVANCE_BATCH(batch);
3268
3269     /* disable HS */
3270     BEGIN_BATCH(batch, 7);
3271     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
3272     OUT_BATCH(batch, 0);
3273     OUT_BATCH(batch, 0);
3274     OUT_BATCH(batch, 0);
3275     OUT_BATCH(batch, 0);
3276     OUT_BATCH(batch, 0);
3277     OUT_BATCH(batch, 0);
3278     ADVANCE_BATCH(batch);
3279
3280     BEGIN_BATCH(batch, 7);
3281     OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
3282     OUT_BATCH(batch, 0);
3283     OUT_BATCH(batch, 0);
3284     OUT_BATCH(batch, 0);
3285     OUT_BATCH(batch, 0);
3286     OUT_BATCH(batch, 0);
3287     OUT_BATCH(batch, 0);
3288     ADVANCE_BATCH(batch);
3289
3290     BEGIN_BATCH(batch, 2);
3291     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
3292     OUT_BATCH(batch, 0);
3293     ADVANCE_BATCH(batch);
3294
3295     /* Disable TE */
3296     BEGIN_BATCH(batch, 4);
3297     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
3298     OUT_BATCH(batch, 0);
3299     OUT_BATCH(batch, 0);
3300     OUT_BATCH(batch, 0);
3301     ADVANCE_BATCH(batch);
3302
3303     /* Disable DS */
3304     BEGIN_BATCH(batch, 7);
3305     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
3306     OUT_BATCH(batch, 0);
3307     OUT_BATCH(batch, 0);
3308     OUT_BATCH(batch, 0);
3309     OUT_BATCH(batch, 0);
3310     OUT_BATCH(batch, 0);
3311     OUT_BATCH(batch, 0);
3312     ADVANCE_BATCH(batch);
3313
3314     BEGIN_BATCH(batch, 6);
3315     OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
3316     OUT_BATCH(batch, 0);
3317     OUT_BATCH(batch, 0);
3318     OUT_BATCH(batch, 0);
3319     OUT_BATCH(batch, 0);
3320     OUT_BATCH(batch, 0);
3321     ADVANCE_BATCH(batch);
3322
3323     BEGIN_BATCH(batch, 2);
3324     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
3325     OUT_BATCH(batch, 0);
3326     ADVANCE_BATCH(batch);
3327
3328     /* Disable STREAMOUT */
3329     BEGIN_BATCH(batch, 3);
3330     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
3331     OUT_BATCH(batch, 0);
3332     OUT_BATCH(batch, 0);
3333     ADVANCE_BATCH(batch);
3334 }
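/*
 * The pattern above disables each unused fixed-function stage the same
 * way: zero its constant buffer, zero its stage state (no kernel,
 * pass-through) and zero its binding table, leaving only the
 * pass-through VS and the PS active.
 */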
3335
3336 static void 
3337 gen7_emit_clip_state(VADriverContextP ctx)
3338 {
3339     struct i965_driver_data *i965 = i965_driver_data(ctx);
3340     struct intel_batchbuffer *batch = i965->batch;
3341
3342     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
3343     OUT_BATCH(batch, 0);
3344     OUT_BATCH(batch, 0); /* pass-through */
3345     OUT_BATCH(batch, 0);
3346 }
3347
3348 static void 
3349 gen7_emit_sf_state(VADriverContextP ctx)
3350 {
3351     struct i965_driver_data *i965 = i965_driver_data(ctx);
3352     struct intel_batchbuffer *batch = i965->batch;
3353
3354     BEGIN_BATCH(batch, 14);
3355     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
3356     OUT_BATCH(batch,
3357               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
3358               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
3359               (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
3360     OUT_BATCH(batch, 0);
3361     OUT_BATCH(batch, 0);
3362     OUT_BATCH(batch, 0); /* DW4 */
3363     OUT_BATCH(batch, 0);
3364     OUT_BATCH(batch, 0);
3365     OUT_BATCH(batch, 0);
3366     OUT_BATCH(batch, 0);
3367     OUT_BATCH(batch, 0); /* DW9 */
3368     OUT_BATCH(batch, 0);
3369     OUT_BATCH(batch, 0);
3370     OUT_BATCH(batch, 0);
3371     OUT_BATCH(batch, 0);
3372     ADVANCE_BATCH(batch);
3373
3374     BEGIN_BATCH(batch, 7);
3375     OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
3376     OUT_BATCH(batch, 0);
3377     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
3378     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
3379     OUT_BATCH(batch, 0);
3380     OUT_BATCH(batch, 0);
3381     OUT_BATCH(batch, 0);
3382     ADVANCE_BATCH(batch);
3383 }
3384
3385 static void 
3386 gen7_emit_wm_state(VADriverContextP ctx, int kernel)
3387 {
3388     struct i965_driver_data *i965 = i965_driver_data(ctx);
3389     struct intel_batchbuffer *batch = i965->batch;
3390     struct i965_render_state *render_state = &i965->render_state;
3391     unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
3392     unsigned int num_samples = 0;
3393
3394     if (IS_HASWELL(i965->intel.device_id)) {
3395         max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
3396         num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
3397     }
3398
3399     BEGIN_BATCH(batch, 3);
3400     OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
3401     OUT_BATCH(batch,
3402               GEN7_WM_DISPATCH_ENABLE |
3403               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
3404     OUT_BATCH(batch, 0);
3405     ADVANCE_BATCH(batch);
3406
3407     BEGIN_BATCH(batch, 7);
3408     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
3409     OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
3410     OUT_BATCH(batch, 0);
3411     OUT_RELOC(batch, 
3412               render_state->curbe.bo,
3413               I915_GEM_DOMAIN_INSTRUCTION, 0,
3414               0);
3415     OUT_BATCH(batch, 0);
3416     OUT_BATCH(batch, 0);
3417     OUT_BATCH(batch, 0);
3418     ADVANCE_BATCH(batch);
3419
3420     BEGIN_BATCH(batch, 8);
3421     OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
3422     OUT_RELOC(batch, 
3423               render_state->render_kernels[kernel].bo,
3424               I915_GEM_DOMAIN_INSTRUCTION, 0,
3425               0);
3426     OUT_BATCH(batch, 
3427               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
3428               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
3429     OUT_BATCH(batch, 0); /* scratch space base offset */
3430     OUT_BATCH(batch, 
3431               ((render_state->max_wm_threads - 1) << max_threads_shift) | num_samples |
3432               GEN7_PS_PUSH_CONSTANT_ENABLE |
3433               GEN7_PS_ATTRIBUTE_ENABLE |
3434               GEN7_PS_16_DISPATCH_ENABLE);
3435     OUT_BATCH(batch, 
3436               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
3437     OUT_BATCH(batch, 0); /* kernel 1 pointer */
3438     OUT_BATCH(batch, 0); /* kernel 2 pointer */
3439     ADVANCE_BATCH(batch);
3440 }
3441
3442 static void
3443 gen7_emit_vertex_element_state(VADriverContextP ctx)
3444 {
3445     struct i965_driver_data *i965 = i965_driver_data(ctx);
3446     struct intel_batchbuffer *batch = i965->batch;
3447
3448     /* Set up our vertex elements, sourced from the single vertex buffer. */
3449     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
3450     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
3451     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3452               GEN6_VE0_VALID |
3453               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3454               (0 << VE0_OFFSET_SHIFT));
3455     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
3456               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
3457               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
3458               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
3459     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
3460     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3461               GEN6_VE0_VALID |
3462               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3463               (8 << VE0_OFFSET_SHIFT));
3464     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
3465               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
3466               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
3467               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
3468 }
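/*
 * Each vertex is 16 bytes (pitch 4 * 4 set in gen7_emit_vertices): two
 * R32G32_FLOAT element fetches at byte offsets 0 and 8, each padded out
 * to a vec4 with 1.0 in the remaining components.
 */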
3469
3470 static void
3471 gen7_emit_vertices(VADriverContextP ctx)
3472 {
3473     struct i965_driver_data *i965 = i965_driver_data(ctx);
3474     struct intel_batchbuffer *batch = i965->batch;
3475     struct i965_render_state *render_state = &i965->render_state;
3476
3477     BEGIN_BATCH(batch, 5);
3478     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
3479     OUT_BATCH(batch, 
3480               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
3481               GEN6_VB0_VERTEXDATA |
3482               GEN7_VB0_ADDRESS_MODIFYENABLE |
3483               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
3484     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0); /* start address */
3485     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); /* end address */
3486     OUT_BATCH(batch, 0);
3487     ADVANCE_BATCH(batch);
3488
3489     BEGIN_BATCH(batch, 7);
3490     OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
3491     OUT_BATCH(batch,
3492               _3DPRIM_RECTLIST |
3493               GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
3494     OUT_BATCH(batch, 3); /* vertex count per instance */
3495     OUT_BATCH(batch, 0); /* start vertex offset */
3496     OUT_BATCH(batch, 1); /* single instance */
3497     OUT_BATCH(batch, 0); /* start instance location */
3498     OUT_BATCH(batch, 0);
3499     ADVANCE_BATCH(batch);
3500 }
3501
3502 static void
3503 gen7_render_emit_states(VADriverContextP ctx, int kernel)
3504 {
3505     struct i965_driver_data *i965 = i965_driver_data(ctx);
3506     struct intel_batchbuffer *batch = i965->batch;
3507
3508     intel_batchbuffer_start_atomic(batch, 0x1000);
3509     intel_batchbuffer_emit_mi_flush(batch);
3510     gen7_emit_invarient_states(ctx);
3511     gen7_emit_state_base_address(ctx);
3512     gen7_emit_viewport_state_pointers(ctx);
3513     gen7_emit_urb(ctx);
3514     gen7_emit_cc_state_pointers(ctx);
3515     gen7_emit_sampler_state_pointers(ctx);
3516     gen7_emit_bypass_state(ctx);
3517     gen7_emit_vs_state(ctx);
3518     gen7_emit_clip_state(ctx);
3519     gen7_emit_sf_state(ctx);
3520     gen7_emit_wm_state(ctx, kernel);
3521     gen7_emit_binding_table(ctx);
3522     gen7_emit_depth_buffer_state(ctx);
3523     gen7_emit_drawing_rectangle(ctx);
3524     gen7_emit_vertex_element_state(ctx);
3525     gen7_emit_vertices(ctx);
3526     intel_batchbuffer_end_atomic(batch);
3527 }
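/*
 * The whole pipeline setup is emitted inside one atomic batch section
 * (0x1000 bytes reserved up front) so it cannot be split across
 * batchbuffers, and it is preceded by an MI_FLUSH to serialize against
 * earlier rendering.
 */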
3528
3529 static void
3530 gen8_emit_vertices(VADriverContextP ctx)
3531 {
3532     struct i965_driver_data *i965 = i965_driver_data(ctx);
3533     struct intel_batchbuffer *batch = i965->batch;
3534     struct i965_render_state *render_state = &i965->render_state;
3535
3536     BEGIN_BATCH(batch, 5);
3537     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
3538     OUT_BATCH(batch, 
3539               (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) |
3540               (0 << GEN8_VB0_MOCS_SHIFT) |
3541               GEN7_VB0_ADDRESS_MODIFYENABLE |
3542               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
3543     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
3544     OUT_BATCH(batch, 0); /* upper 32 bits of the 64-bit buffer address */
3545     OUT_BATCH(batch, 12 * 4); /* buffer size: 3 vertices x 4 floats */
3546     ADVANCE_BATCH(batch);
3547
3548     /* The topology in 3DPRIMITIVE is overridden by the VF_TOPOLOGY command */
3549     BEGIN_BATCH(batch, 2);
3550     OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
3551     OUT_BATCH(batch,
3552               _3DPRIM_RECTLIST);
3553     ADVANCE_BATCH(batch);
3554
3555     
3556     BEGIN_BATCH(batch, 7);
3557     OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
3558     OUT_BATCH(batch,
3559               GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
3560     OUT_BATCH(batch, 3); /* vertex count per instance */
3561     OUT_BATCH(batch, 0); /* start vertex offset */
3562     OUT_BATCH(batch, 1); /* single instance */
3563     OUT_BATCH(batch, 0); /* start instance location */
3564     OUT_BATCH(batch, 0);
3565     ADVANCE_BATCH(batch);
3566 }
3567
3568 static void
3569 gen8_emit_vertex_element_state(VADriverContextP ctx)
3570 {
3571     struct i965_driver_data *i965 = i965_driver_data(ctx);
3572     struct intel_batchbuffer *batch = i965->batch;
3573
3574     /*
3575      * The VUE layout
3576      * dword 0-3: pad (0, 0, 0, 0)
3577      * dword 4-7: position (x, y, 1.0, 1.0),
3578      * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0)
3579      */
3580
3581     /* Set up our vertex elements, sourced from the single vertex buffer. */
3582     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (7 - 2));
3583
3584     /* Element state 0. These 4 zeroed dwords fill the VUE header
3585      * (dwords 0-3 of the layout above) that precedes the position data.
3586      */
3587
3588     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3589               GEN8_VE0_VALID |
3590               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3591               (0 << VE0_OFFSET_SHIFT));
3592     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
3593               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
3594               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
3595               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
3596
3597     /* offset 8: X, Y -> {x, y, 1.0, 1.0} */
3598     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3599               GEN8_VE0_VALID |
3600               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3601               (8 << VE0_OFFSET_SHIFT));
3602     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
3603               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
3604               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
3605               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
3606
3607     /* offset 0: u,v -> {U, V, 1.0, 1.0} */
3608     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3609               GEN8_VE0_VALID |
3610               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3611               (0 << VE0_OFFSET_SHIFT));
3612     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
3613               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
3614               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
3615               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
3616 }
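/*
 * Unlike the GEN7 version, an extra STORE_0 element fills the four VUE
 * header dwords, and position is fetched from byte offset 8 while the
 * texture coordinate comes from offset 0, matching the (u, v, x, y)
 * vertex layout described above.
 */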
3617
3618 static void 
3619 gen8_emit_vs_state(VADriverContextP ctx)
3620 {
3621     struct i965_driver_data *i965 = i965_driver_data(ctx);
3622     struct intel_batchbuffer *batch = i965->batch;
3623
3624     /* disable VS constant buffer */
3625     BEGIN_BATCH(batch, 11);
3626     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2));
3627     OUT_BATCH(batch, 0);
3628     OUT_BATCH(batch, 0);
3629     /* CS Buffer 0 */
3630     OUT_BATCH(batch, 0);
3631     OUT_BATCH(batch, 0);
3632     /* CS Buffer 1 */
3633     OUT_BATCH(batch, 0);
3634     OUT_BATCH(batch, 0);
3635     /* CS Buffer 2 */
3636     OUT_BATCH(batch, 0);
3637     OUT_BATCH(batch, 0);
3638     /* CS Buffer 3 */
3639     OUT_BATCH(batch, 0);
3640     OUT_BATCH(batch, 0);
3641     ADVANCE_BATCH(batch);
3642         
3643     BEGIN_BATCH(batch, 9);
3644     OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2));
3645     OUT_BATCH(batch, 0); /* without VS kernel */
3646     OUT_BATCH(batch, 0);
3647     /* VS shader dispatch flag */
3648     OUT_BATCH(batch, 0);
3649     OUT_BATCH(batch, 0);
3650     OUT_BATCH(batch, 0);
3651     /* DW6. VS shader GRF and URB buffer definition */
3652     OUT_BATCH(batch, 0);
3653     OUT_BATCH(batch, 0); /* pass-through */
3654     OUT_BATCH(batch, 0);
3655     ADVANCE_BATCH(batch);
3656
3657     BEGIN_BATCH(batch, 2);
3658     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
3659     OUT_BATCH(batch, 0);
3660     ADVANCE_BATCH(batch);
3661
3662     BEGIN_BATCH(batch, 2);
3663     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
3664     OUT_BATCH(batch, 0);
3665     ADVANCE_BATCH(batch);
3666
3667 }
3668
3669 /*
3670  * URB layout on GEN8 
3671  * ----------------------------------------
3672  * | PS Push Constants (8KB) | VS entries |
3673  * ----------------------------------------
3674  */
3675 static void
3676 gen8_emit_urb(VADriverContextP ctx)
3677 {
3678     struct i965_driver_data *i965 = i965_driver_data(ctx);
3679     struct intel_batchbuffer *batch = i965->batch;
3680     unsigned int num_urb_entries = 64;
3681
3682     /* The minimum number of VS URB entries on GEN8 is 64 */
3683
3684     BEGIN_BATCH(batch, 2);
3685     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
3686     OUT_BATCH(batch, 0);
3687     ADVANCE_BATCH(batch);
3688
3689     BEGIN_BATCH(batch, 2);
3690     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
3691     OUT_BATCH(batch, 0);
3692     ADVANCE_BATCH(batch);
3693
3694     BEGIN_BATCH(batch, 2);
3695     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
3696     OUT_BATCH(batch, 0);
3697     ADVANCE_BATCH(batch);
3698
3699     BEGIN_BATCH(batch, 2);
3700     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
3701     OUT_BATCH(batch, 0);
3702     ADVANCE_BATCH(batch);
3703
3705     BEGIN_BATCH(batch, 2);
3706     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
3707     /* Size is 8KB and base address is 0KB */
3708     OUT_BATCH(batch,
3709                 (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) |
3710                 (8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT));
3711     ADVANCE_BATCH(batch);
3712
3713     BEGIN_BATCH(batch, 2);
3714     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
3715     OUT_BATCH(batch, 
3716               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
3717               (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
3718               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3719     ADVANCE_BATCH(batch);
3720
3721     BEGIN_BATCH(batch, 2);
3722     OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
3723     OUT_BATCH(batch,
3724               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
3725               (5 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3726     ADVANCE_BATCH(batch);
3727
3728     BEGIN_BATCH(batch, 2);
3729     OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
3730     OUT_BATCH(batch,
3731               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
3732               (6 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3733     ADVANCE_BATCH(batch);
3734
3735     BEGIN_BATCH(batch, 2);
3736     OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
3737     OUT_BATCH(batch,
3738               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
3739               (7 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3740     ADVANCE_BATCH(batch);
3741 }
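/*
 * On GEN8 the push-constant space must be carved out explicitly per
 * stage; only the PS gets a slice (8KB at offset 0) while VS/DS/HS/GS
 * are given none.  The VS URB entries again start after the push
 * constants, and the bypassed stages get zero-sized allocations.
 */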
3742
3743 static void 
3744 gen8_emit_bypass_state(VADriverContextP ctx)
3745 {
3746     struct i965_driver_data *i965 = i965_driver_data(ctx);
3747     struct intel_batchbuffer *batch = i965->batch;
3748
3749     /* bypass GS */
3750     BEGIN_BATCH(batch, 11);
3751     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2));
3752     OUT_BATCH(batch, 0);
3753     OUT_BATCH(batch, 0);
3754     OUT_BATCH(batch, 0);
3755     OUT_BATCH(batch, 0);
3756     OUT_BATCH(batch, 0);
3757     OUT_BATCH(batch, 0);
3758     OUT_BATCH(batch, 0);
3759     OUT_BATCH(batch, 0);
3760     OUT_BATCH(batch, 0);
3761     OUT_BATCH(batch, 0);
3762     ADVANCE_BATCH(batch);
3763
3764     BEGIN_BATCH(batch, 10);     
3765     OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2));
3766     /* GS shader address */
3767     OUT_BATCH(batch, 0); /* without GS kernel */
3768     OUT_BATCH(batch, 0);
3769     /* DW3. GS shader dispatch flag */
3770     OUT_BATCH(batch, 0);
3771     OUT_BATCH(batch, 0);
3772     OUT_BATCH(batch, 0);
3773     /* DW6. GS shader GRF and URB offset/length */
3774     OUT_BATCH(batch, 0);
3775     OUT_BATCH(batch, 0); /* pass-through */
3776     OUT_BATCH(batch, 0);
3777     OUT_BATCH(batch, 0);
3778     ADVANCE_BATCH(batch);
3779
3780     BEGIN_BATCH(batch, 2);
3781     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
3782     OUT_BATCH(batch, 0);
3783     ADVANCE_BATCH(batch);
3784
3785     BEGIN_BATCH(batch, 2);
3786     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
3787     OUT_BATCH(batch, 0);
3788     ADVANCE_BATCH(batch);
3789
3790     /* disable HS */
3791     BEGIN_BATCH(batch, 11);
3792     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2));
3793     OUT_BATCH(batch, 0);
3794     OUT_BATCH(batch, 0);
3795     OUT_BATCH(batch, 0);
3796     OUT_BATCH(batch, 0);
3797     OUT_BATCH(batch, 0);
3798     OUT_BATCH(batch, 0);
3799     OUT_BATCH(batch, 0);
3800     OUT_BATCH(batch, 0);
3801     OUT_BATCH(batch, 0);
3802     OUT_BATCH(batch, 0);
3803     ADVANCE_BATCH(batch);
3804
3805     BEGIN_BATCH(batch, 9);
3806     OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2));
3807     OUT_BATCH(batch, 0);
3808     /*DW2. HS pass-through */
3809     OUT_BATCH(batch, 0);
3810     /*DW3. HS shader address */
3811     OUT_BATCH(batch, 0);
3812     OUT_BATCH(batch, 0);
3813     /*DW5. HS shader flag. URB offset/length and so on */
3814     OUT_BATCH(batch, 0);
3815     OUT_BATCH(batch, 0);
3816     OUT_BATCH(batch, 0);
3817     OUT_BATCH(batch, 0);
3818     ADVANCE_BATCH(batch);
3819
3820     BEGIN_BATCH(batch, 2);
3821     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
3822     OUT_BATCH(batch, 0);
3823     ADVANCE_BATCH(batch);
3824
3825     BEGIN_BATCH(batch, 2);
3826     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
3827     OUT_BATCH(batch, 0);
3828     ADVANCE_BATCH(batch);
3829
3830     /* Disable TE */
3831     BEGIN_BATCH(batch, 4);
3832     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
3833     OUT_BATCH(batch, 0);
3834     OUT_BATCH(batch, 0);
3835     OUT_BATCH(batch, 0);
3836     ADVANCE_BATCH(batch);
3837
3838     /* Disable DS */
3839     BEGIN_BATCH(batch, 11);
3840     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2));
3841     OUT_BATCH(batch, 0);
3842     OUT_BATCH(batch, 0);
3843     OUT_BATCH(batch, 0);
3844     OUT_BATCH(batch, 0);
3845     OUT_BATCH(batch, 0);
3846     OUT_BATCH(batch, 0);
3847     OUT_BATCH(batch, 0);
3848     OUT_BATCH(batch, 0);
3849     OUT_BATCH(batch, 0);
3850     OUT_BATCH(batch, 0);
3851     ADVANCE_BATCH(batch);
3852
3853     BEGIN_BATCH(batch, 9);
3854     OUT_BATCH(batch, GEN7_3DSTATE_DS | (9 - 2));
3855     /* DW1. DS shader pointer */
3856     OUT_BATCH(batch, 0);
3857     OUT_BATCH(batch, 0);
3858     /* DW3-5. DS shader dispatch flag.*/
3859     OUT_BATCH(batch, 0);
3860     OUT_BATCH(batch, 0);
3861     OUT_BATCH(batch, 0);
3862     /* DW6-7. DS shader pass-through, GRF,URB offset/Length,Thread Number*/
3863     OUT_BATCH(batch, 0);
3864     OUT_BATCH(batch, 0);
3865     /* DW8. DS shader output URB */
3866     OUT_BATCH(batch, 0);
3867     ADVANCE_BATCH(batch);
3868
3869     BEGIN_BATCH(batch, 2);
3870     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
3871     OUT_BATCH(batch, 0);
3872     ADVANCE_BATCH(batch);
3873
3874     BEGIN_BATCH(batch, 2);
3875     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
3876     OUT_BATCH(batch, 0);
3877     ADVANCE_BATCH(batch);
3878
3879     /* Disable STREAMOUT */
3880     BEGIN_BATCH(batch, 5);
3881     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2));
3882     OUT_BATCH(batch, 0);
3883     OUT_BATCH(batch, 0);
3884     OUT_BATCH(batch, 0);
3885     OUT_BATCH(batch, 0);
3886     ADVANCE_BATCH(batch);
3887 }
3888
3889 static void
3890 gen8_emit_invarient_states(VADriverContextP ctx)
3891 {
3892     struct i965_driver_data *i965 = i965_driver_data(ctx);
3893     struct intel_batchbuffer *batch = i965->batch;
3894
3895     BEGIN_BATCH(batch, 1);
3896     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
3897     ADVANCE_BATCH(batch);
3898
3899     BEGIN_BATCH(batch, 2);
3900     OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
3901     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
3902               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
3903     ADVANCE_BATCH(batch);
3904
3905     /* Update 3D Multisample pattern */
3906     BEGIN_BATCH(batch, 9);
3907     OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2));
3908     OUT_BATCH(batch, 0);
3909     OUT_BATCH(batch, 0);
3910     OUT_BATCH(batch, 0);
3911     OUT_BATCH(batch, 0);
3912     OUT_BATCH(batch, 0);
3913     OUT_BATCH(batch, 0);
3914     OUT_BATCH(batch, 0);
3915     OUT_BATCH(batch, 0);
3916     ADVANCE_BATCH(batch);
3917
3918
3919     BEGIN_BATCH(batch, 2);
3920     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
3921     OUT_BATCH(batch, 1);
3922     ADVANCE_BATCH(batch);
3923
3924     /* Set system instruction pointer */
3925     BEGIN_BATCH(batch, 3);
3926     OUT_BATCH(batch, CMD_STATE_SIP | (3 - 2)); /* 3 dwords with a 64-bit SIP on GEN8 */
3927     OUT_BATCH(batch, 0);
3928     OUT_BATCH(batch, 0);
3929     ADVANCE_BATCH(batch);
3930 }
3931
3932 static void 
3933 gen8_emit_clip_state(VADriverContextP ctx)
3934 {
3935     struct i965_driver_data *i965 = i965_driver_data(ctx);
3936     struct intel_batchbuffer *batch = i965->batch;
3937
3938     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
3939     OUT_BATCH(batch, 0);
3940     OUT_BATCH(batch, 0); /* pass-through */
3941     OUT_BATCH(batch, 0);
3942 }
3943
3944 static void 
3945 gen8_emit_sf_state(VADriverContextP ctx)
3946 {
3947     struct i965_driver_data *i965 = i965_driver_data(ctx);
3948     struct intel_batchbuffer *batch = i965->batch;
3949
3950     BEGIN_BATCH(batch, 5);
3951     OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2));
3952     OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE);
3953     OUT_BATCH(batch, 0);
3954     OUT_BATCH(batch, 0);
3955     OUT_BATCH(batch, 0);
3956     ADVANCE_BATCH(batch);
3957
3958
3959     BEGIN_BATCH(batch, 4);
3960     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (4 - 2));
3961     OUT_BATCH(batch,
3962               (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) |
3963               (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) |
3964               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
3965               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
3966               (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
3967     OUT_BATCH(batch, 0);
3968     OUT_BATCH(batch, 0);
3969     ADVANCE_BATCH(batch);
3970
3971     /* SBE for backend setup */
3972     BEGIN_BATCH(batch, 11);
3973     OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
3974     OUT_BATCH(batch, 0);
3975     OUT_BATCH(batch, 0);
3976     OUT_BATCH(batch, 0);
3977     OUT_BATCH(batch, 0);
3978     OUT_BATCH(batch, 0);
3979     OUT_BATCH(batch, 0);
3980     OUT_BATCH(batch, 0);
3981     OUT_BATCH(batch, 0);
3982     OUT_BATCH(batch, 0);
3983     OUT_BATCH(batch, 0);
3984     ADVANCE_BATCH(batch);
3985
3986     BEGIN_BATCH(batch, 4);
3987     OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2));
3988     OUT_BATCH(batch, 0);
3989     OUT_BATCH(batch, 0);
3990     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
3991     ADVANCE_BATCH(batch);
3992 }
3993
3994 static void 
3995 gen8_emit_wm_state(VADriverContextP ctx, int kernel)
3996 {
3997     struct i965_driver_data *i965 = i965_driver_data(ctx);
3998     struct intel_batchbuffer *batch = i965->batch;
3999     struct i965_render_state *render_state = &i965->render_state;
4000     unsigned int num_samples = 0;
4001     unsigned int max_threads;
4002
4003     max_threads = render_state->max_wm_threads - 2;
4004
4005     BEGIN_BATCH(batch, 2);
4006     OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2));
4007     OUT_BATCH(batch,
4008               (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE));
4009     ADVANCE_BATCH(batch);
4010
4011     
4012     if (kernel == PS_KERNEL) {
4013         BEGIN_BATCH(batch, 2);
4014         OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
4015         OUT_BATCH(batch,
4016                 GEN8_PS_BLEND_HAS_WRITEABLE_RT);
4017         ADVANCE_BATCH(batch);
4018     } else if (kernel == PS_SUBPIC_KERNEL) {
4019         BEGIN_BATCH(batch, 2);
4020         OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
4021         OUT_BATCH(batch,
4022                 (GEN8_PS_BLEND_HAS_WRITEABLE_RT |
4023                  GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE |
4024                  (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) |
4025                  (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) |
4026                  (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) |
4027                  (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT)));
4028         ADVANCE_BATCH(batch);
4029     }
4030
4031     BEGIN_BATCH(batch, 2);
4032     OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2));
4033     OUT_BATCH(batch,
4034               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
4035     ADVANCE_BATCH(batch);
4036
4037     BEGIN_BATCH(batch, 11);
4038     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2));
4039     OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
4040     OUT_BATCH(batch, 0);
4041     /*DW3-4. Constant buffer 0 */
4042     OUT_BATCH(batch, render_state->curbe_offset);
4043     OUT_BATCH(batch, 0);
4044
4045     /*DW5-10. Constant buffer 1-3 */
4046     OUT_BATCH(batch, 0);
4047     OUT_BATCH(batch, 0);
4048     OUT_BATCH(batch, 0);
4049     OUT_BATCH(batch, 0);
4050     OUT_BATCH(batch, 0);
4051     OUT_BATCH(batch, 0);
4052     ADVANCE_BATCH(batch);
4053
4054     BEGIN_BATCH(batch, 12);
4055     OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2));
4056     /* PS shader address */
4057     OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset);
4058
4059     OUT_BATCH(batch, 0);
4060     /* DW3. PS shader flags: sampler count / binding table entry count */
4061     OUT_BATCH(batch, 
4062               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
4063               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
4064     /* DW4-5. Scratch space */
4065     OUT_BATCH(batch, 0); /* scratch space base offset */
4066     OUT_BATCH(batch, 0);
4067     /* DW6. PS shader threads. */
4068     OUT_BATCH(batch, 
4069               ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples |
4070               GEN7_PS_PUSH_CONSTANT_ENABLE |
4071               GEN7_PS_16_DISPATCH_ENABLE);
4072     /* DW7. PS shader GRF */
4073     OUT_BATCH(batch, 
4074               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
4075     OUT_BATCH(batch, 0); /* kernel 1 pointer */
4076     OUT_BATCH(batch, 0);
4077     OUT_BATCH(batch, 0); /* kernel 2 pointer */
4078     OUT_BATCH(batch, 0);
4079     ADVANCE_BATCH(batch);
4080
4081     BEGIN_BATCH(batch, 2);
4082     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
4083     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
4084     ADVANCE_BATCH(batch);
4085 }
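/*
 * The PS setup above mirrors the GEN7 version: one sampler, five
 * binding-table entries, SIMD16 dispatch only, first payload GRF 6 and
 * push constants enabled; the kernel address is now an offset into
 * instruction_state.bo rather than a relocation.
 */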
4086
4087 static void
4088 gen8_emit_depth_buffer_state(VADriverContextP ctx)
4089 {
4090     struct i965_driver_data *i965 = i965_driver_data(ctx);
4091     struct intel_batchbuffer *batch = i965->batch;
4092
4093     BEGIN_BATCH(batch, 8);
4094     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2));
4095     OUT_BATCH(batch,
4096               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
4097               (I965_SURFACE_NULL << 29));
4098     /* DW2-3. Depth Buffer Address */
4099     OUT_BATCH(batch, 0);
4100     OUT_BATCH(batch, 0);
4101     /* DW4-7. Surface structure */
4102     OUT_BATCH(batch, 0);
4103     OUT_BATCH(batch, 0);
4104     OUT_BATCH(batch, 0);
4105     OUT_BATCH(batch, 0);
4106     ADVANCE_BATCH(batch);
4107
4108     /* Update the hierarchical depth (HiZ) buffer */
4109     BEGIN_BATCH(batch, 5);
4110     OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
4111     OUT_BATCH(batch, 0);
4112     OUT_BATCH(batch, 0);
4113     OUT_BATCH(batch, 0);
4114     OUT_BATCH(batch, 0);
4115     ADVANCE_BATCH(batch);
4116     
4117     /* Update the stencil buffer */
4118     BEGIN_BATCH(batch, 5);
4119     OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2));
4120     OUT_BATCH(batch, 0);
4121     OUT_BATCH(batch, 0);
4122     OUT_BATCH(batch, 0);
4123     OUT_BATCH(batch, 0);
4124     ADVANCE_BATCH(batch);
4125     
4126     BEGIN_BATCH(batch, 3);
4127     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
4128     OUT_BATCH(batch, 0);
4129     OUT_BATCH(batch, 0);
4130     ADVANCE_BATCH(batch);
4131 }
4132
4133 static void
4134 gen8_emit_depth_stencil_state(VADriverContextP ctx)
4135 {
4136     struct i965_driver_data *i965 = i965_driver_data(ctx);
4137     struct intel_batchbuffer *batch = i965->batch;
4138
4139     BEGIN_BATCH(batch, 3);
4140     OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
4141     OUT_BATCH(batch, 0);
4142     OUT_BATCH(batch, 0);
4143     ADVANCE_BATCH(batch);
4144 }
4145
4146 static void
4147 gen8_emit_wm_hz_op(VADriverContextP ctx)
4148 {
4149     struct i965_driver_data *i965 = i965_driver_data(ctx);
4150     struct intel_batchbuffer *batch = i965->batch;
4151
4152     BEGIN_BATCH(batch, 5);
4153     OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
4154     OUT_BATCH(batch, 0);
4155     OUT_BATCH(batch, 0);
4156     OUT_BATCH(batch, 0);
4157     OUT_BATCH(batch, 0);
4158     ADVANCE_BATCH(batch);
4159 }
4160
4161 static void
4162 gen8_emit_viewport_state_pointers(VADriverContextP ctx)
4163 {
4164     struct i965_driver_data *i965 = i965_driver_data(ctx);
4165     struct intel_batchbuffer *batch = i965->batch;
4166     struct i965_render_state *render_state = &i965->render_state;
4167
4168     BEGIN_BATCH(batch, 2);
4169     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
4170     OUT_BATCH(batch, render_state->cc_viewport_offset);
4171     ADVANCE_BATCH(batch);
4172
4173     BEGIN_BATCH(batch, 2);
4174     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
4175     OUT_BATCH(batch, 0);
4176     ADVANCE_BATCH(batch);
4177 }
4178
4179 static void
4180 gen8_emit_sampler_state_pointers(VADriverContextP ctx)
4181 {
4182     struct i965_driver_data *i965 = i965_driver_data(ctx);
4183     struct intel_batchbuffer *batch = i965->batch;
4184     struct i965_render_state *render_state = &i965->render_state;
4185
4186     BEGIN_BATCH(batch, 2);
4187     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
4188     OUT_BATCH(batch, render_state->sampler_offset);
4189     ADVANCE_BATCH(batch);
4190 }
4191
4192
4193 static void
4194 gen8_render_emit_states(VADriverContextP ctx, int kernel)
4195 {
4196     struct i965_driver_data *i965 = i965_driver_data(ctx);
4197     struct intel_batchbuffer *batch = i965->batch;
4198
4199     intel_batchbuffer_start_atomic(batch, 0x1000);
4200     intel_batchbuffer_emit_mi_flush(batch);
4201     gen8_emit_invarient_states(ctx);
4202     gen8_emit_state_base_address(ctx);
4203     gen8_emit_viewport_state_pointers(ctx);
4204     gen8_emit_urb(ctx);
4205     gen8_emit_cc_state_pointers(ctx);
4206     gen8_emit_sampler_state_pointers(ctx);
4207     gen8_emit_wm_hz_op(ctx);
4208     gen8_emit_bypass_state(ctx);
4209     gen8_emit_vs_state(ctx);
4210     gen8_emit_clip_state(ctx);
4211     gen8_emit_sf_state(ctx);
4212     gen8_emit_depth_stencil_state(ctx);
4213     gen8_emit_wm_state(ctx, kernel);
4214     gen8_emit_depth_buffer_state(ctx);
4215     gen7_emit_drawing_rectangle(ctx);
4216     gen8_emit_vertex_element_state(ctx);
4217     gen8_emit_vertices(ctx);
4218     intel_batchbuffer_end_atomic(batch);
4219 }
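/*
 * Compared with the GEN7 sequence, this adds the GEN8-only
 * 3DSTATE_WM_HZ_OP and the inline WM_DEPTH_STENCIL state, and no longer
 * emits a separate depth-stencil state pointer.
 */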
4220
4221 static void
4222 gen7_render_put_surface(
4223     VADriverContextP   ctx,
4224     struct object_surface *obj_surface,    
4225     const VARectangle *src_rect,
4226     const VARectangle *dst_rect,
4227     unsigned int       flags
4228 )
4229 {
4230     struct i965_driver_data *i965 = i965_driver_data(ctx);
4231     struct intel_batchbuffer *batch = i965->batch;
4232
4233     gen7_render_initialize(ctx);
4234     gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
4235     i965_clear_dest_region(ctx);
4236     gen7_render_emit_states(ctx, PS_KERNEL);
4237     intel_batchbuffer_flush(batch);
4238 }
4239
4240 static void
4241 gen8_render_put_surface(
4242     VADriverContextP   ctx,
4243     struct object_surface *obj_surface,    
4244     const VARectangle *src_rect,
4245     const VARectangle *dst_rect,
4246     unsigned int       flags
4247 )
4248 {
4249     struct i965_driver_data *i965 = i965_driver_data(ctx);
4250     struct intel_batchbuffer *batch = i965->batch;
4251
4252     gen8_render_initialize(ctx);
4253     gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
4254     gen8_clear_dest_region(ctx);
4255     gen8_render_emit_states(ctx, PS_KERNEL);
4256     intel_batchbuffer_flush(batch);
4257 }
4258
4259 static void
4260 gen7_subpicture_render_blend_state(VADriverContextP ctx)
4261 {
4262     struct i965_driver_data *i965 = i965_driver_data(ctx);
4263     struct i965_render_state *render_state = &i965->render_state;
4264     struct gen6_blend_state *blend_state;
4265
4266     dri_bo_unmap(render_state->cc.state);    
4267     dri_bo_map(render_state->cc.blend, 1);
4268     assert(render_state->cc.blend->virtual);
4269     blend_state = render_state->cc.blend->virtual;
4270     memset(blend_state, 0, sizeof(*blend_state));
4271     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
4272     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
4273     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
4274     blend_state->blend0.blend_enable = 1;
4275     blend_state->blend1.post_blend_clamp_enable = 1;
4276     blend_state->blend1.pre_blend_clamp_enable = 1;
4277     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
4278     dri_bo_unmap(render_state->cc.blend);
4279 }
4280
4281 static void
4282 gen8_subpicture_render_blend_state(VADriverContextP ctx)
4283 {
4284     struct i965_driver_data *i965 = i965_driver_data(ctx);
4285     struct i965_render_state *render_state = &i965->render_state;
4286     struct gen8_global_blend_state *global_blend_state;
4287     struct gen8_blend_state_rt *blend_state;
4288     unsigned char *cc_ptr;
4289     
4290     dri_bo_map(render_state->dynamic_state.bo, 1);
4291     assert(render_state->dynamic_state.bo->virtual);
4292
4293     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
4294                         render_state->blend_state_offset;
4295
4296     global_blend_state = (struct gen8_global_blend_state*) cc_ptr;
4297
4298     memset(global_blend_state, 0, sizeof(*global_blend_state));
4299     /* Global blend state + blend_state for Render Target */
4300     blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
4301     blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD;
4302     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
4303     blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
4304     blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD;
4305     blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
4306     blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
4307     blend_state->blend0.colorbuf_blend = 1;
4308     blend_state->blend1.post_blend_clamp_enable = 1;
4309     blend_state->blend1.pre_blend_clamp_enable = 1;
4310     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
4311
4312     dri_bo_unmap(render_state->dynamic_state.bo);
4313 }
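/*
 * This programs classic "source over" compositing for both color and
 * alpha (src * alpha + dst * (1 - alpha)) with pre- and post-blend
 * clamping to [0, 1].
 */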
4314
4315 static void
4316 gen7_subpicture_render_setup_states(
4317     VADriverContextP   ctx,
4318     struct object_surface *obj_surface,
4319     const VARectangle *src_rect,
4320     const VARectangle *dst_rect
4321 )
4322 {
4323     i965_render_dest_surface_state(ctx, 0);
4324     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
4325     i965_render_sampler(ctx);
4326     i965_render_cc_viewport(ctx);
4327     gen7_render_color_calc_state(ctx);
4328     gen7_subpicture_render_blend_state(ctx);
4329     gen7_render_depth_stencil_state(ctx);
4330     i965_subpic_render_upload_constants(ctx, obj_surface);
4331     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
4332 }
4333
4334 static void
4335 gen8_subpic_render_upload_constants(VADriverContextP ctx,
4336                                     struct object_surface *obj_surface)
4337 {
4338     struct i965_driver_data *i965 = i965_driver_data(ctx);
4339     struct i965_render_state *render_state = &i965->render_state;
4340     float *constant_buffer;
4341     float global_alpha = 1.0;
4342     unsigned int index = obj_surface->subpic_render_idx;
4343     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
4344     unsigned char *cc_ptr;
4345
4346     if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
4347         global_alpha = obj_subpic->global_alpha;
4348     }
4349
4350
4351     dri_bo_map(render_state->dynamic_state.bo, 1);
4352     assert(render_state->dynamic_state.bo->virtual);
4353
4354     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
4355                                 render_state->curbe_offset;
4356
4357     constant_buffer = (float *) cc_ptr;
4358     *constant_buffer = global_alpha;
4359
4360     dri_bo_unmap(render_state->dynamic_state.bo);
4361 }
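/*
 * The subpicture CURBE is a single float: the global alpha (1.0 unless
 * VA_SUBPICTURE_GLOBAL_ALPHA is set), which the ARGB subpicture kernel
 * presumably multiplies into the sampled pixel before blending.
 */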
4362
4363 static void
4364 gen8_subpicture_render_setup_states(
4365     VADriverContextP   ctx,
4366     struct object_surface *obj_surface,
4367     const VARectangle *src_rect,
4368     const VARectangle *dst_rect
4369 )
4370 {
4371     i965_render_dest_surface_state(ctx, 0);
4372     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
4373     gen8_render_sampler(ctx);
4374     gen8_render_cc_viewport(ctx);
4375     gen8_render_color_calc_state(ctx);
4376     gen8_subpicture_render_blend_state(ctx);
4377     gen8_subpic_render_upload_constants(ctx, obj_surface);
4378     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
4379 }
4380
4381 static void
4382 gen7_render_put_subpicture(
4383     VADriverContextP   ctx,
4384     struct object_surface *obj_surface,
4385     const VARectangle *src_rect,
4386     const VARectangle *dst_rect
4387 )
4388 {
4389     struct i965_driver_data *i965 = i965_driver_data(ctx);
4390     struct intel_batchbuffer *batch = i965->batch;
4391     unsigned int index = obj_surface->subpic_render_idx;
4392     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
4393
4394     assert(obj_subpic);
4395     gen7_render_initialize(ctx);
4396     gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
4397     gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
4398     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
4399     intel_batchbuffer_flush(batch);
4400 }
4401
4402 static void
4403 gen8_render_put_subpicture(
4404     VADriverContextP   ctx,
4405     struct object_surface *obj_surface,
4406     const VARectangle *src_rect,
4407     const VARectangle *dst_rect
4408 )
4409 {
4410     struct i965_driver_data *i965 = i965_driver_data(ctx);
4411     struct intel_batchbuffer *batch = i965->batch;
4412     unsigned int index = obj_surface->subpic_render_idx;
4413     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
4414
4415     assert(obj_subpic);
4416     gen8_render_initialize(ctx);
4417     gen8_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
4418     gen8_render_emit_states(ctx, PS_SUBPIC_KERNEL);
4419     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
4420     intel_batchbuffer_flush(batch);
4421 }
4422
4423 /*
4424  * global functions
4425  */
4426 VAStatus 
4427 i965_DestroySurfaces(VADriverContextP ctx,
4428                      VASurfaceID *surface_list,
4429                      int num_surfaces);
void
intel_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int has_done_scaling = 0;
    VASurfaceID out_surface_id = i965_post_processing(ctx,
                                                      obj_surface,
                                                      src_rect,
                                                      dst_rect,
                                                      flags,
                                                      &has_done_scaling);

    assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));

    if (out_surface_id != VA_INVALID_ID) {
        struct object_surface *new_obj_surface = SURFACE(out_surface_id);

        if (new_obj_surface && new_obj_surface->bo)
            obj_surface = new_obj_surface;

        if (has_done_scaling)
            src_rect = dst_rect;
    }

    if (IS_GEN8(i965->intel.device_id))
        gen8_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
    else if (IS_GEN7(i965->intel.device_id))
        gen7_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
    else if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
    else
        i965_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);

    if (out_surface_id != VA_INVALID_ID)
        i965_DestroySurfaces(ctx, &out_surface_id, 1);
}

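/*
 * Top-level entry point for rendering the subpictures attached to a
 * surface; dispatches to the generation-specific implementation.
 */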
void
intel_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_GEN8(i965->intel.device_id))
        gen8_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
    else if (IS_GEN7(i965->intel.device_id))
        gen7_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
    else if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
    else
        i965_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
}

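/*
 * Gen8 keeps all render kernels in a single instruction-state buffer
 * object instead of one bo per kernel (as the pre-Gen8 path below
 * does): each kernel is copied in at a 64-byte-aligned offset, and
 * that offset is recorded in kernel->kernel_offset for later use when
 * the pipeline states are emitted.
 */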
static bool
gen8_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i, kernel_size;
    unsigned int kernel_offset, end_offset;
    unsigned char *kernel_ptr;
    struct i965_kernel *kernel;

    if (IS_GEN8(i965->intel.device_id)) {
        memcpy(render_state->render_kernels, render_kernels_gen8,
               sizeof(render_state->render_kernels));
    }

    /* 4KB of headroom, which also absorbs the 64-byte alignment
     * padding inserted between kernels below. */
    kernel_size = 4096;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel_size += kernel->size;
    }

    render_state->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
                                                      "kernel shader",
                                                      kernel_size,
                                                      0x1000);
    if (render_state->instruction_state.bo == NULL) {
        WARN_ONCE("failed to allocate buffer space for the kernel shaders\n");
        return false;
    }

    render_state->instruction_state.bo_size = kernel_size;
    render_state->instruction_state.end_offset = 0;
    end_offset = 0;

    dri_bo_map(render_state->instruction_state.bo, 1);
    kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual);
    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        kernel = &render_state->render_kernels[i];
        kernel_offset = ALIGN(end_offset, 64);
        kernel->kernel_offset = kernel_offset;

        if (!kernel->size)
            continue;

        memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);

        /* Advance past the aligned copy, not just by the raw kernel
         * size, so the next kernel cannot overlap the padding. */
        end_offset = kernel_offset + kernel->size;
    }

    render_state->instruction_state.end_offset = end_offset;

    dri_bo_unmap(render_state->instruction_state.bo);

    if (IS_GEN8(i965->intel.device_id)) {
        render_state->max_wm_threads = 64;
    } else {
        /* should never get here !!! */
        assert(0);
    }

    return true;
}

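/*
 * Driver-wide render initialization: select the kernel set for the
 * detected generation, upload each kernel into its own bo (pre-Gen8;
 * Gen8 takes the consolidated path above), allocate the CURBE constant
 * buffer, and record the maximum number of WM (pixel shader) threads
 * the hardware supports.
 */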
bool
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;

    /* kernel */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
                                 sizeof(render_kernels_gen5[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
                                 sizeof(render_kernels_gen6[0])));

    if (IS_GEN8(i965->intel.device_id))
        return gen8_render_init(ctx);
    else if (IS_GEN7(i965->intel.device_id))
        memcpy(render_state->render_kernels,
               (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7),
               sizeof(render_state->render_kernels));
    else if (IS_GEN6(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
    else if (IS_IRONLAKE(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
    else
        memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size, 0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* constant buffer */
    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
                                          "constant buffer",
                                          4096, 64);
    assert(render_state->curbe.bo);

    if (IS_HSW_GT1(i965->intel.device_id)) {
        render_state->max_wm_threads = 102;
    } else if (IS_HSW_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 204;
    } else if (IS_HSW_GT3(i965->intel.device_id)) {
        render_state->max_wm_threads = 408;
    } else if (IS_IVB_GT1(i965->intel.device_id) || IS_BAYTRAIL(i965->intel.device_id)) {
        render_state->max_wm_threads = 48;
    } else if (IS_IVB_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 172;
    } else if (IS_SNB_GT1(i965->intel.device_id)) {
        render_state->max_wm_threads = 40;
    } else if (IS_SNB_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 80;
    } else if (IS_IRONLAKE(i965->intel.device_id)) {
        render_state->max_wm_threads = 72; /* 12 * 6 */
    } else if (IS_G4X(i965->intel.device_id)) {
        render_state->max_wm_threads = 50;
    } else {
        /* should never get here !!! */
        assert(0);
    }

    return true;
}

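/*
 * Release the Gen8-specific render resources: the vertex buffer, the
 * surface-state/binding-table bo, and the consolidated instruction,
 * dynamic and indirect state buffers.
 */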
static void
gen8_render_terminate(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;

    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL;

    if (render_state->instruction_state.bo) {
        dri_bo_unreference(render_state->instruction_state.bo);
        render_state->instruction_state.bo = NULL;
    }

    if (render_state->dynamic_state.bo) {
        dri_bo_unreference(render_state->dynamic_state.bo);
        render_state->dynamic_state.bo = NULL;
    }

    if (render_state->indirect_state.bo) {
        dri_bo_unreference(render_state->indirect_state.bo);
        render_state->indirect_state.bo = NULL;
    }

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }
}

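/*
 * Release all render resources. Gen8 delegates to its own teardown
 * path; older generations drop the CURBE, the per-kernel bos and the
 * fixed-function pipeline state objects one by one.
 */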
void
i965_render_terminate(VADriverContextP ctx)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_GEN8(i965->intel.device_id)) {
        gen8_render_terminate(ctx);
        return;
    }

    dri_bo_unreference(render_state->curbe.bo);
    render_state->curbe.bo = NULL;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;
    dri_bo_unreference(render_state->vs.state);
    render_state->vs.state = NULL;
    dri_bo_unreference(render_state->sf.state);
    render_state->sf.state = NULL;
    dri_bo_unreference(render_state->wm.sampler);
    render_state->wm.sampler = NULL;
    dri_bo_unreference(render_state->wm.state);
    render_state->wm.state = NULL;
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL;
    dri_bo_unreference(render_state->cc.viewport);
    render_state->cc.viewport = NULL;
    dri_bo_unreference(render_state->cc.state);
    render_state->cc.state = NULL;
    dri_bo_unreference(render_state->cc.blend);
    render_state->cc.blend = NULL;
    dri_bo_unreference(render_state->cc.depth_stencil);
    render_state->cc.depth_stencil = NULL;

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }
}