OSDN Git Service

Add the support of brightness/contrast/hue/saturation for BDW rendering
[android-x86/hardware-intel-common-vaapi.git] / src / i965_render.c
1 /*
2  * Copyright © 2006 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Keith Packard <keithp@keithp.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 /*
31  * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c
32  */
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38 #include <math.h>
39
40 #include <va/va_drmcommon.h>
41
42 #include "intel_batchbuffer.h"
43 #include "intel_driver.h"
44 #include "i965_defines.h"
45 #include "i965_drv_video.h"
46 #include "i965_structs.h"
47
48 #include "i965_render.h"
49
/* SF (strips & fans) kernel: GRF register usage and max thread count. */
#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

/*
 * Pre-compiled shader binaries, one uint32_t[4] per instruction,
 * generated from the assembly under shaders/render/.  The bare .g4b
 * files target the original i965 (Gen4); per-generation variants carry
 * a suffix (.gen5, .g6b, .g7b, .g8b, ...).
 */
static const uint32_t sf_kernel_static[][4] = 
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

/* Encode a GRF register count as the hardware "GRF blocks" field (16 regs per block). */
#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

/* Gen4 pixel shader: planar YUV sampling, color balance, YUV->RGB CSC, write. */
static const uint32_t ps_kernel_static[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_color_balance.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
/* Gen4 subpicture shader: sample ARGB and write; no CSC stage. */
static const uint32_t ps_subpic_kernel_static[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IRONLAKE */
static const uint32_t sf_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* programs for Sandybridge */
/* Empty on Gen6+: kept so the SF/PS/PS_SUBPIC kernel-table layout stays
 * uniform across generations. */
static const uint32_t sf_kernel_static_gen6[][4] = 
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_color_balance.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] = 
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_color_balance.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Programs for Haswell */
/* Haswell reuses the Ivybridge binaries except where a .haswell variant exists. */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/*TODO: Modify the shader for GEN8.
 * Now it only uses the shader for gen7/haswell
 */
/* Programs for Gen8 */
static const uint32_t sf_kernel_static_gen8[][4] = 
{
};
static const uint32_t ps_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_planar.g8b"
#include "shaders/render/exa_wm_yuv_color_balance.g8b"
#include "shaders/render/exa_wm_yuv_rgb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};

static const uint32_t ps_subpic_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_argb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};


/* Surface-state slots are padded to the largest per-generation size so
 * one binding-table layout works on every supported generation. */
#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_GEN8, \
                                MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))

#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
177
/*
 * Reinterpret the bit pattern of a 32-bit float as a uint32_t.
 *
 * Uses memcpy instead of a union type-pun: both are valid C, but
 * memcpy is the canonical portable idiom, is strict-aliasing safe on
 * every compiler, and is optimized to a single move at -O1+.
 */
static uint32_t float_to_uint (float f) 
{
    uint32_t i;

    memcpy(&i, &f, sizeof(i));
    return i;
}
188
/* Indices into the per-generation render_kernels_gen* tables below. */
enum 
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};
195
/*
 * Per-generation kernel tables, indexed by SF_KERNEL / PS_KERNEL /
 * PS_SUBPIC_KERNEL.  Each entry bundles a human-readable name, the
 * kernel index, the static binary and its size; the trailing NULL is
 * the bo slot, presumably filled in when the kernel is uploaded to a
 * buffer object (NOTE(review): confirm field order against
 * struct i965_kernel).
 */
static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

/* Ironlake (Gen5). */
static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

/* Sandybridge (Gen6); the SF table is empty on Gen6+. */
static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

/* Ivybridge (Gen7). */
static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

/* Haswell: Gen7 kernels except for the Haswell-specific PS. */
static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

/* Broadwell (Gen8). */
static struct i965_kernel render_kernels_gen8[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen8,
        sizeof(sf_kernel_static_gen8),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen8,
        sizeof(ps_kernel_static_gen8),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen8,
        sizeof(ps_subpic_kernel_static_gen8),
        NULL
    }
};
345
/*
 * Static URB (unified return buffer) partitioning for the fixed-function
 * pipeline: number of entries and entry size per stage (units are the
 * hardware's URB rows -- TODO confirm against the PRM).  GS and CLIP are
 * bypassed and get no URB space.
 */
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4
360
/*
 * 3x4 YUV -> RGB conversion matrices, one per colour standard,
 * presumably consumed by the exa_wm_yuv_rgb shaders.  Rows correspond
 * to R, G, B; the first three columns scale Y, U, V and the fourth
 * column carries bias terms for video-range input
 * (-0.06275 = -16/255, -0.50196 = -128/255) -- exact packing is
 * defined by the shader (NOTE(review): confirm).
 */

/* ITU-R BT.601 (SD). */
static float yuv_to_rgb_bt601[3][4] = {
{1.164,         0,      1.596,          -0.06275,},
{1.164,         -0.392, -0.813,         -0.50196,},
{1.164,         2.017,  0,              -0.50196,},
};

/* ITU-R BT.709 (HD). */
static float yuv_to_rgb_bt709[3][4] = {
{1.164,         0,      1.793,          -0.06275,},
{1.164,         -0.213, -0.533,         -0.50196,},
{1.164,         2.112,  0,              -0.50196,},
};

/* SMPTE 240M. */
static float yuv_to_rgb_smpte_240[3][4] = {
{1.164,         0,      1.794,          -0.06275,},
{1.164,         -0.258, -0.5425,        -0.50196,},
{1.164,         2.078,  0,              -0.50196,},
};
378
379 static void
380 i965_render_vs_unit(VADriverContextP ctx)
381 {
382     struct i965_driver_data *i965 = i965_driver_data(ctx);
383     struct i965_render_state *render_state = &i965->render_state;
384     struct i965_vs_unit_state *vs_state;
385
386     dri_bo_map(render_state->vs.state, 1);
387     assert(render_state->vs.state->virtual);
388     vs_state = render_state->vs.state->virtual;
389     memset(vs_state, 0, sizeof(*vs_state));
390
391     if (IS_IRONLAKE(i965->intel.device_id))
392         vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
393     else
394         vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
395
396     vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
397     vs_state->vs6.vs_enable = 0;
398     vs_state->vs6.vert_cache_disable = 1;
399     
400     dri_bo_unmap(render_state->vs.state);
401 }
402
/*
 * Program the SF (strips & fans) fixed-function unit state: point it at
 * the SF kernel, configure its thread/URB allocation, and skip viewport
 * transform, culling and scissoring.
 */
static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    /* kernel addresses are encoded in 64-byte units */
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1; /* field is "threads - 1" */
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    /* half-pixel destination origin bias (0x8 = 0.5 in U4.4 -- TODO confirm) */
    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    /* thread0 embeds the kernel bo address: emit a relocation so it is
     * patched when the bo moves; the grf_reg_count low bits ride along
     * in the reloc delta. */
    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}
459
460 static void 
461 i965_render_sampler(VADriverContextP ctx)
462 {
463     struct i965_driver_data *i965 = i965_driver_data(ctx);
464     struct i965_render_state *render_state = &i965->render_state;
465     struct i965_sampler_state *sampler_state;
466     int i;
467     
468     assert(render_state->wm.sampler_count > 0);
469     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
470
471     dri_bo_map(render_state->wm.sampler, 1);
472     assert(render_state->wm.sampler->virtual);
473     sampler_state = render_state->wm.sampler->virtual;
474     for (i = 0; i < render_state->wm.sampler_count; i++) {
475         memset(sampler_state, 0, sizeof(*sampler_state));
476         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
477         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
478         sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
479         sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
480         sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
481         sampler_state++;
482     }
483
484     dri_bo_unmap(render_state->wm.sampler);
485 }
/*
 * Program the WM (windower / pixel shader) unit state for subpicture
 * rendering: point it at the PS_SUBPIC kernel and the shared sampler
 * state, SIMD16 dispatch only.
 */
static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    /* kernel addresses are encoded in 64-byte units */
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    /* no scratch space needed */
    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    /* sampler state pointer is encoded in 32-byte units */
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        /* sampler_count field counts groups of 4 samplers */
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = render_state->max_wm_threads - 1; /* field is "threads - 1" */
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    /* thread0/wm4 embed bo addresses: emit relocations so they get
     * patched when the buffers move; the low bits of each dword
     * (grf_reg_count / sampler_count) ride along in the reloc delta. */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}
548
549
/*
 * Program the WM unit state for ordinary (video surface) rendering.
 * Identical to the subpicture variant except it dispatches the
 * PS_KERNEL (planar YUV) pixel shader.
 */
static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    /* kernel addresses are encoded in 64-byte units */
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    /* no scratch space needed */
    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    /* sampler state pointer is encoded in 32-byte units */
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        /* sampler_count field counts groups of 4 samplers */
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = render_state->max_wm_threads - 1; /* field is "threads - 1" */
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    /* relocations for the embedded kernel and sampler bo addresses */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}
612
613 static void 
614 i965_render_cc_viewport(VADriverContextP ctx)
615 {
616     struct i965_driver_data *i965 = i965_driver_data(ctx);
617     struct i965_render_state *render_state = &i965->render_state;
618     struct i965_cc_viewport *cc_viewport;
619
620     dri_bo_map(render_state->cc.viewport, 1);
621     assert(render_state->cc.viewport->virtual);
622     cc_viewport = render_state->cc.viewport->virtual;
623     memset(cc_viewport, 0, sizeof(*cc_viewport));
624     
625     cc_viewport->min_depth = -1.e35;
626     cc_viewport->max_depth = 1.e35;
627
628     dri_bo_unmap(render_state->cc.viewport);
629 }
630
/*
 * Program the CC (color calculator) unit for subpicture rendering:
 * depth/stencil/alpha tests off, color blending enabled with
 * SRC_ALPHA / INV_SRC_ALPHA factors so the subpicture is alpha-blended
 * over the destination.
 */
static void 
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0 ;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0;//0:ALPHATEST_UNORM8;       /*store alpha value with UNORM8 */
    cc_state->cc3.alpha_test_func = 5;//COMPAREFUNCTION_LESS;       /*pass if less than the reference */
    /* viewport pointer is encoded in 32-byte units */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0; 
    cc_state->cc6.clamp_pre_alpha_blend  =0; 
    
    /*final color = src_color*src_blend_factor +/- dst_color*dest_color_blend_factor*/
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
   
    /*alpha test reference*/
    cc_state->cc7.alpha_ref.f =0.0 ;

    /* cc4 embeds the viewport bo address: emit a relocation so it is
     * patched when the bo moves */
    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}
682
683
/*
 * Program the CC unit for ordinary rendering: blending and all tests
 * disabled; pixels are written straight through (logic op enabled).
 */
static void 
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    /* viewport pointer is encoded in 32-byte units */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    /* cc4 embeds the viewport bo address: emit a relocation so it is
     * patched when the bo moves */
    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}
721
722 static void
723 i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
724 {
725     switch (tiling) {
726     case I915_TILING_NONE:
727         ss->ss3.tiled_surface = 0;
728         ss->ss3.tile_walk = 0;
729         break;
730     case I915_TILING_X:
731         ss->ss3.tiled_surface = 1;
732         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
733         break;
734     case I915_TILING_Y:
735         ss->ss3.tiled_surface = 1;
736         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
737         break;
738     }
739 }
740
/*
 * Fill in a pre-Gen7 SURFACE_STATE for a 2D surface backed by @bo at
 * @offset.  For field rendering (top/bottom field flags) the surface is
 * addressed with a vertical line stride of 2 and half the height, so
 * only one field is sampled.  Note: ss1.base_addr holds bo->offset,
 * which the caller must fix up with a relocation.
 */
static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    /* hardware fields are size - 1 */
    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}
782
783 static void
784 gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
785 {
786    switch (tiling) {
787    case I915_TILING_NONE:
788       ss->ss0.tiled_surface = 0;
789       ss->ss0.tile_walk = 0;
790       break;
791    case I915_TILING_X:
792       ss->ss0.tiled_surface = 1;
793       ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
794       break;
795    case I915_TILING_Y:
796       ss->ss0.tiled_surface = 1;
797       ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
798       break;
799    }
800 }
801
802 static void
803 gen8_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling)
804 {
805    switch (tiling) {
806    case I915_TILING_NONE:
807       ss->ss0.tiled_surface = 0;
808       ss->ss0.tile_walk = 0;
809       break;
810    case I915_TILING_X:
811       ss->ss0.tiled_surface = 1;
812       ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
813       break;
814    case I915_TILING_Y:
815       ss->ss0.tiled_surface = 1;
816       ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
817       break;
818    }
819 }
820
/* Set "Shader Channel Select" (Gen7/Haswell) to the identity mapping so
 * each sampled channel passes through unswizzled (R->R, G->G, B->B, A->A). */
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}
830
/* Set "Shader Channel Select" for GEN8+ to the identity mapping so each
 * sampled channel passes through unswizzled (R->R, G->G, B->B, A->A). */
void
gen8_render_set_surface_scs(struct gen8_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}
840
/*
 * Fill in a Gen7 SURFACE_STATE for a 2D surface backed by @bo at
 * @offset.  For field rendering the surface is addressed with a
 * vertical line stride of 2 and half the height, so only one field is
 * sampled.  ss1.base_addr holds bo->offset; the caller must fix it up
 * with a relocation.
 */
static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    /* hardware fields are size - 1 */
    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}
881
882
/*
 * Fill in a Gen8 SURFACE_STATE for a 2D surface backed by @bo at
 * @offset.  Differs from the Gen7 variant in that the base address
 * lives in ss8 (Gen8 uses wider addressing) and the alignment fields
 * must be programmed.  For field rendering the surface is addressed
 * with a vertical line stride of 2 and half the height.  The caller
 * must fix up the base address with a relocation.
 */
static void
gen8_render_set_surface_state(
    struct gen8_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss8.base_addr = bo->offset + offset;

    /* hardware fields are size - 1 */
    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    /* Always set 1(align 4 mode) per B-spec */
    ss->ss0.vertical_alignment = 1;
    ss->ss0.horizontal_alignment = 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen8_render_set_surface_tiling(ss, tiling);
}
927
static void
i965_render_src_surface_state(
    VADriverContextP ctx, 
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    /* Write the sampler surface state for binding-table slot `index` and
     * point the binding table entry at it.  Surface states and the binding
     * table share a single BO (ss_bo). */
    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    /* Fill the generation-specific surface state, then register a
     * relocation so the kernel patches the base-address field (ss8 on
     * Gen8, ss1 on older generations) when the region BO moves. */
    if (IS_GEN8(i965->intel.device_id)) {
        gen8_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        gen8_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                          region);
    } else  if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        /* Shader channel select is only programmed on Haswell here */
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    /* Binding table entry = offset of this surface state within ss_bo */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}
991
992 static void
993 i965_render_src_surfaces_state(
994     VADriverContextP ctx,
995     struct object_surface *obj_surface,
996     unsigned int     flags
997 )
998 {
999     int region_pitch;
1000     int rw, rh;
1001     dri_bo *region;
1002
1003     region_pitch = obj_surface->width;
1004     rw = obj_surface->orig_width;
1005     rh = obj_surface->orig_height;
1006     region = obj_surface->bo;
1007
1008     i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
1009     i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);
1010
1011     if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
1012         i965_render_src_surface_state(ctx, 3, region,
1013                                       region_pitch * obj_surface->y_cb_offset,
1014                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
1015                                       I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
1016         i965_render_src_surface_state(ctx, 4, region,
1017                                       region_pitch * obj_surface->y_cb_offset,
1018                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
1019                                       I965_SURFACEFORMAT_R8G8_UNORM, flags);
1020     } else {
1021         i965_render_src_surface_state(ctx, 3, region,
1022                                       region_pitch * obj_surface->y_cb_offset,
1023                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
1024                                       I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
1025         i965_render_src_surface_state(ctx, 4, region,
1026                                       region_pitch * obj_surface->y_cb_offset,
1027                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
1028                                       I965_SURFACEFORMAT_R8_UNORM, flags);
1029         i965_render_src_surface_state(ctx, 5, region,
1030                                       region_pitch * obj_surface->y_cr_offset,
1031                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
1032                                       I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
1033         i965_render_src_surface_state(ctx, 6, region,
1034                                       region_pitch * obj_surface->y_cr_offset,
1035                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
1036                                       I965_SURFACEFORMAT_R8_UNORM, flags);
1037     }
1038 }
1039
1040 static void
1041 i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
1042                                       struct object_surface *obj_surface)
1043 {
1044     dri_bo *subpic_region;
1045     unsigned int index = obj_surface->subpic_render_idx;
1046     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1047     struct object_image *obj_image = obj_subpic->obj_image;
1048
1049     assert(obj_surface);
1050     assert(obj_surface->bo);
1051     subpic_region = obj_image->bo;
1052     /*subpicture surface*/
1053     i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);     
1054     i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);     
1055 }
1056
static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    /* Write the render-target surface state for the draw region into
     * binding-table slot `index` and hook up the binding table entry. */
    assert(index < MAX_RENDER_SURFACES);

    /* Pick the surface format from the drawable depth: 16bpp -> RGB565,
     * otherwise ARGB8888. */
    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    /* Generation-specific surface state + relocation on the base-address
     * field (ss8 on Gen8, ss1 before); render domain read/write since
     * this is the render target. */
    if (IS_GEN8(i965->intel.device_id)) {
        gen8_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        gen8_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                          dest_region->bo);
    } else if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    /* Binding table entry = offset of this surface state within ss_bo */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}
1116
1117 static void
1118 i965_fill_vertex_buffer(
1119     VADriverContextP ctx,
1120     float tex_coords[4], /* [(u1,v1);(u2,v2)] */
1121     float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
1122 )
1123 {
1124     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1125     float vb[12];
1126
1127     enum { X1, Y1, X2, Y2 };
1128
1129     static const unsigned int g_rotation_indices[][6] = {
1130         [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
1131         [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
1132         [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
1133         [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
1134     };
1135
1136     const unsigned int * const rotation_indices =
1137         g_rotation_indices[i965->rotation_attrib->value];
1138
1139     vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
1140     vb[1]  = tex_coords[rotation_indices[1]];
1141     vb[2]  = vid_coords[X2];
1142     vb[3]  = vid_coords[Y2];
1143
1144     vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
1145     vb[5]  = tex_coords[rotation_indices[3]];
1146     vb[6]  = vid_coords[X1];
1147     vb[7]  = vid_coords[Y2];
1148
1149     vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
1150     vb[9]  = tex_coords[rotation_indices[5]];
1151     vb[10] = vid_coords[X1];
1152     vb[11] = vid_coords[Y1];
1153
1154     dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
1155 }
1156
1157 static void 
1158 i965_subpic_render_upload_vertex(VADriverContextP ctx,
1159                                  struct object_surface *obj_surface,
1160                                  const VARectangle *output_rect)
1161 {    
1162     unsigned int index = obj_surface->subpic_render_idx;
1163     struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
1164     float tex_coords[4], vid_coords[4];
1165     VARectangle dst_rect;
1166
1167     if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
1168         dst_rect = obj_subpic->dst_rect;
1169     else {
1170         const float sx  = (float)output_rect->width  / obj_surface->orig_width;
1171         const float sy  = (float)output_rect->height / obj_surface->orig_height;
1172         dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
1173         dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
1174         dst_rect.width  = sx * obj_subpic->dst_rect.width;
1175         dst_rect.height = sy * obj_subpic->dst_rect.height;
1176     }
1177
1178     tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
1179     tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
1180     tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
1181     tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;
1182
1183     vid_coords[0] = dst_rect.x;
1184     vid_coords[1] = dst_rect.y;
1185     vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
1186     vid_coords[3] = (float)(dst_rect.y + dst_rect.height);
1187
1188     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
1189 }
1190
1191 static void 
1192 i965_render_upload_vertex(
1193     VADriverContextP   ctx,
1194     struct object_surface *obj_surface,
1195     const VARectangle *src_rect,
1196     const VARectangle *dst_rect
1197 )
1198 {
1199     struct i965_driver_data *i965 = i965_driver_data(ctx);
1200     struct i965_render_state *render_state = &i965->render_state;
1201     struct intel_region *dest_region = render_state->draw_region;
1202     float tex_coords[4], vid_coords[4];
1203     int width, height;
1204
1205     width  = obj_surface->orig_width;
1206     height = obj_surface->orig_height;
1207
1208     tex_coords[0] = (float)src_rect->x / width;
1209     tex_coords[1] = (float)src_rect->y / height;
1210     tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
1211     tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;
1212
1213     vid_coords[0] = dest_region->x + dst_rect->x;
1214     vid_coords[1] = dest_region->y + dst_rect->y;
1215     vid_coords[2] = vid_coords[0] + dst_rect->width;
1216     vid_coords[3] = vid_coords[1] + dst_rect->height;
1217
1218     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
1219 }
1220
1221 #define PI  3.1415926
1222
static void
i965_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    float *color_balance_base;
    /* Normalize the color-balance attribute values for the shader. */
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;

    /* Upload the pixel-shader constants (CURBE):
     *   short[0]: surface layout (2 = Y800, 1 = NV12, 0 = planar YUV)
     *   short[1]: 1 to skip the color-balance transform
     *   float[4..7]: contrast, brightness, cos/sin hue terms
     *   float[8..]: YUV->RGB conversion matrix */
    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0'));

        constant_buffer[0] = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
            constant_buffer[0] = 1;
        else
            constant_buffer[0] = 0;
    }

    /* When every attribute is at its default the transform is an
     * identity, so tell the shader to skip it. */
    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    /* hue/saturation are folded into one rotation scaled by contrast */
    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    /* select the YUV->RGB matrix from the source colorspace flag,
     * defaulting to BT.601 */
    color_flag = flags & VA_SRC_COLOR_MASK;
    yuv_to_rgb = (float *)constant_buffer + 8;
    if (color_flag == VA_SRC_BT709)
        memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
    else if (color_flag == VA_SRC_SMPTE_240)
        memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
    else
        memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));

    dri_bo_unmap(render_state->curbe.bo);
}
1279
1280 static void
1281 i965_subpic_render_upload_constants(VADriverContextP ctx,
1282                                     struct object_surface *obj_surface)
1283 {
1284     struct i965_driver_data *i965 = i965_driver_data(ctx);
1285     struct i965_render_state *render_state = &i965->render_state;
1286     float *constant_buffer;
1287     float global_alpha = 1.0;
1288     unsigned int index = obj_surface->subpic_render_idx;
1289     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1290     
1291     if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
1292         global_alpha = obj_subpic->global_alpha;
1293     }
1294
1295     dri_bo_map(render_state->curbe.bo, 1);
1296
1297     assert(render_state->curbe.bo->virtual);
1298     constant_buffer = render_state->curbe.bo->virtual;
1299     *constant_buffer = global_alpha;
1300
1301     dri_bo_unmap(render_state->curbe.bo);
1302 }
1303  
static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    /* Build all indirect state needed to composite obj_surface onto the
     * current draw region: fixed-function units, render-target and source
     * surface states, samplers, vertex data and shader constants. */
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, obj_surface, flags);
}
1324
static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    /* Same as i965_surface_render_state_setup() but with the
     * subpicture-specific WM unit, CC unit, constants and vertex data. */
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}
1344
1345
static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Route subsequent commands to the 3D pipeline. */
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}
1356
static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Clear the System Instruction Pointer — no exception handler. */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1368
static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /* Program the state base addresses.  Only the surface state base is
     * non-zero: it points at the combined surface-state/binding-table BO,
     * so binding-table entries become offsets into that buffer.  Ironlake
     * uses an 8-dword form of the command; older parts use 6 dwords. */
    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}
1398
static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Only the WM/PS stage uses a binding table here; VS/GS/CLIP/SF
     * binding tables are left at zero. */
    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}
1414
static void 
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Program the global constant blend color: R=1, G=0, B=1, A=1. */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}
1429
static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /* Point the fixed-function stages at their state BOs; GS and CLIP
     * are disabled for this simple textured-rectangle pipeline. */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}
1447
1448 static void
1449 i965_render_urb_layout(VADriverContextP ctx)
1450 {
1451     struct i965_driver_data *i965 = i965_driver_data(ctx);
1452     struct intel_batchbuffer *batch = i965->batch;
1453     int urb_vs_start, urb_vs_size;
1454     int urb_gs_start, urb_gs_size;
1455     int urb_clip_start, urb_clip_size;
1456     int urb_sf_start, urb_sf_size;
1457     int urb_cs_start, urb_cs_size;
1458
1459     urb_vs_start = 0;
1460     urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
1461     urb_gs_start = urb_vs_start + urb_vs_size;
1462     urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
1463     urb_clip_start = urb_gs_start + urb_gs_size;
1464     urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
1465     urb_sf_start = urb_clip_start + urb_clip_size;
1466     urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
1467     urb_cs_start = urb_sf_start + urb_sf_size;
1468     urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
1469
1470     BEGIN_BATCH(batch, 3);
1471     OUT_BATCH(batch, 
1472               CMD_URB_FENCE |
1473               UF0_CS_REALLOC |
1474               UF0_SF_REALLOC |
1475               UF0_CLIP_REALLOC |
1476               UF0_GS_REALLOC |
1477               UF0_VS_REALLOC |
1478               1);
1479     OUT_BATCH(batch, 
1480               ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
1481               ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
1482               ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
1483     OUT_BATCH(batch,
1484               ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
1485               ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
1486     ADVANCE_BATCH(batch);
1487 }
1488
static void 
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Configure the constant URB (CURBE): URB_CS_ENTRIES entries of
     * URB_CS_ENTRY_SIZE each; the size field is encoded as value - 1. */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |          /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));                /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}
1502
static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /* Point CONSTANT_BUFFER at curbe.bo with bit 8 set (buffer valid);
     * the relocation delta, URB_CS_ENTRY_SIZE - 1, rides in the low bits
     * of the address dword — presumably the buffer length field; confirm
     * against the PRM. */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);
}
1517
static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    /* Clip rendering to the whole destination region: min = (0, 0),
     * max = (width-1, height-1), origin = (0, 0). */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}
1533
static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Describe the two vertex elements, both sourced from vertex buffer 0:
     *   offset 0: position (X, Y), padded to (X, Y, 1.0, 1.0)
     *   offset 8: texcoord (S, T), padded to (S, T, 1.0, 1.0)
     * The non-Ironlake encoding additionally carries the destination
     * element offsets (0 and 4) in VE1. */
    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}
1588
1589 static void
1590 i965_render_upload_image_palette(
1591     VADriverContextP ctx,
1592     struct object_image *obj_image,
1593     unsigned int     alpha
1594 )
1595 {
1596     struct i965_driver_data *i965 = i965_driver_data(ctx);
1597     struct intel_batchbuffer *batch = i965->batch;
1598     unsigned int i;
1599
1600     assert(obj_image);
1601
1602     if (!obj_image)
1603         return;
1604
1605     if (obj_image->image.num_palette_entries == 0)
1606         return;
1607
1608     BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
1609     OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
1610     /*fill palette*/
1611     //int32_t out[16]; //0-23:color 23-31:alpha
1612     for (i = 0; i < obj_image->image.num_palette_entries; i++)
1613         OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
1614     ADVANCE_BATCH(batch);
1615 }
1616
static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);

    /* Bind vertex buffer 0 with a 16-byte stride: 4 floats per vertex
     * (S, T, X, Y) as uploaded by i965_fill_vertex_buffer(). */
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch, 
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    /* Ironlake programs the buffer end address (12 floats in); other
     * generations take the value 3 here — presumably the max vertex
     * index; confirm against the PRM. */
    if (IS_IRONLAKE(i965->intel.device_id))
        OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    else
        OUT_BATCH(batch, 3);

    OUT_BATCH(batch, 0);

    /* Draw a 3-vertex RECTLIST covering the destination rectangle. */
    OUT_BATCH(batch, 
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}
1652
static void 
i965_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    /* Solid-fill the destination region with black using an XY_COLOR_BLT. */
    blt_cmd = XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16; /* raster op 0xf0 (PATCOPY): copy the fill color */
    pitch = dest_region->pitch;

    /* Select the color depth; 32bpp also writes RGB and alpha channels. */
    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    /* Tiled destinations take the pitch in dwords instead of bytes. */
    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    /* Gen6+ must issue BLT commands on the BLT ring. */
    if (IS_GEN6(i965->intel.device_id) ||
        IS_GEN7(i965->intel.device_id) ||
        IS_GEN8(i965->intel.device_id)) {
        intel_batchbuffer_start_atomic_blt(batch, 24);
        BEGIN_BLT_BATCH(batch, 6);
    } else {
        intel_batchbuffer_start_atomic(batch, 24);
        BEGIN_BATCH(batch, 6);
    }

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo, 
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0); /* fill color: black */
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}
1704
/*
 * GEN8 variant of i965_clear_dest_region(): same solid-color fill, but
 * GEN8's XY_COLOR_BLT takes a 48-bit destination address, so the
 * command is one dword longer (7 instead of 6) and always goes to the
 * BLT ring.
 */
static void 
gen8_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = GEN8_XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;          /* raster operation 0xf0 (PATCOPY) */
    pitch = dest_region->pitch;

    /* Only 32bpp and 16bpp destinations are supported */
    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    /* Tiled destinations program the pitch in dwords, not bytes */
    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    intel_batchbuffer_start_atomic_blt(batch, 24);
    BEGIN_BLT_BATCH(batch, 7);

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));          /* top-left */
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |     /* bottom-right */
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo, 
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0); /* upper 16 bits of the 48-bit address */
    OUT_BATCH(batch, 0x0); /* fill color: black */
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}
1750
/*
 * Emit the complete legacy (pre-GEN6) 3D pipeline for rendering a video
 * surface: clear the destination, then program all fixed-function state
 * and kick the primitive.  The emission order is mandated by the
 * hardware and must not be changed.
 */
static void
i965_surface_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_clear_dest_region(ctx);
    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}
1774
/*
 * Emit the legacy (pre-GEN6) 3D pipeline for subpicture rendering.
 * Identical command sequence to i965_surface_render_pipeline_setup()
 * except that the destination is NOT cleared first, so the subpicture
 * is composited over the already-rendered surface.
 */
static void
i965_subpic_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}
1797
1798
1799 static void 
1800 i965_render_initialize(VADriverContextP ctx)
1801 {
1802     struct i965_driver_data *i965 = i965_driver_data(ctx);
1803     struct i965_render_state *render_state = &i965->render_state;
1804     dri_bo *bo;
1805
1806     /* VERTEX BUFFER */
1807     dri_bo_unreference(render_state->vb.vertex_buffer);
1808     bo = dri_bo_alloc(i965->intel.bufmgr,
1809                       "vertex buffer",
1810                       4096,
1811                       4096);
1812     assert(bo);
1813     render_state->vb.vertex_buffer = bo;
1814
1815     /* VS */
1816     dri_bo_unreference(render_state->vs.state);
1817     bo = dri_bo_alloc(i965->intel.bufmgr,
1818                       "vs state",
1819                       sizeof(struct i965_vs_unit_state),
1820                       64);
1821     assert(bo);
1822     render_state->vs.state = bo;
1823
1824     /* GS */
1825     /* CLIP */
1826     /* SF */
1827     dri_bo_unreference(render_state->sf.state);
1828     bo = dri_bo_alloc(i965->intel.bufmgr,
1829                       "sf state",
1830                       sizeof(struct i965_sf_unit_state),
1831                       64);
1832     assert(bo);
1833     render_state->sf.state = bo;
1834
1835     /* WM */
1836     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1837     bo = dri_bo_alloc(i965->intel.bufmgr,
1838                       "surface state & binding table",
1839                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1840                       4096);
1841     assert(bo);
1842     render_state->wm.surface_state_binding_table_bo = bo;
1843
1844     dri_bo_unreference(render_state->wm.sampler);
1845     bo = dri_bo_alloc(i965->intel.bufmgr,
1846                       "sampler state",
1847                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1848                       64);
1849     assert(bo);
1850     render_state->wm.sampler = bo;
1851     render_state->wm.sampler_count = 0;
1852
1853     dri_bo_unreference(render_state->wm.state);
1854     bo = dri_bo_alloc(i965->intel.bufmgr,
1855                       "wm state",
1856                       sizeof(struct i965_wm_unit_state),
1857                       64);
1858     assert(bo);
1859     render_state->wm.state = bo;
1860
1861     /* COLOR CALCULATOR */
1862     dri_bo_unreference(render_state->cc.state);
1863     bo = dri_bo_alloc(i965->intel.bufmgr,
1864                       "color calc state",
1865                       sizeof(struct i965_cc_unit_state),
1866                       64);
1867     assert(bo);
1868     render_state->cc.state = bo;
1869
1870     dri_bo_unreference(render_state->cc.viewport);
1871     bo = dri_bo_alloc(i965->intel.bufmgr,
1872                       "cc viewport",
1873                       sizeof(struct i965_cc_viewport),
1874                       64);
1875     assert(bo);
1876     render_state->cc.viewport = bo;
1877 }
1878
/*
 * Legacy (pre-GEN6) entry point for rendering a video surface to the
 * current draw region: allocate state buffers, fill in the render
 * state, emit the pipeline, and flush the batch to the kernel.
 *
 * flags carries the VA put-surface flags (e.g. field/frame selection) —
 * passed through to the state setup.
 */
static void
i965_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_render_initialize(ctx);
    i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_surface_render_pipeline_setup(ctx);
    intel_batchbuffer_flush(batch);
}
1896
/*
 * Legacy (pre-GEN6) entry point for compositing the surface's current
 * subpicture (selected by obj_surface->subpic_render_idx) over the draw
 * region.  Uploads the subpicture image palette with opaque alpha
 * (0xff) before flushing.
 */
static void
i965_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);

    i965_render_initialize(ctx);
    i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
    i965_subpic_render_pipeline_setup(ctx);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}
1918
1919 /*
1920  * for GEN6+
1921  */
1922 static void 
1923 gen6_render_initialize(VADriverContextP ctx)
1924 {
1925     struct i965_driver_data *i965 = i965_driver_data(ctx);
1926     struct i965_render_state *render_state = &i965->render_state;
1927     dri_bo *bo;
1928
1929     /* VERTEX BUFFER */
1930     dri_bo_unreference(render_state->vb.vertex_buffer);
1931     bo = dri_bo_alloc(i965->intel.bufmgr,
1932                       "vertex buffer",
1933                       4096,
1934                       4096);
1935     assert(bo);
1936     render_state->vb.vertex_buffer = bo;
1937
1938     /* WM */
1939     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1940     bo = dri_bo_alloc(i965->intel.bufmgr,
1941                       "surface state & binding table",
1942                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1943                       4096);
1944     assert(bo);
1945     render_state->wm.surface_state_binding_table_bo = bo;
1946
1947     dri_bo_unreference(render_state->wm.sampler);
1948     bo = dri_bo_alloc(i965->intel.bufmgr,
1949                       "sampler state",
1950                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1951                       4096);
1952     assert(bo);
1953     render_state->wm.sampler = bo;
1954     render_state->wm.sampler_count = 0;
1955
1956     /* COLOR CALCULATOR */
1957     dri_bo_unreference(render_state->cc.state);
1958     bo = dri_bo_alloc(i965->intel.bufmgr,
1959                       "color calc state",
1960                       sizeof(struct gen6_color_calc_state),
1961                       4096);
1962     assert(bo);
1963     render_state->cc.state = bo;
1964
1965     /* CC VIEWPORT */
1966     dri_bo_unreference(render_state->cc.viewport);
1967     bo = dri_bo_alloc(i965->intel.bufmgr,
1968                       "cc viewport",
1969                       sizeof(struct i965_cc_viewport),
1970                       4096);
1971     assert(bo);
1972     render_state->cc.viewport = bo;
1973
1974     /* BLEND STATE */
1975     dri_bo_unreference(render_state->cc.blend);
1976     bo = dri_bo_alloc(i965->intel.bufmgr,
1977                       "blend state",
1978                       sizeof(struct gen6_blend_state),
1979                       4096);
1980     assert(bo);
1981     render_state->cc.blend = bo;
1982
1983     /* DEPTH & STENCIL STATE */
1984     dri_bo_unreference(render_state->cc.depth_stencil);
1985     bo = dri_bo_alloc(i965->intel.bufmgr,
1986                       "depth & stencil state",
1987                       sizeof(struct gen6_depth_stencil_state),
1988                       4096);
1989     assert(bo);
1990     render_state->cc.depth_stencil = bo;
1991 }
1992
1993 static void
1994 gen6_render_color_calc_state(VADriverContextP ctx)
1995 {
1996     struct i965_driver_data *i965 = i965_driver_data(ctx);
1997     struct i965_render_state *render_state = &i965->render_state;
1998     struct gen6_color_calc_state *color_calc_state;
1999     
2000     dri_bo_map(render_state->cc.state, 1);
2001     assert(render_state->cc.state->virtual);
2002     color_calc_state = render_state->cc.state->virtual;
2003     memset(color_calc_state, 0, sizeof(*color_calc_state));
2004     color_calc_state->constant_r = 1.0;
2005     color_calc_state->constant_g = 0.0;
2006     color_calc_state->constant_b = 1.0;
2007     color_calc_state->constant_a = 1.0;
2008     dri_bo_unmap(render_state->cc.state);
2009 }
2010
2011 static void
2012 gen6_render_blend_state(VADriverContextP ctx)
2013 {
2014     struct i965_driver_data *i965 = i965_driver_data(ctx);
2015     struct i965_render_state *render_state = &i965->render_state;
2016     struct gen6_blend_state *blend_state;
2017     
2018     dri_bo_map(render_state->cc.blend, 1);
2019     assert(render_state->cc.blend->virtual);
2020     blend_state = render_state->cc.blend->virtual;
2021     memset(blend_state, 0, sizeof(*blend_state));
2022     blend_state->blend1.logic_op_enable = 1;
2023     blend_state->blend1.logic_op_func = 0xc;
2024     dri_bo_unmap(render_state->cc.blend);
2025 }
2026
2027 static void
2028 gen6_render_depth_stencil_state(VADriverContextP ctx)
2029 {
2030     struct i965_driver_data *i965 = i965_driver_data(ctx);
2031     struct i965_render_state *render_state = &i965->render_state;
2032     struct gen6_depth_stencil_state *depth_stencil_state;
2033     
2034     dri_bo_map(render_state->cc.depth_stencil, 1);
2035     assert(render_state->cc.depth_stencil->virtual);
2036     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2037     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2038     dri_bo_unmap(render_state->cc.depth_stencil);
2039 }
2040
/*
 * Build all indirect state objects needed for GEN6 surface rendering:
 * destination/source surface states, samplers, CC viewport, color-calc,
 * blend and depth/stencil state, shader constants and the vertex data.
 * Emission of the commands that point at this state happens later in
 * gen6_render_emit_states().
 */
static void
gen6_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx, obj_surface, flags);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}
2060
/*
 * Emit the GEN6 state that never changes between draws: select the 3D
 * pipeline, program single-sample multisampling, a full sample mask,
 * and a zero system instruction pointer.
 * (Function name "invarient" is a historical typo kept for ABI/grep
 * consistency.)
 */
static void
gen6_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);

    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1); /* enable the single sample */

    /* Set system instruction pointer */
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
}
2081
/*
 * Emit STATE_BASE_ADDRESS for GEN6.  Only the surface state base is
 * pointed at a real bo (the surface state + binding table buffer); all
 * other bases and bounds are left at zero with the modify bit set,
 * meaning the addresses used elsewhere are absolute.
 */
static void
gen6_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}
2100
/*
 * Point the hardware at the CC viewport state only; the clip and SF
 * viewport pointers are left unmodified (zero, modify bit not set for
 * them in the command's DW0 flags).
 */
static void
gen6_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
              GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
              (4 - 2));
    OUT_BATCH(batch, 0); /* clip viewport (not modified) */
    OUT_BATCH(batch, 0); /* SF viewport (not modified) */
    OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}
2115
/*
 * Program the GEN6 URB partitioning: 24 VS entries of minimal size
 * (hardware minimum on GEN6) and no GS entries, since no GS thread is
 * dispatched in this pipeline.
 */
static void
gen6_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
    OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
              (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
    OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
              (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}
2128
/*
 * Point the hardware at the blend, depth/stencil and color-calc state
 * bos.  The low bit (the "1" in each relocation delta) is the pointer's
 * modify-enable bit.
 */
static void
gen6_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
    OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
}
2141
/*
 * Bind the sampler state table to the pixel shader stage only; VS and
 * GS sampler pointers are left unmodified.
 */
static void
gen6_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
              GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0); /* VS */
    OUT_BATCH(batch, 0); /* GS */
    OUT_RELOC(batch,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}
2156
/*
 * Set the PS binding table pointer (offset within the surface state
 * base programmed in gen6_emit_state_base_address()).  Only the pixel
 * shader stage reads surfaces in this pipeline.
 */
static void
gen6_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Binding table pointers */
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
              GEN6_BINDING_TABLE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0);                /* vs */
    OUT_BATCH(batch, 0);                /* gs */
    /* Only the PS uses the binding table */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
}
2172
/*
 * Program a null depth buffer (surface type NULL) and zero clear
 * params — depth testing is unused, but the state must still be
 * emitted so the pipeline is fully specified.
 */
static void
gen6_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
              (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
    OUT_BATCH(batch, 0);
}
2191
/*
 * GEN6 drawing rectangle is identical to the legacy encoding; reuse the
 * common emitter.
 */
static void
gen6_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}
2197
/*
 * Disable the vertex shader stage: no constant buffer and no VS kernel,
 * so vertices pass straight through to the fixed-function pipeline.
 */
static void 
gen6_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
        
    OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}
2218
/*
 * Disable the geometry shader stage: no constant buffer and no GS
 * kernel, so primitives pass straight through.
 */
static void 
gen6_emit_gs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable GS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
        
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}
2240
/*
 * Disable the clip stage (pass-through) — the drawing rectangle already
 * bounds the output.
 */
static void 
gen6_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}
2252
/*
 * Program the GEN6 strips-and-fans (SF) stage: one attribute output
 * (the texture coordinate), one URB entry read, no culling, and the
 * trifan provoking vertex set to 2.  The remaining dwords (viewport,
 * point/line state, attribute swizzles) are all zero.
 */
static void 
gen6_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
              (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW14 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW19 */
}
2282
/*
 * Program the GEN6 windower/pixel-shader stage: bind push constant
 * buffer 0 to the curbe bo, select the PS kernel given by `kernel`
 * (an index into render_state->render_kernels), and enable SIMD16
 * dispatch at the maximum thread count with one sampler, five binding
 * table entries and dispatch starting at GRF 6.
 */
static void 
gen6_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
              GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
              (5 - 2));
    /* relocation delta encodes the constant buffer length */
    OUT_RELOC(batch, 
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              (URB_CS_ENTRY_SIZE-1));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
    OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
              (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
    OUT_BATCH(batch, ((render_state->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
              GEN6_3DSTATE_WM_DISPATCH_ENABLE |
              GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
              GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
}
2317
/*
 * Describe the vertex layout to the fetcher: two R32G32_FLOAT elements
 * from vertex buffer 0 — position (X, Y) at offset 0 and texture
 * coordinate (S0, T0) at offset 8 — each expanded to a vec4 with
 * Z = W = 1.0.
 */
static void
gen6_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}
2345
/*
 * Bind the vertex buffer (16 bytes per vertex; end address at offset
 * 12*4 = 3 vertices) and emit the 3DPRIMITIVE command for one
 * sequential RECTLIST — the GEN6 counterpart of i965_render_startup().
 */
static void
gen6_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch, 
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);      /* start address */
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); /* end address */
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, 
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}
2376
/*
 * Emit the full GEN6 3D pipeline into one atomic batch segment and draw
 * with the PS kernel selected by `kernel`.  The state objects pointed
 * at here must already have been built by gen6_render_setup_states()
 * (or the subpicture variant).  Emission order is hardware-mandated.
 */
static void
gen6_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_emit_invarient_states(ctx);
    gen6_emit_state_base_address(ctx);
    gen6_emit_viewport_state_pointers(ctx);
    gen6_emit_urb(ctx);
    gen6_emit_cc_state_pointers(ctx);
    gen6_emit_sampler_state_pointers(ctx);
    gen6_emit_vs_state(ctx);
    gen6_emit_gs_state(ctx);
    gen6_emit_clip_state(ctx);
    gen6_emit_sf_state(ctx);
    gen6_emit_wm_state(ctx, kernel);
    gen6_emit_binding_table(ctx);
    gen6_emit_depth_buffer_state(ctx);
    gen6_emit_drawing_rectangle(ctx);
    gen6_emit_vertex_element_state(ctx);
    gen6_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
2403
/*
 * GEN6 entry point for rendering a video surface: allocate state bos,
 * build the indirect state, clear the destination, emit the pipeline
 * with the standard PS kernel and flush the batch.
 */
static void
gen6_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen6_render_initialize(ctx);
    gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_clear_dest_region(ctx);
    gen6_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}
2422
2423 static void
2424 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2425 {
2426     struct i965_driver_data *i965 = i965_driver_data(ctx);
2427     struct i965_render_state *render_state = &i965->render_state;
2428     struct gen6_blend_state *blend_state;
2429
2430     dri_bo_unmap(render_state->cc.state);    
2431     dri_bo_map(render_state->cc.blend, 1);
2432     assert(render_state->cc.blend->virtual);
2433     blend_state = render_state->cc.blend->virtual;
2434     memset(blend_state, 0, sizeof(*blend_state));
2435     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2436     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2437     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2438     blend_state->blend0.blend_enable = 1;
2439     blend_state->blend1.post_blend_clamp_enable = 1;
2440     blend_state->blend1.pre_blend_clamp_enable = 1;
2441     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2442     dri_bo_unmap(render_state->cc.blend);
2443 }
2444
/*
 * Build all indirect state for GEN6 subpicture rendering.  Same shape
 * as gen6_render_setup_states() but with the subpicture source
 * surfaces, alpha-blend state instead of the COPY logic op, and the
 * subpicture constant/vertex uploads.
 */
static void
gen6_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_subpicture_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}
2463
/*
 * GEN6 entry point for compositing the surface's current subpicture
 * (selected by obj_surface->subpic_render_idx) over the draw region,
 * using the subpicture PS kernel.  The destination is intentionally not
 * cleared.  The image palette is uploaded with opaque alpha (0xff).
 */
static void
gen6_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen6_render_initialize(ctx);
    gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}
2484
2485 /*
2486  * for GEN7
2487  */
/*
 * (Re)allocate the buffer objects holding GEN7 render state: vertex
 * buffer, surface state + binding table, samplers, color-calc state,
 * CC viewport, blend state and depth/stencil state.  Each old BO is
 * released with dri_bo_unreference() first, so this is safe to call
 * repeatedly (e.g. once per PutSurface).
 */
static void
gen7_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;
    int size;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
                      4096);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct gen6_color_calc_state),
                      4096);
    assert(bo);
    render_state->cc.state = bo;

    /* CC VIEWPORT */
    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      4096);
    assert(bo);
    render_state->cc.viewport = bo;

    /* BLEND STATE */
    dri_bo_unreference(render_state->cc.blend);
    /* NOTE(review): sized using the GEN8 blend structures even on the
     * GEN7 path; gen7_render_blend_state() only writes a
     * gen6_blend_state, so this merely over-allocates — confirm the
     * shared sizing is intentional. */
    size = sizeof(struct gen8_global_blend_state) + 2 * sizeof(struct gen8_blend_state_rt);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "blend state",
                      size,
                      4096);
    assert(bo);
    render_state->cc.blend = bo;

    /* DEPTH & STENCIL STATE */
    dri_bo_unreference(render_state->cc.depth_stencil);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "depth & stencil state",
                      sizeof(struct gen6_depth_stencil_state),
                      4096);
    assert(bo);
    render_state->cc.depth_stencil = bo;
}
2560
2561 /*
2562  * for GEN8
2563  */
2564 static void 
2565 gen8_render_initialize(VADriverContextP ctx)
2566 {
2567     struct i965_driver_data *i965 = i965_driver_data(ctx);
2568     struct i965_render_state *render_state = &i965->render_state;
2569     dri_bo *bo;
2570     int size;
2571     unsigned int end_offset;
2572
2573     /* VERTEX BUFFER */
2574     dri_bo_unreference(render_state->vb.vertex_buffer);
2575     bo = dri_bo_alloc(i965->intel.bufmgr,
2576                       "vertex buffer",
2577                       4096,
2578                       4096);
2579     assert(bo);
2580     render_state->vb.vertex_buffer = bo;
2581
2582     /* WM */
2583     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2584     bo = dri_bo_alloc(i965->intel.bufmgr,
2585                       "surface state & binding table",
2586                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2587                       4096);
2588     assert(bo);
2589     render_state->wm.surface_state_binding_table_bo = bo;
2590
2591     render_state->curbe_size = 256;
2592
2593     render_state->wm.sampler_count = 0;
2594
2595     render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state);
2596
2597     render_state->cc_state_size = sizeof(struct gen6_color_calc_state);
2598
2599     render_state->cc_viewport_size = sizeof(struct i965_cc_viewport);
2600
2601     render_state->blend_state_size = sizeof(struct gen8_global_blend_state) +
2602                         16 * sizeof(struct gen8_blend_state_rt);
2603
2604     render_state->sf_clip_size = 1024;
2605
2606     render_state->scissor_size = 1024;
2607
2608     size = 4096 + render_state->curbe_size + render_state->sampler_size +
2609                 render_state->cc_state_size + render_state->cc_viewport_size +
2610                 render_state->blend_state_size + render_state->sf_clip_size +
2611                 render_state->scissor_size;
2612
2613     dri_bo_unreference(render_state->dynamic_state.bo);
2614     bo = dri_bo_alloc(i965->intel.bufmgr,
2615                       "dynamic_state",
2616                       size,
2617                       4096);
2618
2619     render_state->dynamic_state.bo = bo;
2620
2621     end_offset = 0;
2622     render_state->dynamic_state.end_offset = 0;
2623
2624     /* Constant buffer offset */
2625     render_state->curbe_offset = ALIGN(end_offset, 64);
2626     end_offset += render_state->curbe_size;
2627
2628     /* Sampler_state  */
2629     render_state->sampler_offset = ALIGN(end_offset, 64);
2630     end_offset += render_state->sampler_size;
2631
2632     /* CC_VIEWPORT_state  */
2633     render_state->cc_viewport_offset = ALIGN(end_offset, 64);
2634     end_offset += render_state->cc_viewport_size;
2635
2636     /* CC_STATE_state  */
2637     render_state->cc_state_offset = ALIGN(end_offset, 64);
2638     end_offset += render_state->cc_state_size;
2639
2640     /* Blend_state  */
2641     render_state->blend_state_offset = ALIGN(end_offset, 64);
2642     end_offset += render_state->blend_state_size;
2643
2644     /* SF_CLIP_state  */
2645     render_state->sf_clip_offset = ALIGN(end_offset, 64);
2646     end_offset += render_state->sf_clip_size;
2647
2648     /* SCISSOR_state  */
2649     render_state->scissor_offset = ALIGN(end_offset, 64);
2650     end_offset += render_state->scissor_size;
2651
2652     /* update the end offset of dynamic_state */
2653     render_state->dynamic_state.end_offset = ALIGN(end_offset, 64);
2654
2655 }
2656
2657 static void
2658 gen7_render_color_calc_state(VADriverContextP ctx)
2659 {
2660     struct i965_driver_data *i965 = i965_driver_data(ctx);
2661     struct i965_render_state *render_state = &i965->render_state;
2662     struct gen6_color_calc_state *color_calc_state;
2663     
2664     dri_bo_map(render_state->cc.state, 1);
2665     assert(render_state->cc.state->virtual);
2666     color_calc_state = render_state->cc.state->virtual;
2667     memset(color_calc_state, 0, sizeof(*color_calc_state));
2668     color_calc_state->constant_r = 1.0;
2669     color_calc_state->constant_g = 0.0;
2670     color_calc_state->constant_b = 1.0;
2671     color_calc_state->constant_a = 1.0;
2672     dri_bo_unmap(render_state->cc.state);
2673 }
2674
2675 static void
2676 gen7_render_blend_state(VADriverContextP ctx)
2677 {
2678     struct i965_driver_data *i965 = i965_driver_data(ctx);
2679     struct i965_render_state *render_state = &i965->render_state;
2680     struct gen6_blend_state *blend_state;
2681     
2682     dri_bo_map(render_state->cc.blend, 1);
2683     assert(render_state->cc.blend->virtual);
2684     blend_state = render_state->cc.blend->virtual;
2685     memset(blend_state, 0, sizeof(*blend_state));
2686     blend_state->blend1.logic_op_enable = 1;
2687     blend_state->blend1.logic_op_func = 0xc;
2688     blend_state->blend1.pre_blend_clamp_enable = 1;
2689     dri_bo_unmap(render_state->cc.blend);
2690 }
2691
2692 static void
2693 gen7_render_depth_stencil_state(VADriverContextP ctx)
2694 {
2695     struct i965_driver_data *i965 = i965_driver_data(ctx);
2696     struct i965_render_state *render_state = &i965->render_state;
2697     struct gen6_depth_stencil_state *depth_stencil_state;
2698     
2699     dri_bo_map(render_state->cc.depth_stencil, 1);
2700     assert(render_state->cc.depth_stencil->virtual);
2701     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2702     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2703     dri_bo_unmap(render_state->cc.depth_stencil);
2704 }
2705
2706 static void 
2707 gen7_render_sampler(VADriverContextP ctx)
2708 {
2709     struct i965_driver_data *i965 = i965_driver_data(ctx);
2710     struct i965_render_state *render_state = &i965->render_state;
2711     struct gen7_sampler_state *sampler_state;
2712     int i;
2713     
2714     assert(render_state->wm.sampler_count > 0);
2715     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2716
2717     dri_bo_map(render_state->wm.sampler, 1);
2718     assert(render_state->wm.sampler->virtual);
2719     sampler_state = render_state->wm.sampler->virtual;
2720     for (i = 0; i < render_state->wm.sampler_count; i++) {
2721         memset(sampler_state, 0, sizeof(*sampler_state));
2722         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2723         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2724         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2725         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2726         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2727         sampler_state++;
2728     }
2729
2730     dri_bo_unmap(render_state->wm.sampler);
2731 }
2732
2733 static void 
2734 gen8_render_sampler(VADriverContextP ctx)
2735 {
2736     struct i965_driver_data *i965 = i965_driver_data(ctx);
2737     struct i965_render_state *render_state = &i965->render_state;
2738     struct gen8_sampler_state *sampler_state;
2739     int i;
2740     unsigned char *cc_ptr;
2741     
2742     assert(render_state->wm.sampler_count > 0);
2743     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2744
2745     dri_bo_map(render_state->dynamic_state.bo, 1);
2746     assert(render_state->dynamic_state.bo->virtual);
2747
2748     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
2749                         render_state->sampler_offset;
2750
2751     sampler_state = (struct gen8_sampler_state *) cc_ptr;
2752
2753     for (i = 0; i < render_state->wm.sampler_count; i++) {
2754         memset(sampler_state, 0, sizeof(*sampler_state));
2755         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2756         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2757         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2758         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2759         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2760         sampler_state++;
2761     }
2762
2763     dri_bo_unmap(render_state->dynamic_state.bo);
2764 }
2765
2766
/*
 * Fill every GEN7 state buffer needed to draw obj_surface into the
 * render target: surface states, samplers, viewport, color-calc,
 * blend, depth-stencil, push constants and vertex data.
 */
static void
gen7_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    gen7_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx, obj_surface, flags);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}
2786
2787 static void
2788 gen8_render_blend_state(VADriverContextP ctx)
2789 {
2790     struct i965_driver_data *i965 = i965_driver_data(ctx);
2791     struct i965_render_state *render_state = &i965->render_state;
2792     struct gen8_global_blend_state *global_blend_state;
2793     struct gen8_blend_state_rt *blend_state;
2794     unsigned char *cc_ptr;
2795     
2796     dri_bo_map(render_state->dynamic_state.bo, 1);
2797     assert(render_state->dynamic_state.bo->virtual);
2798
2799     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
2800                         render_state->blend_state_offset;
2801
2802     global_blend_state = (struct gen8_global_blend_state*) cc_ptr;
2803
2804     memset(global_blend_state, 0, sizeof(*global_blend_state));
2805     /* Global blend state + blend_state for Render Target */
2806     blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
2807     blend_state->blend1.logic_op_enable = 1;
2808     blend_state->blend1.logic_op_func = 0xc;
2809     blend_state->blend1.pre_blend_clamp_enable = 1;
2810
2811     dri_bo_unmap(render_state->dynamic_state.bo);
2812 }
2813
2814
2815 static void 
2816 gen8_render_cc_viewport(VADriverContextP ctx)
2817 {
2818     struct i965_driver_data *i965 = i965_driver_data(ctx);
2819     struct i965_render_state *render_state = &i965->render_state;
2820     struct i965_cc_viewport *cc_viewport;
2821     unsigned char *cc_ptr;
2822
2823     dri_bo_map(render_state->dynamic_state.bo, 1);
2824     assert(render_state->dynamic_state.bo->virtual);
2825
2826     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
2827                         render_state->cc_viewport_offset;
2828
2829     cc_viewport = (struct i965_cc_viewport *) cc_ptr;
2830
2831     memset(cc_viewport, 0, sizeof(*cc_viewport));
2832     
2833     cc_viewport->min_depth = -1.e35;
2834     cc_viewport->max_depth = 1.e35;
2835
2836     dri_bo_unmap(render_state->dynamic_state.bo);
2837 }
2838
2839 static void
2840 gen8_render_color_calc_state(VADriverContextP ctx)
2841 {
2842     struct i965_driver_data *i965 = i965_driver_data(ctx);
2843     struct i965_render_state *render_state = &i965->render_state;
2844     struct gen6_color_calc_state *color_calc_state;
2845     unsigned char *cc_ptr;
2846
2847     dri_bo_map(render_state->dynamic_state.bo, 1);
2848     assert(render_state->dynamic_state.bo->virtual);
2849
2850     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
2851                         render_state->cc_state_offset;
2852
2853     color_calc_state = (struct gen6_color_calc_state *) cc_ptr;
2854
2855     memset(color_calc_state, 0, sizeof(*color_calc_state));
2856     color_calc_state->constant_r = 1.0;
2857     color_calc_state->constant_g = 0.0;
2858     color_calc_state->constant_b = 1.0;
2859     color_calc_state->constant_a = 1.0;
2860     dri_bo_unmap(render_state->dynamic_state.bo);
2861 }
2862
/*
 * Fill the push-constant (CURBE) area of the dynamic state BO read by
 * the BDW pixel shader.  Layout as written here:
 *   u16[0]    source format selector: 2 = Y800 (grayscale),
 *             1 = NV12, 0 = other YUV layouts
 *   u16[1]    1 = skip the color-balance transform (all attributes at
 *             their defaults), 0 = apply it
 *   f32[4..7] contrast, brightness, cos(hue)*contrast*saturation,
 *             sin(hue)*contrast*saturation
 * NOTE(review): this layout must match what the BDW render kernels
 * expect — confirm against the shader sources.
 */
static void
gen8_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    unsigned char *cc_ptr;
    float *color_balance_base;
    /* Normalize the display attributes to the ranges the shader uses. */
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->curbe_offset;

    constant_buffer = (unsigned short *) cc_ptr;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0'));

        *constant_buffer = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
            *constant_buffer = 1;
        else
            *constant_buffer = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    /* Floats start 4 floats (16 bytes) into the constant buffer. */
    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    dri_bo_unmap(render_state->dynamic_state.bo);
}
2912
/*
 * Fill every GEN8 state section needed to draw obj_surface: surface
 * states, samplers, CC viewport, color-calc and blend state (all in
 * the dynamic state BO), push constants and vertex data.  Unlike the
 * GEN7 path, no separate depth-stencil state is written here.
 */
static void
gen8_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    gen8_render_sampler(ctx);
    gen8_render_cc_viewport(ctx);
    gen8_render_color_calc_state(ctx);
    gen8_render_blend_state(ctx);
    gen8_render_upload_constants(ctx, obj_surface);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}
2931
/*
 * Emit the GEN7 invariant (once-per-batch) state: select the 3D
 * pipeline, configure single-sample multisampling with a sample mask
 * of 0x1, and zero the system instruction pointer.
 * (Function name keeps the historical "invarient" spelling — callers
 * elsewhere depend on it.)
 */
static void
gen7_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2961
/*
 * Emit GEN7 STATE_BASE_ADDRESS (10 DWORDs).  Only the surface state
 * base is relocated to the surface-state/binding-table BO; all other
 * bases and bounds are left at zero with the modify bit set.
 * NOTE(review): emitted without BEGIN_BATCH/ADVANCE_BATCH bracketing,
 * matching gen7_emit_vs_state()/gen7_emit_clip_state() — confirm
 * OUT_BATCH reserves batch space on its own in this driver.
 */
static void
gen7_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}
2980
/*
 * Emit GEN8 STATE_BASE_ADDRESS (16 DWORDs; 64-bit addresses).  Surface
 * state, dynamic state and instruction bases are relocated to their
 * BOs; the dynamic state base is what makes the offset-based pointers
 * emitted by gen8_emit_cc_state_pointers() etc. resolve correctly.
 */
static void
gen8_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 16);
    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
        OUT_BATCH(batch, 0);
        OUT_BATCH(batch, 0);
        /*DW4 */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
        OUT_BATCH(batch, 0);

        /*DW6*/
    /* Dynamic state base address */
    OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
                0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

        /*DW8*/
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, 0);

        /*DW10 */
    /* Instruction base address */
    OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

        /*DW12 */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
    ADVANCE_BATCH(batch);
}
3019
/*
 * Emit the GEN7 viewport state pointers: the CC viewport comes from
 * its own BO; the SF/CLIP viewport pointer is left at zero (unused).
 */
static void
gen7_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.viewport,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
3040
/*
 * URB layout on GEN7
 * ----------------------------------------
 * | PS Push Constants (8KB) | VS entries |
 * ----------------------------------------
 *
 * Allocate 8KB of push-constant space for the PS, give all URB entries
 * to the VS (64 on Haswell, 32 otherwise), and zero-size the GS/HS/DS
 * allocations since those stages are bypassed.
 */
static void
gen7_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int num_urb_entries = 32;

    if (IS_HASWELL(i965->intel.device_id))
        num_urb_entries = 64;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
    OUT_BATCH(batch, 8); /* in 1KBs */
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
    OUT_BATCH(batch,
              (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
              (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
              (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);
}
3091
/*
 * Emit GEN7 pointers to the color-calc, blend and depth-stencil state
 * BOs.  The relocation delta of 1 sets bit 0 of each emitted pointer
 * (presumably the "state pointer valid" bit — confirm against the
 * GEN7 PRM for these 3DSTATE_*_POINTERS packets).
 */
static void
gen7_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.state,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.blend,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.depth_stencil,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);
}
3123
/*
 * Emit GEN8 color-calc and blend state pointers as offsets relative to
 * the dynamic state base address (set by gen8_emit_state_base_address);
 * the +1 sets bit 0 of the pointer (presumably the "valid" bit —
 * confirm against the BDW PRM).  No depth-stencil pointer is emitted
 * on this path.
 */
static void
gen8_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->cc_state_offset + 1));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->blend_state_offset + 1));
    ADVANCE_BATCH(batch);

}
3142
/*
 * Point the GEN7 pixel-shader sampler state at the sampler BO filled
 * by gen7_render_sampler().
 */
static void
gen7_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_RELOC(batch,
              render_state->wm.sampler,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);
}
3158
/*
 * Point the GEN7 pixel-shader binding table at the fixed
 * BINDING_TABLE_OFFSET within the surface state BO.
 */
static void
gen7_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}
3170
/*
 * Emit a NULL depth buffer (surface type SURFACE_NULL, no address) and
 * zeroed clear params — video rendering does not use depth.
 */
static void
gen7_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
3195
/* GEN7 drawing rectangle: identical to the common path, so delegate. */
static void
gen7_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}
3201
/*
 * Disable the GEN7 vertex shader: no constant buffer and no VS kernel,
 * so vertices pass through unchanged.
 * NOTE(review): emitted without BEGIN_BATCH/ADVANCE_BATCH bracketing,
 * like gen7_emit_state_base_address() — confirm OUT_BATCH reserves
 * space implicitly in this driver.
 */
static void
gen7_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}
3224
/*
 * Disable every GEN7 geometry stage that video rendering does not use:
 * GS, HS, TE, DS and stream-out are all programmed with zeroed packets
 * (no kernels, no constants, no binding tables), leaving only the
 * pass-through VS, SF and WM active.
 */
static void
gen7_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
3323
/*
 * Disable GEN7 clipping: a zeroed 3DSTATE_CLIP packet leaves the clip
 * stage in pass-through.
 * NOTE(review): emitted without BEGIN_BATCH/ADVANCE_BATCH bracketing,
 * matching gen7_emit_vs_state() — confirm this is safe here.
 */
static void
gen7_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}
3335
/* Program the Gen7 setup/SBE stages: one attribute (the texture coordinate)
 * is passed from the VUE to the pixel shader, and culling is disabled so the
 * full RECTLIST is rasterized. */
static void 
gen7_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* 3DSTATE_SBE: 1 output attribute, read 1 URB entry pair at offset 0 */
    BEGIN_BATCH(batch, 14);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
    OUT_BATCH(batch,
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* 3DSTATE_SF: no culling; vertex 2 is the provoking vertex for trifans */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
3372
/* Program the Gen7 WM/PS pipeline stages for the given render kernel:
 * WM dispatch flags, the push-constant (CURBE) buffer, and the pixel
 * shader state (kernel address, thread count, dispatch mode).
 * Haswell uses a different max-thread bitfield position and requires a
 * non-zero sample mask, hence the IS_HASWELL special case. */
static void 
gen7_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
    unsigned int num_samples = 0;

    if (IS_HASWELL(i965->intel.device_id)) {
        max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
        num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
    }

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
    OUT_BATCH(batch,
              GEN7_WM_DISPATCH_ENABLE |
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Push constants for the PS come from the CURBE buffer object */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
    OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch, 
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* 3DSTATE_PS: kernel address, 1 sampler, 5 binding-table entries,
     * SIMD16 dispatch starting at GRF 6 */
    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
    OUT_RELOC(batch, 
              render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch, 
              ((render_state->max_wm_threads - 1) << max_threads_shift) | num_samples |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_ATTRIBUTE_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    OUT_BATCH(batch, 
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    ADVANCE_BATCH(batch);
}
3429
/* Describe the Gen7 vertex layout: two R32G32_FLOAT elements per vertex
 * (position at byte offset 0, texcoord at byte offset 8), each expanded
 * to a 4-component value with Z/W forced to 1.0. */
static void
gen7_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}
3457
/* Bind the vertex buffer (16 bytes per vertex) and draw a 3-vertex
 * RECTLIST primitive covering the destination rectangle. */
static void
gen7_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch, 
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    /* start address, then end address (3 vertices * 4 floats = 12 dwords) */
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST |
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
3489
/* Emit the complete Gen7 3D pipeline state and the draw call as one
 * atomic batch. The emission order matters: invariant state and base
 * addresses first, then per-stage state, then vertex data and the
 * primitive. 0x1000 reserves the batch space used by the atomic section. */
static void
gen7_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_emit_invarient_states(ctx);
    gen7_emit_state_base_address(ctx);
    gen7_emit_viewport_state_pointers(ctx);
    gen7_emit_urb(ctx);
    gen7_emit_cc_state_pointers(ctx);
    gen7_emit_sampler_state_pointers(ctx);
    gen7_emit_bypass_state(ctx);
    gen7_emit_vs_state(ctx);
    gen7_emit_clip_state(ctx);
    gen7_emit_sf_state(ctx);
    gen7_emit_wm_state(ctx, kernel);
    gen7_emit_binding_table(ctx);
    gen7_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen7_emit_vertex_element_state(ctx);
    gen7_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
3516
/* Gen8 variant of the vertex setup: binds the vertex buffer (48-bit
 * address split across two dwords, explicit buffer size instead of an
 * end address), selects the RECTLIST topology via 3DSTATE_VF_TOPOLOGY,
 * and issues the 3-vertex draw. */
static void
gen8_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch, 
              (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) |
              (0 << GEN8_VB0_MOCS_SHIFT) |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_BATCH(batch, 0); /* upper 32 bits of the buffer address */
    OUT_BATCH(batch, 12 * 4); /* buffer size: 3 vertices * 4 floats */
    ADVANCE_BATCH(batch);

    /* Topology in 3D primitive is overrided by VF_TOPOLOGY command */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST);
    ADVANCE_BATCH(batch);

    
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
3555
/* Describe the Gen8 vertex layout. Three elements are emitted: a padding
 * element of four zero components (required VUE header), position from
 * byte offset 8, and texture coordinate from byte offset 0; the latter
 * two get Z/W forced to 1.0. */
static void
gen8_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /*
     * The VUE layout
     * dword 0-3: pad (0, 0, 0. 0)
     * dword 4-7: position (x, y, 1.0, 1.0),
     * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0)
     */

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (7 - 2));

    /* Element state 0. These are 4 dwords of 0 required for the VUE format.
     * We don't really know or care what they do.
     */

    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));

    /* offset 8: X, Y -> {x, y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));

    /* offset 0: u,v -> {U, V, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}
3605
/* Disable the Gen8 vertex shader stage: zero constant buffers, no VS
 * kernel (pass-through), and null binding-table/sampler pointers. */
static void 
gen8_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 0 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 1 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 2 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    /* VS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. VS shader GRF and URB buffer definition */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

}
3656
3657 /*
3658  * URB layout on GEN8 
3659  * ----------------------------------------
3660  * | PS Push Constants (8KB) | VS entries |
3661  * ----------------------------------------
3662  */
3663 static void
3664 gen8_emit_urb(VADriverContextP ctx)
3665 {
3666     struct i965_driver_data *i965 = i965_driver_data(ctx);
3667     struct intel_batchbuffer *batch = i965->batch;
3668     unsigned int num_urb_entries = 64;
3669
3670     /* The minimum urb entries is 64 */
3671
3672     BEGIN_BATCH(batch, 2);
3673     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
3674     OUT_BATCH(batch, 0);
3675     ADVANCE_BATCH(batch);
3676
3677     BEGIN_BATCH(batch, 2);
3678     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
3679     OUT_BATCH(batch, 0);
3680     ADVANCE_BATCH(batch);
3681
3682     BEGIN_BATCH(batch, 2);
3683     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
3684     OUT_BATCH(batch, 0);
3685     ADVANCE_BATCH(batch);
3686
3687     BEGIN_BATCH(batch, 2);
3688     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
3689     OUT_BATCH(batch, 0);
3690     ADVANCE_BATCH(batch);
3691
3692     /* Size is 8Kbs and base address is 0Kb */
3693     BEGIN_BATCH(batch, 2);
3694     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
3695     /* Size is 8Kbs and base address is 0Kb */
3696     OUT_BATCH(batch,
3697                 (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) |
3698                 (8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT));
3699     ADVANCE_BATCH(batch);
3700
3701     BEGIN_BATCH(batch, 2);
3702     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
3703     OUT_BATCH(batch, 
3704               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
3705               (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
3706               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3707    ADVANCE_BATCH(batch);
3708
3709    BEGIN_BATCH(batch, 2);
3710    OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
3711    OUT_BATCH(batch,
3712              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
3713              (5 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3714    ADVANCE_BATCH(batch);
3715
3716    BEGIN_BATCH(batch, 2);
3717    OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
3718    OUT_BATCH(batch,
3719              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
3720              (6 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3721    ADVANCE_BATCH(batch);
3722
3723    BEGIN_BATCH(batch, 2);
3724    OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
3725    OUT_BATCH(batch,
3726              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
3727              (7 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3728    ADVANCE_BATCH(batch);
3729 }
3730
/* Bypass (disable) all Gen8 geometry stages not used by the render path:
 * GS, HS, TE, DS and stream-output. Each stage gets zeroed constant
 * buffers, a zeroed stage-state command and null binding-table/sampler
 * pointers, so the pipeline passes vertices straight from VS to SF. */
static void 
gen8_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 10);     
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2));
    /* GS shader address */
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    /* DW3. GS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. GS shader GRF and URB offset/length */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2));
    OUT_BATCH(batch, 0);
    /*DW2. HS pass-through */
    OUT_BATCH(batch, 0);
    /*DW3. HS shader address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /*DW5. HS shader flag. URB offset/length and so on */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (9 - 2));
    /* DW1. DS shader pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW3-5. DS shader dispatch flag.*/
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6-7. DS shader pass-through, GRF,URB offset/Length,Thread Number*/
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW8. DS shader output URB */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
3876
/* Emit Gen8 state that never changes for this render path: select the
 * 3D pipeline, force single-sample rendering, clear the sample pattern,
 * set a full sample mask and a null system instruction pointer.
 * (Function name keeps the historical "invarient" spelling.) */
static void
gen8_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    ADVANCE_BATCH(batch);

    /* Update 3D Multisample pattern */
    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);


    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1); /* enable the single sample */
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
3919
/* Emit 3DSTATE_CLIP with a zero payload so the Gen8 clip stage runs in
 * pass-through mode (no clipping of the RECTLIST primitive). */
static void 
gen8_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}
3931
/* Program the Gen8 raster/setup stages: no culling, one attribute passed
 * to the pixel shader with forced URB read length/offset, zeroed SBE
 * swizzle table, and default SF state. */
static void 
gen8_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2));
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);


    /* 3DSTATE_SBE: 1 output, read 1 URB pair at offset 1 (skip the
     * 4-dword VUE pad emitted by the vertex elements) */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (4 - 2));
    OUT_BATCH(batch,
              (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) |
              (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) |
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* SBE for backend setup */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    ADVANCE_BATCH(batch);
}
3981
/* Program the Gen8 WM/PS stages for the given render kernel. The subpic
 * kernel additionally enables source-alpha blending so subpictures are
 * composited over the frame; the plain PS kernel writes opaquely.
 * num_samples stays 0 here (no HSW-style sample-mask field on Gen8). */
static void 
gen8_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    unsigned int num_samples = 0;
    unsigned int max_threads;

    max_threads = render_state->max_wm_threads - 2;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2));
    OUT_BATCH(batch,
              (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE));
    ADVANCE_BATCH(batch);

    
    if (kernel == PS_KERNEL) {
        /* opaque write to the render target */
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                GEN8_PS_BLEND_HAS_WRITEABLE_RT);
        ADVANCE_BATCH(batch);
    } else if (kernel == PS_SUBPIC_KERNEL) {
        /* src-alpha / inv-src-alpha blending for subpicture composition */
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                (GEN8_PS_BLEND_HAS_WRITEABLE_RT |
                 GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE |
                 (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) |
                 (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) |
                 (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) |
                 (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT)));
        ADVANCE_BATCH(batch);
    }

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2));
    OUT_BATCH(batch,
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    ADVANCE_BATCH(batch);

    /* Push constants: buffer 0 points at the CURBE region inside the
     * dynamic state buffer (offset, not a relocation, on Gen8) */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2));
    OUT_BATCH(batch, 1);
    OUT_BATCH(batch, 0);
    /*DW3-4. Constant buffer 0 */
    OUT_BATCH(batch, render_state->curbe_offset);
    OUT_BATCH(batch, 0);

    /*DW5-10. Constant buffer 1-3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 12);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2));
    /* PS shader address */
    OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset);

    OUT_BATCH(batch, 0);
    /* DW3. PS shader flag .Binding table cnt/sample cnt */
    OUT_BATCH(batch, 
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    /* DW4-5. Scatch space */
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch, 0);
    /* DW6. PS shader threads. */
    OUT_BATCH(batch, 
              ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    /* DW7. PS shader GRF */
    OUT_BATCH(batch, 
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}
4074
/* Emit null Gen8 depth/hier-depth/stencil buffers and zero clear params:
 * the render path draws without any depth or stencil testing. */
static void
gen8_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Null depth buffer (surface type NULL, D32_FLOAT format) */
    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    /* DW2-3. Depth Buffer Address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW4-7. Surface structure */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the Hier Depth buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
    
    /* Update the stencil buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
    
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
4120
/* Emit zeroed WM_DEPTH_STENCIL state: depth and stencil tests disabled. */
static void
gen8_emit_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
4133
/* Emit a zeroed 3DSTATE_WM_HZ_OP: no hierarchical-Z operation requested. */
static void
gen8_emit_wm_hz_op(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
4148
/* Point the hardware at the CC viewport state prepared earlier in the
 * dynamic state buffer; the SF/CLIP viewport pointer is left null. */
static void
gen8_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_BATCH(batch, render_state->cc_viewport_offset);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
4166
/* Point the PS stage at the sampler state prepared earlier in the
 * dynamic state buffer. */
static void
gen8_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, render_state->sampler_offset);
    ADVANCE_BATCH(batch);
}
4179
4180
/* Emit the complete Gen8 3D pipeline state and the draw call as one
 * atomic batch. Mirrors gen7_render_emit_states but adds the Gen8-only
 * WM_HZ_OP and depth/stencil state commands; emission order matters. */
static void
gen8_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_emit_invarient_states(ctx);
    gen8_emit_state_base_address(ctx);
    gen8_emit_viewport_state_pointers(ctx);
    gen8_emit_urb(ctx);
    gen8_emit_cc_state_pointers(ctx);
    gen8_emit_sampler_state_pointers(ctx);
    gen8_emit_wm_hz_op(ctx);
    gen8_emit_bypass_state(ctx);
    gen8_emit_vs_state(ctx);
    gen8_emit_clip_state(ctx);
    gen8_emit_sf_state(ctx);
    gen8_emit_depth_stencil_state(ctx);
    gen8_emit_wm_state(ctx, kernel);
    gen8_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen8_emit_vertex_element_state(ctx);
    gen8_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
4208
4209 static void
4210 gen7_render_put_surface(
4211     VADriverContextP   ctx,
4212     struct object_surface *obj_surface,    
4213     const VARectangle *src_rect,
4214     const VARectangle *dst_rect,
4215     unsigned int       flags
4216 )
4217 {
4218     struct i965_driver_data *i965 = i965_driver_data(ctx);
4219     struct intel_batchbuffer *batch = i965->batch;
4220
4221     gen7_render_initialize(ctx);
4222     gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
4223     i965_clear_dest_region(ctx);
4224     gen7_render_emit_states(ctx, PS_KERNEL);
4225     intel_batchbuffer_flush(batch);
4226 }
4227
4228 static void
4229 gen8_render_put_surface(
4230     VADriverContextP   ctx,
4231     struct object_surface *obj_surface,    
4232     const VARectangle *src_rect,
4233     const VARectangle *dst_rect,
4234     unsigned int       flags
4235 )
4236 {
4237     struct i965_driver_data *i965 = i965_driver_data(ctx);
4238     struct intel_batchbuffer *batch = i965->batch;
4239
4240     gen8_render_initialize(ctx);
4241     gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
4242     gen8_clear_dest_region(ctx);
4243     gen8_render_emit_states(ctx, PS_KERNEL);
4244     intel_batchbuffer_flush(batch);
4245 }
4246
/*
 * Fill in the Gen6-layout blend state for subpicture compositing:
 * classic "source over" alpha blending
 * (dst = src * src_alpha + dst * (1 - src_alpha)).
 */
static void
gen7_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    /* NOTE(review): cc.state is unmapped here without a visible matching
     * map in this function — presumably it is left mapped by an earlier
     * state-setup step; verify against the callers before changing. */
    dri_bo_unmap(render_state->cc.state);
    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    /* Clamp blend inputs and results into the representable range. */
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}
4268
4269 static void
4270 gen8_subpicture_render_blend_state(VADriverContextP ctx)
4271 {
4272     struct i965_driver_data *i965 = i965_driver_data(ctx);
4273     struct i965_render_state *render_state = &i965->render_state;
4274     struct gen8_global_blend_state *global_blend_state;
4275     struct gen8_blend_state_rt *blend_state;
4276     unsigned char *cc_ptr;
4277     
4278     dri_bo_map(render_state->dynamic_state.bo, 1);
4279     assert(render_state->dynamic_state.bo->virtual);
4280
4281     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
4282                         render_state->blend_state_offset;
4283
4284     global_blend_state = (struct gen8_global_blend_state*) cc_ptr;
4285
4286     memset(global_blend_state, 0, sizeof(*global_blend_state));
4287     /* Global blend state + blend_state for Render Target */
4288     blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
4289     blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD;
4290     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
4291     blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
4292     blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD;
4293     blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
4294     blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
4295     blend_state->blend0.colorbuf_blend = 1;
4296     blend_state->blend1.post_blend_clamp_enable = 1;
4297     blend_state->blend1.pre_blend_clamp_enable = 1;
4298     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
4299
4300     dri_bo_unmap(render_state->dynamic_state.bo);
4301 }
4302
/*
 * Prepare all Gen7 render states for compositing a subpicture onto
 * obj_surface: destination and source surface states, sampler,
 * viewport, color-calc/blend/depth-stencil states, the constant
 * buffer (global alpha) and the vertex data.
 *
 * NOTE(review): src_rect is currently unused here; the vertex upload
 * derives its texture coordinates from the subpicture itself.
 */
static void
gen7_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_subpicture_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}
4321
4322 static void
4323 gen8_subpic_render_upload_constants(VADriverContextP ctx,
4324                                     struct object_surface *obj_surface)
4325 {
4326     struct i965_driver_data *i965 = i965_driver_data(ctx);
4327     struct i965_render_state *render_state = &i965->render_state;
4328     float *constant_buffer;
4329     float global_alpha = 1.0;
4330     unsigned int index = obj_surface->subpic_render_idx;
4331     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
4332     unsigned char *cc_ptr;
4333
4334     if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
4335         global_alpha = obj_subpic->global_alpha;
4336     }
4337
4338
4339     dri_bo_map(render_state->dynamic_state.bo, 1);
4340     assert(render_state->dynamic_state.bo->virtual);
4341
4342     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
4343                                 render_state->curbe_offset;
4344
4345     constant_buffer = (float *) cc_ptr;
4346     *constant_buffer = global_alpha;
4347
4348     dri_bo_unmap(render_state->dynamic_state.bo);
4349 }
4350
/*
 * Prepare all Gen8 render states for compositing a subpicture onto
 * obj_surface: destination and source surface states, sampler,
 * viewport, color-calc/blend states, the constant buffer (global
 * alpha) and the vertex data.
 *
 * NOTE(review): src_rect is currently unused here, matching the Gen7
 * variant; vertex upload works from the subpicture rectangles.
 */
static void
gen8_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    gen8_render_sampler(ctx);
    gen8_render_cc_viewport(ctx);
    gen8_render_color_calc_state(ctx);
    gen8_subpicture_render_blend_state(ctx);
    gen8_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}
4368
4369 static void
4370 gen7_render_put_subpicture(
4371     VADriverContextP   ctx,
4372     struct object_surface *obj_surface,
4373     const VARectangle *src_rect,
4374     const VARectangle *dst_rect
4375 )
4376 {
4377     struct i965_driver_data *i965 = i965_driver_data(ctx);
4378     struct intel_batchbuffer *batch = i965->batch;
4379     unsigned int index = obj_surface->subpic_render_idx;
4380     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
4381
4382     assert(obj_subpic);
4383     gen7_render_initialize(ctx);
4384     gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
4385     gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
4386     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
4387     intel_batchbuffer_flush(batch);
4388 }
4389
4390 static void
4391 gen8_render_put_subpicture(
4392     VADriverContextP   ctx,
4393     struct object_surface *obj_surface,
4394     const VARectangle *src_rect,
4395     const VARectangle *dst_rect
4396 )
4397 {
4398     struct i965_driver_data *i965 = i965_driver_data(ctx);
4399     struct intel_batchbuffer *batch = i965->batch;
4400     unsigned int index = obj_surface->subpic_render_idx;
4401     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
4402
4403     assert(obj_subpic);
4404     gen8_render_initialize(ctx);
4405     gen8_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
4406     gen8_render_emit_states(ctx, PS_SUBPIC_KERNEL);
4407     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
4408     intel_batchbuffer_flush(batch);
4409 }
4410
4411 /*
4412  * global functions
4413  */
4414 VAStatus 
4415 i965_DestroySurfaces(VADriverContextP ctx,
4416                      VASurfaceID *surface_list,
4417                      int num_surfaces);
/*
 * Top-level surface rendering entry point.  First runs the
 * post-processing pipeline (deinterlacing/scaling/procamp); if PP
 * produced an intermediate surface, render that one instead of the
 * original.  Finally dispatches to the generation-specific renderer
 * and releases any intermediate surface.
 */
void
intel_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int has_done_scaling = 0;
    VASurfaceID out_surface_id = i965_post_processing(ctx,
                                                      obj_surface,
                                                      src_rect,
                                                      dst_rect,
                                                      flags,
                                                      &has_done_scaling);

    /* Scaling implies PP produced a valid output surface. */
    assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));

    if (out_surface_id != VA_INVALID_ID) {
        struct object_surface *new_obj_surface = SURFACE(out_surface_id);
        
        if (new_obj_surface && new_obj_surface->bo)
            obj_surface = new_obj_surface;

        /* PP already scaled into dst_rect dimensions, so the render
         * step must sample the full (already scaled) region. */
        if (has_done_scaling)
            src_rect = dst_rect;
    }

    if (IS_GEN8(i965->intel.device_id))
        gen8_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
    else if (IS_GEN7(i965->intel.device_id))
        gen7_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
    else if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
    else
        i965_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);

    /* Drop the temporary PP output surface, if one was created. */
    if (out_surface_id != VA_INVALID_ID)
        i965_DestroySurfaces(ctx, &out_surface_id, 1);
}
4460
4461 void
4462 intel_render_put_subpicture(
4463     VADriverContextP   ctx,
4464     struct object_surface *obj_surface,
4465     const VARectangle *src_rect,
4466     const VARectangle *dst_rect
4467 )
4468 {
4469     struct i965_driver_data *i965 = i965_driver_data(ctx);
4470
4471     if (IS_GEN8(i965->intel.device_id))
4472         gen8_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
4473     else if (IS_GEN7(i965->intel.device_id))
4474         gen7_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
4475     else if (IS_GEN6(i965->intel.device_id))
4476         gen6_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
4477     else
4478         i965_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
4479 }
4480
4481 static bool 
4482 gen8_render_init(VADriverContextP ctx)
4483 {
4484     struct i965_driver_data *i965 = i965_driver_data(ctx);
4485     struct i965_render_state *render_state = &i965->render_state;
4486     int i, kernel_size;
4487     unsigned int kernel_offset, end_offset;
4488     unsigned char *kernel_ptr;
4489     struct i965_kernel *kernel;
4490
4491
4492     if (IS_GEN8(i965->intel.device_id)) {
4493         memcpy(render_state->render_kernels, render_kernels_gen8,
4494                         sizeof(render_state->render_kernels));
4495     }
4496
4497     kernel_size = 4096;
4498
4499     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
4500         kernel = &render_state->render_kernels[i];
4501
4502         if (!kernel->size)
4503             continue;
4504
4505         kernel_size += kernel->size;
4506     }
4507
4508     render_state->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
4509                                   "kernel shader",
4510                                   kernel_size,
4511                                   0x1000);
4512     if (render_state->instruction_state.bo == NULL) {
4513         WARN_ONCE("failure to allocate the buffer space for kernel shader\n");
4514         return false;
4515     }
4516
4517     assert(render_state->instruction_state.bo);
4518
4519     render_state->instruction_state.bo_size = kernel_size;
4520     render_state->instruction_state.end_offset = 0;
4521     end_offset = 0;
4522
4523     dri_bo_map(render_state->instruction_state.bo, 1);
4524     kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual);
4525     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
4526         kernel = &render_state->render_kernels[i];
4527         kernel_offset = ALIGN(end_offset, 64);
4528         kernel->kernel_offset = kernel_offset;
4529
4530         if (!kernel->size)
4531             continue;
4532
4533         memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
4534
4535         end_offset += kernel->size;
4536     }
4537
4538     render_state->instruction_state.end_offset = end_offset;
4539
4540     dri_bo_unmap(render_state->instruction_state.bo);
4541
4542
4543     if (IS_GEN8(i965->intel.device_id)) {
4544         render_state->max_wm_threads = 64;
4545     } else {
4546         /* should never get here !!! */
4547         assert(0);
4548     }
4549
4550     return true;
4551 }
4552
4553
/*
 * Driver-wide render initialization.  Gen8 delegates to
 * gen8_render_init(); older generations select the matching kernel
 * set, upload each kernel into its own buffer object, allocate the
 * constant buffer, and record the per-SKU maximum WM thread count.
 *
 * Returns true on success (the pre-Gen8 path always succeeds or
 * asserts).
 */
bool 
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;

    /* kernel */
    /* All per-generation kernel tables must have the same entry count. */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / 
                                 sizeof(render_kernels_gen5[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / 
                                 sizeof(render_kernels_gen6[0])));

    if (IS_GEN8(i965->intel.device_id)) {
        return gen8_render_init(ctx);
    } else  if (IS_GEN7(i965->intel.device_id)) 
        memcpy(render_state->render_kernels,
               (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7),
               sizeof(render_state->render_kernels));
    else if (IS_GEN6(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
    else if (IS_IRONLAKE(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
    else
        memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));

    /* Upload each kernel binary into its own 4KB-aligned bo. */
    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
                                  kernel->name, 
                                  kernel->size, 0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* constant buffer */
    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
                      "constant buffer",
                      4096, 64);
    assert(render_state->curbe.bo);

    /* Maximum WM (pixel shader) thread counts per hardware SKU. */
    if (IS_HSW_GT1(i965->intel.device_id)) {
        render_state->max_wm_threads = 102;
    } else if (IS_HSW_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 204;
    } else if (IS_HSW_GT3(i965->intel.device_id)) {
        render_state->max_wm_threads = 408;
    } else if (IS_IVB_GT1(i965->intel.device_id) || IS_BAYTRAIL(i965->intel.device_id)) {
        render_state->max_wm_threads = 48;
    } else if (IS_IVB_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 172;
    } else if (IS_SNB_GT1(i965->intel.device_id)) {
        render_state->max_wm_threads = 40;
    } else if (IS_SNB_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 80;
    } else if (IS_IRONLAKE(i965->intel.device_id)) {
        render_state->max_wm_threads = 72; /* 12 * 6 */
    } else if (IS_G4X(i965->intel.device_id)) {
        render_state->max_wm_threads = 50; /* 12 * 5 */
    } else {
        /* should never get here !!! */
        assert(0);
    }

    return true;
}
4624
4625 static void 
4626 gen8_render_terminate(VADriverContextP ctx)
4627 {
4628     int i;
4629     struct i965_driver_data *i965 = i965_driver_data(ctx);
4630     struct i965_render_state *render_state = &i965->render_state;
4631
4632     dri_bo_unreference(render_state->vb.vertex_buffer);
4633     render_state->vb.vertex_buffer = NULL;
4634
4635     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
4636     render_state->wm.surface_state_binding_table_bo = NULL;
4637    
4638     if (render_state->instruction_state.bo) {
4639         dri_bo_unreference(render_state->instruction_state.bo);
4640         render_state->instruction_state.bo = NULL;
4641     }
4642
4643     if (render_state->dynamic_state.bo) {
4644         dri_bo_unreference(render_state->dynamic_state.bo);
4645         render_state->dynamic_state.bo = NULL;
4646     }
4647
4648     if (render_state->indirect_state.bo) {
4649         dri_bo_unreference(render_state->indirect_state.bo);
4650         render_state->indirect_state.bo = NULL;
4651     }
4652
4653     if (render_state->draw_region) {
4654         dri_bo_unreference(render_state->draw_region->bo);
4655         free(render_state->draw_region);
4656         render_state->draw_region = NULL;
4657     }
4658 }
4659
4660 void 
4661 i965_render_terminate(VADriverContextP ctx)
4662 {
4663     int i;
4664     struct i965_driver_data *i965 = i965_driver_data(ctx);
4665     struct i965_render_state *render_state = &i965->render_state;
4666
4667     if (IS_GEN8(i965->intel.device_id)) {
4668         gen8_render_terminate(ctx);
4669         return;
4670     } 
4671
4672     dri_bo_unreference(render_state->curbe.bo);
4673     render_state->curbe.bo = NULL;
4674
4675     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
4676         struct i965_kernel *kernel = &render_state->render_kernels[i];
4677         
4678         dri_bo_unreference(kernel->bo);
4679         kernel->bo = NULL;
4680     }
4681
4682     dri_bo_unreference(render_state->vb.vertex_buffer);
4683     render_state->vb.vertex_buffer = NULL;
4684     dri_bo_unreference(render_state->vs.state);
4685     render_state->vs.state = NULL;
4686     dri_bo_unreference(render_state->sf.state);
4687     render_state->sf.state = NULL;
4688     dri_bo_unreference(render_state->wm.sampler);
4689     render_state->wm.sampler = NULL;
4690     dri_bo_unreference(render_state->wm.state);
4691     render_state->wm.state = NULL;
4692     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
4693     dri_bo_unreference(render_state->cc.viewport);
4694     render_state->cc.viewport = NULL;
4695     dri_bo_unreference(render_state->cc.state);
4696     render_state->cc.state = NULL;
4697     dri_bo_unreference(render_state->cc.blend);
4698     render_state->cc.blend = NULL;
4699     dri_bo_unreference(render_state->cc.depth_stencil);
4700     render_state->cc.depth_stencil = NULL;
4701
4702     if (render_state->draw_region) {
4703         dri_bo_unreference(render_state->draw_region->bo);
4704         free(render_state->draw_region);
4705         render_state->draw_region = NULL;
4706     }
4707 }
4708