/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"
#include "i965_yuv_coefs.h"

#include "i965_render.h"

#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

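/*
 * I965_GRF_BLOCKS() encodes a GRF register count the way the hardware
 * expects it: "(number of 16-register blocks) - 1". For example,
 * I965_GRF_BLOCKS(48) = (48 + 15) / 16 - 1 = 2, i.e. three blocks of
 * 16 registers for the 48-GRF pixel shader kernels above.
 */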
#define I965_GRF_BLOCKS(nreg)   (((nreg) + 15) / 16 - 1)

static const uint32_t ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_color_balance.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

static const uint32_t ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IRONLAKE */
static const uint32_t sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

static const uint32_t ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] =
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_color_balance.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] =
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_color_balance.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Programs for Haswell */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)

#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
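
/*
 * Rough layout of wm.surface_state_binding_table_bo: MAX_RENDER_SURFACES
 * surface-state entries, each padded to SURFACE_STATE_PADDED_SIZE so the
 * same offsets work for both the Gen6 and Gen7 state sizes, followed by
 * the binding table, whose 32-bit entry [i] simply holds
 * SURFACE_STATE_OFFSET(i) (see i965_render_src_surface_state() and
 * i965_render_dest_surface_state() below).
 */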

static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}
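
/*
 * Type punning through a union (rather than a pointer cast) lets float
 * immediates be emitted into the batch as raw dwords, as in
 * i965_render_constant_color() below. An equivalent sketch using memcpy:
 *
 *     uint32_t i;
 *     memcpy(&i, &f, sizeof(i));
 *     return i;
 */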

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4
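
/*
 * URB allocation summary (entries x entry size): VS 8x1, GS 0x0,
 * CLIP 0x0, SF 1x2, CS 4x4. GS and CLIP receive no URB space because
 * both units are disabled in i965_render_pipelined_pointers(); only the
 * SF (one vertex's worth of setup data) and the CS (the constant
 * buffer) carry real data here.
 */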

static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_info))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}
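
/*
 * The VS is programmed as a pass-through (vs_enable = 0): the vertices
 * uploaded by i965_fill_vertex_buffer() are already in final screen
 * space, so no vertex shading is required.
 */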

static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_info))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_info))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;   /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dest_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (VA_TOP_FIELD|VA_BOTTOM_FIELD)) {
    case VA_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case VA_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}

static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* Set "Shader Channel Select" */
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}
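
/*
 * Haswell added explicit shader channel selects to the surface state;
 * with the state memset to 0 they would otherwise read as "select
 * zero", so the identity mapping above is applied wherever
 * IS_HASWELL() holds (see the callers below).
 */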

static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (VA_TOP_FIELD|VA_BOTTOM_FIELD)) {
    case VA_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case VA_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}

static void
i965_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_info)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        if (IS_HASWELL(i965->intel.device_info))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

static void
i965_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */
        return;

    if (obj_surface->fourcc == VA_FOURCC_NV12) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      struct object_surface *obj_surface)
{
    dri_bo *subpic_region;
    unsigned int index;
    struct object_subpic *obj_subpic;
    struct object_image *obj_image;

    /* check the surface before dereferencing it */
    assert(obj_surface);
    assert(obj_surface->bo);

    index = obj_surface->subpic_render_idx;
    obj_subpic = obj_surface->obj_subpic[index];
    obj_image = obj_subpic->obj_image;
    subpic_region = obj_image->bo;

    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_info)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        if (IS_HASWELL(i965->intel.device_info))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}
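
/*
 * Each vertex occupies four floats: a texture coordinate pair followed
 * by a screen position pair, matching the two R32G32_FLOAT elements
 * programmed in i965_render_vertex_elements(). Only three corners are
 * uploaded because the quad is drawn as a RECTLIST (see
 * i965_render_startup()); the hardware infers the fourth corner.
 */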

static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

#define PI  3.1415926

static void
i965_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    const float *yuv_coefs;
    size_t coefs_length;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC_Y800);

        constant_buffer[0] = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC_NV12)
            constant_buffer[0] = 1;
        else
            constant_buffer[0] = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    yuv_to_rgb = (float *)constant_buffer + 8;
    yuv_coefs = i915_color_standard_to_coefs(i915_filter_to_color_standard(flags & VA_SRC_COLOR_MASK),
                                             &coefs_length);
    memcpy(yuv_to_rgb, yuv_coefs, coefs_length);

    dri_bo_unmap(render_state->curbe.bo);
}
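
/*
 * CURBE layout written above (consumed by the planar PS kernels):
 *   ushort[0]: surface layout selector (0 = three-plane YUV, 1 = NV12,
 *              2 = Y800 grayscale)
 *   ushort[1]: 1 to skip the color-balance transform entirely
 *   float[4..7]: contrast, brightness, cos(hue)*contrast*saturation,
 *                sin(hue)*contrast*saturation
 *   float[8..]: YUV-to-RGB coefficients for the selected color standard
 */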

static void
i965_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }

    dri_bo_map(render_state->curbe.bo, 1);

    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, obj_surface, flags);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}
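
/*
 * With the sizes defined above the fences land at VS = 8, GS = 8,
 * CLIP = 8, SF = 10 and CS = 26 allocation units: each fence is the
 * running sum start + entries * entry_size, so the disabled GS and
 * CLIP stages consume nothing.
 */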

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |  /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));           /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);
}
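
/*
 * Note the relocation delta above: in CMD_CONSTANT_BUFFER the low bits
 * of the address dword hold the buffer length, so OR'ing in
 * URB_CS_ENTRY_SIZE - 1 encodes "valid length - 1" together with the
 * curbe.bo address in a single dword.
 */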

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}

static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    struct object_image *obj_image,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    assert(obj_image);

    if (!obj_image)
        return;

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* fill the palette: each entry packs alpha in bits 31:24 over a 24-bit color in bits 23:0 */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}

static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch,
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    if (IS_IRONLAKE(i965->intel.device_info))
        OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    else
        OUT_BATCH(batch, 3);

    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}
1492
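/*
 * Clear the destination with a solid-color BLT before compositing.
 * BR13 carries the raster operation in bits 23:16 (0xf0 = PATCOPY,
 * i.e. fill with the color in the last command dword) along with the
 * destination pitch, which the blitter expects in dwords rather than
 * bytes for tiled targets -- hence the pitch /= 4 below.
 */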
1493 static void 
1494 i965_clear_dest_region(VADriverContextP ctx)
1495 {
1496     struct i965_driver_data *i965 = i965_driver_data(ctx);
1497     struct intel_batchbuffer *batch = i965->batch;
1498     struct i965_render_state *render_state = &i965->render_state;
1499     struct intel_region *dest_region = render_state->draw_region;
1500     unsigned int blt_cmd, br13;
1501     int pitch;
1502
1503     blt_cmd = XY_COLOR_BLT_CMD;
1504     br13 = 0xf0 << 16;
1505     pitch = dest_region->pitch;
1506
1507     if (dest_region->cpp == 4) {
1508         br13 |= BR13_8888;
1509         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
1510     } else {
1511         assert(dest_region->cpp == 2);
1512         br13 |= BR13_565;
1513     }
1514
1515     if (dest_region->tiling != I915_TILING_NONE) {
1516         blt_cmd |= XY_COLOR_BLT_DST_TILED;
1517         pitch /= 4;
1518     }
1519
1520     br13 |= pitch;
1521
1522     if (IS_GEN6(i965->intel.device_info) ||
1523         IS_GEN7(i965->intel.device_info)) {
1524         intel_batchbuffer_start_atomic_blt(batch, 24);
1525         BEGIN_BLT_BATCH(batch, 6);
1526     } else {
1527         intel_batchbuffer_start_atomic(batch, 24);
1528         BEGIN_BATCH(batch, 6);
1529     }
1530
1531     OUT_BATCH(batch, blt_cmd);
1532     OUT_BATCH(batch, br13);
1533     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
1534     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
1535               (dest_region->x + dest_region->width));
1536     OUT_RELOC(batch, dest_region->bo, 
1537               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1538               0);
1539     OUT_BATCH(batch, 0x0);
1540     ADVANCE_BATCH(batch);
1541     intel_batchbuffer_end_atomic(batch);
1542 }
1543
1544 static void
1545 i965_surface_render_pipeline_setup(VADriverContextP ctx)
1546 {
1547     struct i965_driver_data *i965 = i965_driver_data(ctx);
1548     struct intel_batchbuffer *batch = i965->batch;
1549
1550     i965_clear_dest_region(ctx);
1551     intel_batchbuffer_start_atomic(batch, 0x1000);
1552     intel_batchbuffer_emit_mi_flush(batch);
1553     i965_render_pipeline_select(ctx);
1554     i965_render_state_sip(ctx);
1555     i965_render_state_base_address(ctx);
1556     i965_render_binding_table_pointers(ctx);
1557     i965_render_constant_color(ctx);
1558     i965_render_pipelined_pointers(ctx);
1559     i965_render_urb_layout(ctx);
1560     i965_render_cs_urb_layout(ctx);
1561     i965_render_constant_buffer(ctx);
1562     i965_render_drawing_rectangle(ctx);
1563     i965_render_vertex_elements(ctx);
1564     i965_render_startup(ctx);
1565     intel_batchbuffer_end_atomic(batch);
1566 }
1567
1568 static void
1569 i965_subpic_render_pipeline_setup(VADriverContextP ctx)
1570 {
1571     struct i965_driver_data *i965 = i965_driver_data(ctx);
1572     struct intel_batchbuffer *batch = i965->batch;
1573
1574     intel_batchbuffer_start_atomic(batch, 0x1000);
1575     intel_batchbuffer_emit_mi_flush(batch);
1576     i965_render_pipeline_select(ctx);
1577     i965_render_state_sip(ctx);
1578     i965_render_state_base_address(ctx);
1579     i965_render_binding_table_pointers(ctx);
1580     i965_render_constant_color(ctx);
1581     i965_render_pipelined_pointers(ctx);
1582     i965_render_urb_layout(ctx);
1583     i965_render_cs_urb_layout(ctx);
1584     i965_render_constant_buffer(ctx);
1585     i965_render_drawing_rectangle(ctx);
1586     i965_render_vertex_elements(ctx);
1587     i965_render_startup(ctx);
1588     intel_batchbuffer_end_atomic(batch);
1589 }
1590
1591
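/*
 * (Re)allocate the fixed-function state buffers for the GEN4/GEN5
 * path. Every bo is unreferenced before its replacement is allocated,
 * so calling this once per put-surface simply drops the previous
 * frame's state objects; the last dri_bo_alloc argument is the
 * required alignment.
 */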
1592 static void 
1593 i965_render_initialize(VADriverContextP ctx)
1594 {
1595     struct i965_driver_data *i965 = i965_driver_data(ctx);
1596     struct i965_render_state *render_state = &i965->render_state;
1597     dri_bo *bo;
1598
1599     /* VERTEX BUFFER */
1600     dri_bo_unreference(render_state->vb.vertex_buffer);
1601     bo = dri_bo_alloc(i965->intel.bufmgr,
1602                       "vertex buffer",
1603                       4096,
1604                       4096);
1605     assert(bo);
1606     render_state->vb.vertex_buffer = bo;
1607
1608     /* VS */
1609     dri_bo_unreference(render_state->vs.state);
1610     bo = dri_bo_alloc(i965->intel.bufmgr,
1611                       "vs state",
1612                       sizeof(struct i965_vs_unit_state),
1613                       64);
1614     assert(bo);
1615     render_state->vs.state = bo;
1616
1617     /* GS */
1618     /* CLIP */
1619     /* SF */
1620     dri_bo_unreference(render_state->sf.state);
1621     bo = dri_bo_alloc(i965->intel.bufmgr,
1622                       "sf state",
1623                       sizeof(struct i965_sf_unit_state),
1624                       64);
1625     assert(bo);
1626     render_state->sf.state = bo;
1627
1628     /* WM */
1629     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1630     bo = dri_bo_alloc(i965->intel.bufmgr,
1631                       "surface state & binding table",
1632                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1633                       4096);
1634     assert(bo);
1635     render_state->wm.surface_state_binding_table_bo = bo;
1636
1637     dri_bo_unreference(render_state->wm.sampler);
1638     bo = dri_bo_alloc(i965->intel.bufmgr,
1639                       "sampler state",
1640                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1641                       64);
1642     assert(bo);
1643     render_state->wm.sampler = bo;
1644     render_state->wm.sampler_count = 0;
1645
1646     dri_bo_unreference(render_state->wm.state);
1647     bo = dri_bo_alloc(i965->intel.bufmgr,
1648                       "wm state",
1649                       sizeof(struct i965_wm_unit_state),
1650                       64);
1651     assert(bo);
1652     render_state->wm.state = bo;
1653
1654     /* COLOR CALCULATOR */
1655     dri_bo_unreference(render_state->cc.state);
1656     bo = dri_bo_alloc(i965->intel.bufmgr,
1657                       "color calc state",
1658                       sizeof(struct i965_cc_unit_state),
1659                       64);
1660     assert(bo);
1661     render_state->cc.state = bo;
1662
1663     dri_bo_unreference(render_state->cc.viewport);
1664     bo = dri_bo_alloc(i965->intel.bufmgr,
1665                       "cc viewport",
1666                       sizeof(struct i965_cc_viewport),
1667                       64);
1668     assert(bo);
1669     render_state->cc.viewport = bo;
1670 }
1671
1672 static void
1673 i965_render_put_surface(
1674     VADriverContextP   ctx,
1675     struct object_surface *obj_surface,
1676     const VARectangle *src_rect,
1677     const VARectangle *dst_rect,
1678     unsigned int       flags
1679 )
1680 {
1681     struct i965_driver_data *i965 = i965_driver_data(ctx);
1682     struct intel_batchbuffer *batch = i965->batch;
1683
1684     i965_render_initialize(ctx);
1685     i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
1686     i965_surface_render_pipeline_setup(ctx);
1687     intel_batchbuffer_flush(batch);
1688 }
1689
1690 static void
1691 i965_render_put_subpicture(
1692     VADriverContextP   ctx,
1693     struct object_surface *obj_surface,
1694     const VARectangle *src_rect,
1695     const VARectangle *dst_rect
1696 )
1697 {
1698     struct i965_driver_data *i965 = i965_driver_data(ctx);
1699     struct intel_batchbuffer *batch = i965->batch;
1700     unsigned int index = obj_surface->subpic_render_idx;
1701     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1702
1703     assert(obj_subpic);
1704
1705     i965_render_initialize(ctx);
1706     i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
1707     i965_subpic_render_pipeline_setup(ctx);
1708     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
1709     intel_batchbuffer_flush(batch);
1710 }
1711
1712 /*
1713  * for GEN6+
1714  */
1715 static void 
1716 gen6_render_initialize(VADriverContextP ctx)
1717 {
1718     struct i965_driver_data *i965 = i965_driver_data(ctx);
1719     struct i965_render_state *render_state = &i965->render_state;
1720     dri_bo *bo;
1721
1722     /* VERTEX BUFFER */
1723     dri_bo_unreference(render_state->vb.vertex_buffer);
1724     bo = dri_bo_alloc(i965->intel.bufmgr,
1725                       "vertex buffer",
1726                       4096,
1727                       4096);
1728     assert(bo);
1729     render_state->vb.vertex_buffer = bo;
1730
1731     /* WM */
1732     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1733     bo = dri_bo_alloc(i965->intel.bufmgr,
1734                       "surface state & binding table",
1735                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1736                       4096);
1737     assert(bo);
1738     render_state->wm.surface_state_binding_table_bo = bo;
1739
1740     dri_bo_unreference(render_state->wm.sampler);
1741     bo = dri_bo_alloc(i965->intel.bufmgr,
1742                       "sampler state",
1743                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1744                       4096);
1745     assert(bo);
1746     render_state->wm.sampler = bo;
1747     render_state->wm.sampler_count = 0;
1748
1749     /* COLOR CALCULATOR */
1750     dri_bo_unreference(render_state->cc.state);
1751     bo = dri_bo_alloc(i965->intel.bufmgr,
1752                       "color calc state",
1753                       sizeof(struct gen6_color_calc_state),
1754                       4096);
1755     assert(bo);
1756     render_state->cc.state = bo;
1757
1758     /* CC VIEWPORT */
1759     dri_bo_unreference(render_state->cc.viewport);
1760     bo = dri_bo_alloc(i965->intel.bufmgr,
1761                       "cc viewport",
1762                       sizeof(struct i965_cc_viewport),
1763                       4096);
1764     assert(bo);
1765     render_state->cc.viewport = bo;
1766
1767     /* BLEND STATE */
1768     dri_bo_unreference(render_state->cc.blend);
1769     bo = dri_bo_alloc(i965->intel.bufmgr,
1770                       "blend state",
1771                       sizeof(struct gen6_blend_state),
1772                       4096);
1773     assert(bo);
1774     render_state->cc.blend = bo;
1775
1776     /* DEPTH & STENCIL STATE */
1777     dri_bo_unreference(render_state->cc.depth_stencil);
1778     bo = dri_bo_alloc(i965->intel.bufmgr,
1779                       "depth & stencil state",
1780                       sizeof(struct gen6_depth_stencil_state),
1781                       4096);
1782     assert(bo);
1783     render_state->cc.depth_stencil = bo;
1784 }
1785
1786 static void
1787 gen6_render_color_calc_state(VADriverContextP ctx)
1788 {
1789     struct i965_driver_data *i965 = i965_driver_data(ctx);
1790     struct i965_render_state *render_state = &i965->render_state;
1791     struct gen6_color_calc_state *color_calc_state;
1792     
1793     dri_bo_map(render_state->cc.state, 1);
1794     assert(render_state->cc.state->virtual);
1795     color_calc_state = render_state->cc.state->virtual;
1796     memset(color_calc_state, 0, sizeof(*color_calc_state));
1797     color_calc_state->constant_r = 1.0;
1798     color_calc_state->constant_g = 0.0;
1799     color_calc_state->constant_b = 1.0;
1800     color_calc_state->constant_a = 1.0;
1801     dri_bo_unmap(render_state->cc.state);
1802 }
1803
1804 static void
1805 gen6_render_blend_state(VADriverContextP ctx)
1806 {
1807     struct i965_driver_data *i965 = i965_driver_data(ctx);
1808     struct i965_render_state *render_state = &i965->render_state;
1809     struct gen6_blend_state *blend_state;
1810     
1811     dri_bo_map(render_state->cc.blend, 1);
1812     assert(render_state->cc.blend->virtual);
1813     blend_state = render_state->cc.blend->virtual;
1814     memset(blend_state, 0, sizeof(*blend_state));
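    /* Logic op 0xc is COPY: route the source color through untouched;
     * actual blending stays disabled for the primary-surface pass. */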
1815     blend_state->blend1.logic_op_enable = 1;
1816     blend_state->blend1.logic_op_func = 0xc;
1817     dri_bo_unmap(render_state->cc.blend);
1818 }
1819
1820 static void
1821 gen6_render_depth_stencil_state(VADriverContextP ctx)
1822 {
1823     struct i965_driver_data *i965 = i965_driver_data(ctx);
1824     struct i965_render_state *render_state = &i965->render_state;
1825     struct gen6_depth_stencil_state *depth_stencil_state;
1826     
1827     dri_bo_map(render_state->cc.depth_stencil, 1);
1828     assert(render_state->cc.depth_stencil->virtual);
1829     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1830     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
1831     dri_bo_unmap(render_state->cc.depth_stencil);
1832 }
1833
1834 static void
1835 gen6_render_setup_states(
1836     VADriverContextP   ctx,
1837     struct object_surface *obj_surface,
1838     const VARectangle *src_rect,
1839     const VARectangle *dst_rect,
1840     unsigned int       flags
1841 )
1842 {
1843     i965_render_dest_surface_state(ctx, 0);
1844     i965_render_src_surfaces_state(ctx, obj_surface, flags);
1845     i965_render_sampler(ctx);
1846     i965_render_cc_viewport(ctx);
1847     gen6_render_color_calc_state(ctx);
1848     gen6_render_blend_state(ctx);
1849     gen6_render_depth_stencil_state(ctx);
1850     i965_render_upload_constants(ctx, obj_surface, flags);
1851     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
1852 }
1853
1854 static void
1855 gen6_emit_invariant_states(VADriverContextP ctx)
1856 {
1857     struct i965_driver_data *i965 = i965_driver_data(ctx);
1858     struct intel_batchbuffer *batch = i965->batch;
1859
1860     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
1861
1862     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
1863     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
1864               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
1865     OUT_BATCH(batch, 0);
1866
1867     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
1868     OUT_BATCH(batch, 1);
1869
1870     /* Set system instruction pointer */
1871     OUT_BATCH(batch, CMD_STATE_SIP | 0);
1872     OUT_BATCH(batch, 0);
1873 }
1874
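/*
 * Program STATE_BASE_ADDRESS so that only the surface-state base
 * points at a real buffer (the combined surface-state/binding-table
 * bo); every other base and upper bound is simply (re)enabled at zero
 * via BASE_ADDRESS_MODIFY, keeping those address spaces flat.
 */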
1875 static void
1876 gen6_emit_state_base_address(VADriverContextP ctx)
1877 {
1878     struct i965_driver_data *i965 = i965_driver_data(ctx);
1879     struct intel_batchbuffer *batch = i965->batch;
1880     struct i965_render_state *render_state = &i965->render_state;
1881
1882     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
1883     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
1884     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
1885     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
1886     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
1887     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
1888     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
1889     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
1890     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
1891     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
1892 }
1893
1894 static void
1895 gen6_emit_viewport_state_pointers(VADriverContextP ctx)
1896 {
1897     struct i965_driver_data *i965 = i965_driver_data(ctx);
1898     struct intel_batchbuffer *batch = i965->batch;
1899     struct i965_render_state *render_state = &i965->render_state;
1900
1901     OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
1902               GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
1903               (4 - 2));
1904     OUT_BATCH(batch, 0);
1905     OUT_BATCH(batch, 0);
1906     OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1907 }
1908
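/*
 * Minimal GEN6 URB split: the VS gets the hardware-minimum 24 entries
 * of a single row each (the size field is biased by one, and the row
 * granularity appears to be 128 bytes), while the unused GS gets
 * nothing.
 */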
1909 static void
1910 gen6_emit_urb(VADriverContextP ctx)
1911 {
1912     struct i965_driver_data *i965 = i965_driver_data(ctx);
1913     struct intel_batchbuffer *batch = i965->batch;
1914
1915     OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
1916     OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
1917               (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
1918     OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
1919               (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
1920 }
1921
1922 static void
1923 gen6_emit_cc_state_pointers(VADriverContextP ctx)
1924 {
1925     struct i965_driver_data *i965 = i965_driver_data(ctx);
1926     struct intel_batchbuffer *batch = i965->batch;
1927     struct i965_render_state *render_state = &i965->render_state;
1928
1929     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
1930     OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1931     OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1932     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1933 }
1934
1935 static void
1936 gen6_emit_sampler_state_pointers(VADriverContextP ctx)
1937 {
1938     struct i965_driver_data *i965 = i965_driver_data(ctx);
1939     struct intel_batchbuffer *batch = i965->batch;
1940     struct i965_render_state *render_state = &i965->render_state;
1941
1942     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
1943               GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
1944               (4 - 2));
1945     OUT_BATCH(batch, 0); /* VS */
1946     OUT_BATCH(batch, 0); /* GS */
1947     OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1948 }
1949
1950 static void
1951 gen6_emit_binding_table(VADriverContextP ctx)
1952 {
1953     struct i965_driver_data *i965 = i965_driver_data(ctx);
1954     struct intel_batchbuffer *batch = i965->batch;
1955
1956     /* Binding table pointers */
1957     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
1958               GEN6_BINDING_TABLE_MODIFY_PS |
1959               (4 - 2));
1960     OUT_BATCH(batch, 0);                /* vs */
1961     OUT_BATCH(batch, 0);                /* gs */
1962     /* Only the PS uses the binding table */
1963     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
1964 }
1965
1966 static void
1967 gen6_emit_depth_buffer_state(VADriverContextP ctx)
1968 {
1969     struct i965_driver_data *i965 = i965_driver_data(ctx);
1970     struct intel_batchbuffer *batch = i965->batch;
1971
1972     OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
1973     OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
1974               (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
1975     OUT_BATCH(batch, 0);
1976     OUT_BATCH(batch, 0);
1977     OUT_BATCH(batch, 0);
1978     OUT_BATCH(batch, 0);
1979     OUT_BATCH(batch, 0);
1980
1981     OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
1982     OUT_BATCH(batch, 0);
1983 }
1984
1985 static void
1986 gen6_emit_drawing_rectangle(VADriverContextP ctx)
1987 {
1988     i965_render_drawing_rectangle(ctx);
1989 }
1990
1991 static void 
1992 gen6_emit_vs_state(VADriverContextP ctx)
1993 {
1994     struct i965_driver_data *i965 = i965_driver_data(ctx);
1995     struct intel_batchbuffer *batch = i965->batch;
1996
1997     /* disable VS constant buffer */
1998     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
1999     OUT_BATCH(batch, 0);
2000     OUT_BATCH(batch, 0);
2001     OUT_BATCH(batch, 0);
2002     OUT_BATCH(batch, 0);
2003         
2004     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2005     OUT_BATCH(batch, 0); /* without VS kernel */
2006     OUT_BATCH(batch, 0);
2007     OUT_BATCH(batch, 0);
2008     OUT_BATCH(batch, 0);
2009     OUT_BATCH(batch, 0); /* pass-through */
2010 }
2011
2012 static void 
2013 gen6_emit_gs_state(VADriverContextP ctx)
2014 {
2015     struct i965_driver_data *i965 = i965_driver_data(ctx);
2016     struct intel_batchbuffer *batch = i965->batch;
2017
2018     /* disable GS constant buffer */
2019     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
2020     OUT_BATCH(batch, 0);
2021     OUT_BATCH(batch, 0);
2022     OUT_BATCH(batch, 0);
2023     OUT_BATCH(batch, 0);
2024         
2025     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2026     OUT_BATCH(batch, 0); /* without GS kernel */
2027     OUT_BATCH(batch, 0);
2028     OUT_BATCH(batch, 0);
2029     OUT_BATCH(batch, 0);
2030     OUT_BATCH(batch, 0);
2031     OUT_BATCH(batch, 0); /* pass-through */
2032 }
2033
2034 static void 
2035 gen6_emit_clip_state(VADriverContextP ctx)
2036 {
2037     struct i965_driver_data *i965 = i965_driver_data(ctx);
2038     struct intel_batchbuffer *batch = i965->batch;
2039
2040     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2041     OUT_BATCH(batch, 0);
2042     OUT_BATCH(batch, 0); /* pass-through */
2043     OUT_BATCH(batch, 0);
2044 }
2045
2046 static void 
2047 gen6_emit_sf_state(VADriverContextP ctx)
2048 {
2049     struct i965_driver_data *i965 = i965_driver_data(ctx);
2050     struct intel_batchbuffer *batch = i965->batch;
2051
2052     OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
2053     OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
2054               (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
2055               (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
2056     OUT_BATCH(batch, 0);
2057     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2058     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
2059     OUT_BATCH(batch, 0);
2060     OUT_BATCH(batch, 0);
2061     OUT_BATCH(batch, 0);
2062     OUT_BATCH(batch, 0);
2063     OUT_BATCH(batch, 0); /* DW9 */
2064     OUT_BATCH(batch, 0);
2065     OUT_BATCH(batch, 0);
2066     OUT_BATCH(batch, 0);
2067     OUT_BATCH(batch, 0);
2068     OUT_BATCH(batch, 0); /* DW14 */
2069     OUT_BATCH(batch, 0);
2070     OUT_BATCH(batch, 0);
2071     OUT_BATCH(batch, 0);
2072     OUT_BATCH(batch, 0);
2073     OUT_BATCH(batch, 0); /* DW19 */
2074 }
2075
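/*
 * GEN6 pixel-shader setup. Note the constant-buffer relocation: the
 * low bits of the 3DSTATE_CONSTANT_PS pointer encode the buffer read
 * length, which is why (URB_CS_ENTRY_SIZE - 1) is folded into the
 * relocation delta instead of being emitted as a separate field.
 */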
2076 static void 
2077 gen6_emit_wm_state(VADriverContextP ctx, int kernel)
2078 {
2079     struct i965_driver_data *i965 = i965_driver_data(ctx);
2080     struct intel_batchbuffer *batch = i965->batch;
2081     struct i965_render_state *render_state = &i965->render_state;
2082
2083     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
2084               GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
2085               (5 - 2));
2086     OUT_RELOC(batch, 
2087               render_state->curbe.bo,
2088               I915_GEM_DOMAIN_INSTRUCTION, 0,
2089               (URB_CS_ENTRY_SIZE-1));
2090     OUT_BATCH(batch, 0);
2091     OUT_BATCH(batch, 0);
2092     OUT_BATCH(batch, 0);
2093
2094     OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
2095     OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
2096               I915_GEM_DOMAIN_INSTRUCTION, 0,
2097               0);
2098     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
2099               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2100     OUT_BATCH(batch, 0);
2101     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
2102     OUT_BATCH(batch, ((i965->intel.device_info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
2103               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
2104               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
2105     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
2106               GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2107     OUT_BATCH(batch, 0);
2108     OUT_BATCH(batch, 0);
2109 }
2110
2111 static void
2112 gen6_emit_vertex_element_state(VADriverContextP ctx)
2113 {
2114     struct i965_driver_data *i965 = i965_driver_data(ctx);
2115     struct intel_batchbuffer *batch = i965->batch;
2116
2117     /* Set up our vertex elements, sourced from the single vertex buffer. */
2118     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2119     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2120     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2121               GEN6_VE0_VALID |
2122               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2123               (0 << VE0_OFFSET_SHIFT));
2124     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2125               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2126               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2127               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2128     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2129     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2130               GEN6_VE0_VALID |
2131               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2132               (8 << VE0_OFFSET_SHIFT));
2133     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2134               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2135               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2136               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2137 }
2138
2139 static void
2140 gen6_emit_vertices(VADriverContextP ctx)
2141 {
2142     struct i965_driver_data *i965 = i965_driver_data(ctx);
2143     struct intel_batchbuffer *batch = i965->batch;
2144     struct i965_render_state *render_state = &i965->render_state;
2145
2146     BEGIN_BATCH(batch, 11);
2147     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
2148     OUT_BATCH(batch, 
2149               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2150               GEN6_VB0_VERTEXDATA |
2151               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2152     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2153     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2154     OUT_BATCH(batch, 0);
2155
2156     OUT_BATCH(batch, 
2157               CMD_3DPRIMITIVE |
2158               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
2159               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
2160               (0 << 9) |
2161               4);
2162     OUT_BATCH(batch, 3); /* vertex count per instance */
2163     OUT_BATCH(batch, 0); /* start vertex offset */
2164     OUT_BATCH(batch, 1); /* single instance */
2165     OUT_BATCH(batch, 0); /* start instance location */
2166     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
2167     ADVANCE_BATCH(batch);
2168 }
2169
2170 static void
2171 gen6_render_emit_states(VADriverContextP ctx, int kernel)
2172 {
2173     struct i965_driver_data *i965 = i965_driver_data(ctx);
2174     struct intel_batchbuffer *batch = i965->batch;
2175
2176     intel_batchbuffer_start_atomic(batch, 0x1000);
2177     intel_batchbuffer_emit_mi_flush(batch);
2178     gen6_emit_invariant_states(ctx);
2179     gen6_emit_state_base_address(ctx);
2180     gen6_emit_viewport_state_pointers(ctx);
2181     gen6_emit_urb(ctx);
2182     gen6_emit_cc_state_pointers(ctx);
2183     gen6_emit_sampler_state_pointers(ctx);
2184     gen6_emit_vs_state(ctx);
2185     gen6_emit_gs_state(ctx);
2186     gen6_emit_clip_state(ctx);
2187     gen6_emit_sf_state(ctx);
2188     gen6_emit_wm_state(ctx, kernel);
2189     gen6_emit_binding_table(ctx);
2190     gen6_emit_depth_buffer_state(ctx);
2191     gen6_emit_drawing_rectangle(ctx);
2192     gen6_emit_vertex_element_state(ctx);
2193     gen6_emit_vertices(ctx);
2194     intel_batchbuffer_end_atomic(batch);
2195 }
2196
2197 static void
2198 gen6_render_put_surface(
2199     VADriverContextP   ctx,
2200     struct object_surface *obj_surface,
2201     const VARectangle *src_rect,
2202     const VARectangle *dst_rect,
2203     unsigned int       flags
2204 )
2205 {
2206     struct i965_driver_data *i965 = i965_driver_data(ctx);
2207     struct intel_batchbuffer *batch = i965->batch;
2208
2209     gen6_render_initialize(ctx);
2210     gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
2211     i965_clear_dest_region(ctx);
2212     gen6_render_emit_states(ctx, PS_KERNEL);
2213     intel_batchbuffer_flush(batch);
2214 }
2215
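/*
 * Subpictures are composited with ordinary alpha blending
 * (dst = src * alpha + dst * (1 - alpha)) rather than the COPY logic
 * op used for the primary surface, with pre- and post-blend clamping
 * to [0, 1].
 */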
2216 static void
2217 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2218 {
2219     struct i965_driver_data *i965 = i965_driver_data(ctx);
2220     struct i965_render_state *render_state = &i965->render_state;
2221     struct gen6_blend_state *blend_state;
2222
2224     dri_bo_map(render_state->cc.blend, 1);
2225     assert(render_state->cc.blend->virtual);
2226     blend_state = render_state->cc.blend->virtual;
2227     memset(blend_state, 0, sizeof(*blend_state));
2228     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2229     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2230     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2231     blend_state->blend0.blend_enable = 1;
2232     blend_state->blend1.post_blend_clamp_enable = 1;
2233     blend_state->blend1.pre_blend_clamp_enable = 1;
2234     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2235     dri_bo_unmap(render_state->cc.blend);
2236 }
2237
2238 static void
2239 gen6_subpicture_render_setup_states(
2240     VADriverContextP   ctx,
2241     struct object_surface *obj_surface,
2242     const VARectangle *src_rect,
2243     const VARectangle *dst_rect
2244 )
2245 {
2246     i965_render_dest_surface_state(ctx, 0);
2247     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
2248     i965_render_sampler(ctx);
2249     i965_render_cc_viewport(ctx);
2250     gen6_render_color_calc_state(ctx);
2251     gen6_subpicture_render_blend_state(ctx);
2252     gen6_render_depth_stencil_state(ctx);
2253     i965_subpic_render_upload_constants(ctx, obj_surface);
2254     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
2255 }
2256
2257 static void
2258 gen6_render_put_subpicture(
2259     VADriverContextP   ctx,
2260     struct object_surface *obj_surface,
2261     const VARectangle *src_rect,
2262     const VARectangle *dst_rect
2263 )
2264 {
2265     struct i965_driver_data *i965 = i965_driver_data(ctx);
2266     struct intel_batchbuffer *batch = i965->batch;
2267     unsigned int index = obj_surface->subpic_render_idx;
2268     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
2269
2270     assert(obj_subpic);
2271     gen6_render_initialize(ctx);
2272     gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
2273     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2274     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
2275     intel_batchbuffer_flush(batch);
2276 }
2277
2278 /*
2279  * for GEN7
2280  */
2281 static void 
2282 gen7_render_initialize(VADriverContextP ctx)
2283 {
2284     struct i965_driver_data *i965 = i965_driver_data(ctx);
2285     struct i965_render_state *render_state = &i965->render_state;
2286     dri_bo *bo;
2287
2288     /* VERTEX BUFFER */
2289     dri_bo_unreference(render_state->vb.vertex_buffer);
2290     bo = dri_bo_alloc(i965->intel.bufmgr,
2291                       "vertex buffer",
2292                       4096,
2293                       4096);
2294     assert(bo);
2295     render_state->vb.vertex_buffer = bo;
2296
2297     /* WM */
2298     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2299     bo = dri_bo_alloc(i965->intel.bufmgr,
2300                       "surface state & binding table",
2301                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2302                       4096);
2303     assert(bo);
2304     render_state->wm.surface_state_binding_table_bo = bo;
2305
2306     dri_bo_unreference(render_state->wm.sampler);
2307     bo = dri_bo_alloc(i965->intel.bufmgr,
2308                       "sampler state",
2309                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
2310                       4096);
2311     assert(bo);
2312     render_state->wm.sampler = bo;
2313     render_state->wm.sampler_count = 0;
2314
2315     /* COLOR CALCULATOR */
2316     dri_bo_unreference(render_state->cc.state);
2317     bo = dri_bo_alloc(i965->intel.bufmgr,
2318                       "color calc state",
2319                       sizeof(struct gen6_color_calc_state),
2320                       4096);
2321     assert(bo);
2322     render_state->cc.state = bo;
2323
2324     /* CC VIEWPORT */
2325     dri_bo_unreference(render_state->cc.viewport);
2326     bo = dri_bo_alloc(i965->intel.bufmgr,
2327                       "cc viewport",
2328                       sizeof(struct i965_cc_viewport),
2329                       4096);
2330     assert(bo);
2331     render_state->cc.viewport = bo;
2332
2333     /* BLEND STATE */
2334     dri_bo_unreference(render_state->cc.blend);
2335     bo = dri_bo_alloc(i965->intel.bufmgr,
2336                       "blend state",
2337                       sizeof(struct gen6_blend_state),
2338                       4096);
2339     assert(bo);
2340     render_state->cc.blend = bo;
2341
2342     /* DEPTH & STENCIL STATE */
2343     dri_bo_unreference(render_state->cc.depth_stencil);
2344     bo = dri_bo_alloc(i965->intel.bufmgr,
2345                       "depth & stencil state",
2346                       sizeof(struct gen6_depth_stencil_state),
2347                       4096);
2348     assert(bo);
2349     render_state->cc.depth_stencil = bo;
2350 }
2351
2352 /*
2353  * for GEN8
2354  */
2355 #define ALIGNMENT       64
2356
2357 static void
2358 gen7_render_color_calc_state(VADriverContextP ctx)
2359 {
2360     struct i965_driver_data *i965 = i965_driver_data(ctx);
2361     struct i965_render_state *render_state = &i965->render_state;
2362     struct gen6_color_calc_state *color_calc_state;
2363     
2364     dri_bo_map(render_state->cc.state, 1);
2365     assert(render_state->cc.state->virtual);
2366     color_calc_state = render_state->cc.state->virtual;
2367     memset(color_calc_state, 0, sizeof(*color_calc_state));
2368     color_calc_state->constant_r = 1.0;
2369     color_calc_state->constant_g = 0.0;
2370     color_calc_state->constant_b = 1.0;
2371     color_calc_state->constant_a = 1.0;
2372     dri_bo_unmap(render_state->cc.state);
2373 }
2374
2375 static void
2376 gen7_render_blend_state(VADriverContextP ctx)
2377 {
2378     struct i965_driver_data *i965 = i965_driver_data(ctx);
2379     struct i965_render_state *render_state = &i965->render_state;
2380     struct gen6_blend_state *blend_state;
2381     
2382     dri_bo_map(render_state->cc.blend, 1);
2383     assert(render_state->cc.blend->virtual);
2384     blend_state = render_state->cc.blend->virtual;
2385     memset(blend_state, 0, sizeof(*blend_state));
2386     blend_state->blend1.logic_op_enable = 1;
2387     blend_state->blend1.logic_op_func = 0xc;
2388     blend_state->blend1.pre_blend_clamp_enable = 1;
2389     dri_bo_unmap(render_state->cc.blend);
2390 }
2391
2392 static void
2393 gen7_render_depth_stencil_state(VADriverContextP ctx)
2394 {
2395     struct i965_driver_data *i965 = i965_driver_data(ctx);
2396     struct i965_render_state *render_state = &i965->render_state;
2397     struct gen6_depth_stencil_state *depth_stencil_state;
2398     
2399     dri_bo_map(render_state->cc.depth_stencil, 1);
2400     assert(render_state->cc.depth_stencil->virtual);
2401     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2402     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2403     dri_bo_unmap(render_state->cc.depth_stencil);
2404 }
2405
2406 static void 
2407 gen7_render_sampler(VADriverContextP ctx)
2408 {
2409     struct i965_driver_data *i965 = i965_driver_data(ctx);
2410     struct i965_render_state *render_state = &i965->render_state;
2411     struct gen7_sampler_state *sampler_state;
2412     int i;
2413     
2414     assert(render_state->wm.sampler_count > 0);
2415     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2416
2417     dri_bo_map(render_state->wm.sampler, 1);
2418     assert(render_state->wm.sampler->virtual);
2419     sampler_state = render_state->wm.sampler->virtual;
2420     for (i = 0; i < render_state->wm.sampler_count; i++) {
2421         memset(sampler_state, 0, sizeof(*sampler_state));
2422         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2423         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2424         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2425         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2426         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2427         sampler_state++;
2428     }
2429
2430     dri_bo_unmap(render_state->wm.sampler);
2431 }
2432
2433
2434 static void
2435 gen7_render_setup_states(
2436     VADriverContextP   ctx,
2437     struct object_surface *obj_surface,
2438     const VARectangle *src_rect,
2439     const VARectangle *dst_rect,
2440     unsigned int       flags
2441 )
2442 {
2443     i965_render_dest_surface_state(ctx, 0);
2444     i965_render_src_surfaces_state(ctx, obj_surface, flags);
2445     gen7_render_sampler(ctx);
2446     i965_render_cc_viewport(ctx);
2447     gen7_render_color_calc_state(ctx);
2448     gen7_render_blend_state(ctx);
2449     gen7_render_depth_stencil_state(ctx);
2450     i965_render_upload_constants(ctx, obj_surface, flags);
2451     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
2452 }
2453
2454
2455 static void
2456 gen7_emit_invariant_states(VADriverContextP ctx)
2457 {
2458     struct i965_driver_data *i965 = i965_driver_data(ctx);
2459     struct intel_batchbuffer *batch = i965->batch;
2460
2461     BEGIN_BATCH(batch, 1);
2462     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
2463     ADVANCE_BATCH(batch);
2464
2465     BEGIN_BATCH(batch, 4);
2466     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
2467     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
2468               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
2469     OUT_BATCH(batch, 0);
2470     OUT_BATCH(batch, 0);
2471     ADVANCE_BATCH(batch);
2472
2473     BEGIN_BATCH(batch, 2);
2474     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
2475     OUT_BATCH(batch, 1);
2476     ADVANCE_BATCH(batch);
2477
2478     /* Set system instruction pointer */
2479     BEGIN_BATCH(batch, 2);
2480     OUT_BATCH(batch, CMD_STATE_SIP | 0);
2481     OUT_BATCH(batch, 0);
2482     ADVANCE_BATCH(batch);
2483 }
2484
2485 static void
2486 gen7_emit_state_base_address(VADriverContextP ctx)
2487 {
2488     struct i965_driver_data *i965 = i965_driver_data(ctx);
2489     struct intel_batchbuffer *batch = i965->batch;
2490     struct i965_render_state *render_state = &i965->render_state;
2491
2492     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2493     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2494     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2495     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2496     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2497     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2498     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
2499     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2500     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2501     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2502 }
2503
2504 static void
2505 gen7_emit_viewport_state_pointers(VADriverContextP ctx)
2506 {
2507     struct i965_driver_data *i965 = i965_driver_data(ctx);
2508     struct intel_batchbuffer *batch = i965->batch;
2509     struct i965_render_state *render_state = &i965->render_state;
2510
2511     BEGIN_BATCH(batch, 2);
2512     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
2513     OUT_RELOC(batch,
2514               render_state->cc.viewport,
2515               I915_GEM_DOMAIN_INSTRUCTION, 0,
2516               0);
2517     ADVANCE_BATCH(batch);
2518
2519     BEGIN_BATCH(batch, 2);
2520     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
2521     OUT_BATCH(batch, 0);
2522     ADVANCE_BATCH(batch);
2523 }
2524
2525 /*
2526  * URB layout on GEN7 
2527  * ----------------------------------------
2528  * | PS Push Constants (8KB) | VS entries |
2529  * ----------------------------------------
2530  */
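/*
 * The numbers below follow that layout: the PS push-constant
 * allocation is given in 1KB units (8 -> the 8KB block at the front),
 * and the URB starting addresses appear to be in 8KB units, so the VS
 * entries begin at offset 1, directly behind the push constants,
 * while GS/HS/DS are left with zero-sized allocations.
 */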
2531 static void
2532 gen7_emit_urb(VADriverContextP ctx)
2533 {
2534     struct i965_driver_data *i965 = i965_driver_data(ctx);
2535     struct intel_batchbuffer *batch = i965->batch;
2536     unsigned int num_urb_entries = 32;
2537
2538     if (IS_HASWELL(i965->intel.device_info))
2539         num_urb_entries = 64;
2540
2541     BEGIN_BATCH(batch, 2);
2542     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
2543     OUT_BATCH(batch, 8); /* in 1KBs */
2544     ADVANCE_BATCH(batch);
2545
2546     BEGIN_BATCH(batch, 2);
2547     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
2548     OUT_BATCH(batch, 
2549               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
2550               ((2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) |
2551               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2552     ADVANCE_BATCH(batch);
2553
2554     BEGIN_BATCH(batch, 2);
2555     OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
2556     OUT_BATCH(batch,
2557               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2558               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2559     ADVANCE_BATCH(batch);
2560
2561     BEGIN_BATCH(batch, 2);
2562     OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
2563     OUT_BATCH(batch,
2564               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2565               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2566     ADVANCE_BATCH(batch);
2567
2568     BEGIN_BATCH(batch, 2);
2569     OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
2570     OUT_BATCH(batch,
2571               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2572               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2573     ADVANCE_BATCH(batch);
2574 }
2575
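/*
 * On GEN7 the CC, blend and depth-stencil pointers are split into
 * three separate commands, and bit 0 of each pointer dword is a
 * "pointer valid" flag -- that is what the relocation delta of 1 sets
 * below.
 */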
2576 static void
2577 gen7_emit_cc_state_pointers(VADriverContextP ctx)
2578 {
2579     struct i965_driver_data *i965 = i965_driver_data(ctx);
2580     struct intel_batchbuffer *batch = i965->batch;
2581     struct i965_render_state *render_state = &i965->render_state;
2582
2583     BEGIN_BATCH(batch, 2);
2584     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
2585     OUT_RELOC(batch,
2586               render_state->cc.state,
2587               I915_GEM_DOMAIN_INSTRUCTION, 0,
2588               1);
2589     ADVANCE_BATCH(batch);
2590
2591     BEGIN_BATCH(batch, 2);
2592     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
2593     OUT_RELOC(batch,
2594               render_state->cc.blend,
2595               I915_GEM_DOMAIN_INSTRUCTION, 0,
2596               1);
2597     ADVANCE_BATCH(batch);
2598
2599     BEGIN_BATCH(batch, 2);
2600     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
2601     OUT_RELOC(batch,
2602               render_state->cc.depth_stencil,
2603               I915_GEM_DOMAIN_INSTRUCTION, 0, 
2604               1);
2605     ADVANCE_BATCH(batch);
2606 }
2607
2608 static void
2609 gen7_emit_sampler_state_pointers(VADriverContextP ctx)
2610 {
2611     struct i965_driver_data *i965 = i965_driver_data(ctx);
2612     struct intel_batchbuffer *batch = i965->batch;
2613     struct i965_render_state *render_state = &i965->render_state;
2614
2615     BEGIN_BATCH(batch, 2);
2616     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
2617     OUT_RELOC(batch,
2618               render_state->wm.sampler,
2619               I915_GEM_DOMAIN_INSTRUCTION, 0,
2620               0);
2621     ADVANCE_BATCH(batch);
2622 }
2623
2624 static void
2625 gen7_emit_binding_table(VADriverContextP ctx)
2626 {
2627     struct i965_driver_data *i965 = i965_driver_data(ctx);
2628     struct intel_batchbuffer *batch = i965->batch;
2629
2630     BEGIN_BATCH(batch, 2);
2631     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
2632     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
2633     ADVANCE_BATCH(batch);
2634 }
2635
2636 static void
2637 gen7_emit_depth_buffer_state(VADriverContextP ctx)
2638 {
2639     struct i965_driver_data *i965 = i965_driver_data(ctx);
2640     struct intel_batchbuffer *batch = i965->batch;
2641
2642     BEGIN_BATCH(batch, 7);
2643     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
2644     OUT_BATCH(batch,
2645               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
2646               (I965_SURFACE_NULL << 29));
2647     OUT_BATCH(batch, 0);
2648     OUT_BATCH(batch, 0);
2649     OUT_BATCH(batch, 0);
2650     OUT_BATCH(batch, 0);
2651     OUT_BATCH(batch, 0);
2652     ADVANCE_BATCH(batch);
2653
2654     BEGIN_BATCH(batch, 3);
2655     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
2656     OUT_BATCH(batch, 0);
2657     OUT_BATCH(batch, 0);
2658     ADVANCE_BATCH(batch);
2659 }
2660
2661 static void
2662 gen7_emit_drawing_rectangle(VADriverContextP ctx)
2663 {
2664     i965_render_drawing_rectangle(ctx);
2665 }
2666
2667 static void 
2668 gen7_emit_vs_state(VADriverContextP ctx)
2669 {
2670     struct i965_driver_data *i965 = i965_driver_data(ctx);
2671     struct intel_batchbuffer *batch = i965->batch;
2672
2673     /* disable VS constant buffer */
2674     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
2675     OUT_BATCH(batch, 0);
2676     OUT_BATCH(batch, 0);
2677     OUT_BATCH(batch, 0);
2678     OUT_BATCH(batch, 0);
2679     OUT_BATCH(batch, 0);
2680     OUT_BATCH(batch, 0);
2681         
2682     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2683     OUT_BATCH(batch, 0); /* without VS kernel */
2684     OUT_BATCH(batch, 0);
2685     OUT_BATCH(batch, 0);
2686     OUT_BATCH(batch, 0);
2687     OUT_BATCH(batch, 0); /* pass-through */
2688 }
2689
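/*
 * GEN7 does not treat the extra geometry stages as implicitly off:
 * GS, HS, TE, DS and the stream-out unit each have to be disabled
 * explicitly (zero kernels, zero constants, null binding tables)
 * even for a simple passthrough draw like this one.
 */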
2690 static void 
2691 gen7_emit_bypass_state(VADriverContextP ctx)
2692 {
2693     struct i965_driver_data *i965 = i965_driver_data(ctx);
2694     struct intel_batchbuffer *batch = i965->batch;
2695
2696     /* bypass GS */
2697     BEGIN_BATCH(batch, 7);
2698     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
2699     OUT_BATCH(batch, 0);
2700     OUT_BATCH(batch, 0);
2701     OUT_BATCH(batch, 0);
2702     OUT_BATCH(batch, 0);
2703     OUT_BATCH(batch, 0);
2704     OUT_BATCH(batch, 0);
2705     ADVANCE_BATCH(batch);
2706
2707     BEGIN_BATCH(batch, 7);      
2708     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2709     OUT_BATCH(batch, 0); /* without GS kernel */
2710     OUT_BATCH(batch, 0);
2711     OUT_BATCH(batch, 0);
2712     OUT_BATCH(batch, 0);
2713     OUT_BATCH(batch, 0);
2714     OUT_BATCH(batch, 0); /* pass-through */
2715     ADVANCE_BATCH(batch);
2716
2717     BEGIN_BATCH(batch, 2);
2718     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
2719     OUT_BATCH(batch, 0);
2720     ADVANCE_BATCH(batch);
2721
2722     /* disable HS */
2723     BEGIN_BATCH(batch, 7);
2724     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
2725     OUT_BATCH(batch, 0);
2726     OUT_BATCH(batch, 0);
2727     OUT_BATCH(batch, 0);
2728     OUT_BATCH(batch, 0);
2729     OUT_BATCH(batch, 0);
2730     OUT_BATCH(batch, 0);
2731     ADVANCE_BATCH(batch);
2732
2733     BEGIN_BATCH(batch, 7);
2734     OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
2735     OUT_BATCH(batch, 0);
2736     OUT_BATCH(batch, 0);
2737     OUT_BATCH(batch, 0);
2738     OUT_BATCH(batch, 0);
2739     OUT_BATCH(batch, 0);
2740     OUT_BATCH(batch, 0);
2741     ADVANCE_BATCH(batch);
2742
2743     BEGIN_BATCH(batch, 2);
2744     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
2745     OUT_BATCH(batch, 0);
2746     ADVANCE_BATCH(batch);
2747
2748     /* Disable TE */
2749     BEGIN_BATCH(batch, 4);
2750     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
2751     OUT_BATCH(batch, 0);
2752     OUT_BATCH(batch, 0);
2753     OUT_BATCH(batch, 0);
2754     ADVANCE_BATCH(batch);
2755
2756     /* Disable DS */
2757     BEGIN_BATCH(batch, 7);
2758     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
2759     OUT_BATCH(batch, 0);
2760     OUT_BATCH(batch, 0);
2761     OUT_BATCH(batch, 0);
2762     OUT_BATCH(batch, 0);
2763     OUT_BATCH(batch, 0);
2764     OUT_BATCH(batch, 0);
2765     ADVANCE_BATCH(batch);
2766
2767     BEGIN_BATCH(batch, 6);
2768     OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
2769     OUT_BATCH(batch, 0);
2770     OUT_BATCH(batch, 0);
2771     OUT_BATCH(batch, 0);
2772     OUT_BATCH(batch, 0);
2773     OUT_BATCH(batch, 0);
2774     ADVANCE_BATCH(batch);
2775
2776     BEGIN_BATCH(batch, 2);
2777     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
2778     OUT_BATCH(batch, 0);
2779     ADVANCE_BATCH(batch);
2780
2781     /* Disable STREAMOUT */
2782     BEGIN_BATCH(batch, 3);
2783     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
2784     OUT_BATCH(batch, 0);
2785     OUT_BATCH(batch, 0);
2786     ADVANCE_BATCH(batch);
2787 }
2788
2789 static void 
2790 gen7_emit_clip_state(VADriverContextP ctx)
2791 {
2792     struct i965_driver_data *i965 = i965_driver_data(ctx);
2793     struct intel_batchbuffer *batch = i965->batch;
2794
2795     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2796     OUT_BATCH(batch, 0);
2797     OUT_BATCH(batch, 0); /* pass-through */
2798     OUT_BATCH(batch, 0);
2799 }
2800
2801 static void 
2802 gen7_emit_sf_state(VADriverContextP ctx)
2803 {
2804     struct i965_driver_data *i965 = i965_driver_data(ctx);
2805     struct intel_batchbuffer *batch = i965->batch;
2806
2807     BEGIN_BATCH(batch, 14);
2808     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
2809     OUT_BATCH(batch,
2810               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
2811               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
2812               (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
2813     OUT_BATCH(batch, 0);
2814     OUT_BATCH(batch, 0);
2815     OUT_BATCH(batch, 0); /* DW4 */
2816     OUT_BATCH(batch, 0);
2817     OUT_BATCH(batch, 0);
2818     OUT_BATCH(batch, 0);
2819     OUT_BATCH(batch, 0);
2820     OUT_BATCH(batch, 0); /* DW9 */
2821     OUT_BATCH(batch, 0);
2822     OUT_BATCH(batch, 0);
2823     OUT_BATCH(batch, 0);
2824     OUT_BATCH(batch, 0);
2825     ADVANCE_BATCH(batch);
2826
2827     BEGIN_BATCH(batch, 7);
2828     OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
2829     OUT_BATCH(batch, 0);
2830     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2831     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
2832     OUT_BATCH(batch, 0);
2833     OUT_BATCH(batch, 0);
2834     OUT_BATCH(batch, 0);
2835     ADVANCE_BATCH(batch);
2836 }
2837
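/*
 * GEN7 splits the pixel-shader setup across 3DSTATE_WM,
 * 3DSTATE_CONSTANT_PS and 3DSTATE_PS. Haswell relocated the
 * max-thread field and added a sample-mask field in 3DSTATE_PS,
 * hence the IS_HASWELL fixups at the top of this function.
 */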
2838 static void 
2839 gen7_emit_wm_state(VADriverContextP ctx, int kernel)
2840 {
2841     struct i965_driver_data *i965 = i965_driver_data(ctx);
2842     struct intel_batchbuffer *batch = i965->batch;
2843     struct i965_render_state *render_state = &i965->render_state;
2844     unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
2845     unsigned int num_samples = 0;
2846
2847     if (IS_HASWELL(i965->intel.device_info)) {
2848         max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
2849         num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
2850     }
2851
2852     BEGIN_BATCH(batch, 3);
2853     OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
2854     OUT_BATCH(batch,
2855               GEN7_WM_DISPATCH_ENABLE |
2856               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2857     OUT_BATCH(batch, 0);
2858     ADVANCE_BATCH(batch);
2859
2860     BEGIN_BATCH(batch, 7);
2861     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
2862     OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
2863     OUT_BATCH(batch, 0);
2864     OUT_RELOC(batch, 
2865               render_state->curbe.bo,
2866               I915_GEM_DOMAIN_INSTRUCTION, 0,
2867               0);
2868     OUT_BATCH(batch, 0);
2869     OUT_BATCH(batch, 0);
2870     OUT_BATCH(batch, 0);
2871     ADVANCE_BATCH(batch);
2872
2873     BEGIN_BATCH(batch, 8);
2874     OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
2875     OUT_RELOC(batch, 
2876               render_state->render_kernels[kernel].bo,
2877               I915_GEM_DOMAIN_INSTRUCTION, 0,
2878               0);
2879     OUT_BATCH(batch, 
2880               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
2881               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2882     OUT_BATCH(batch, 0); /* scratch space base offset */
2883     OUT_BATCH(batch, 
2884               ((i965->intel.device_info->max_wm_threads - 1) << max_threads_shift) | num_samples |
2885               GEN7_PS_PUSH_CONSTANT_ENABLE |
2886               GEN7_PS_ATTRIBUTE_ENABLE |
2887               GEN7_PS_16_DISPATCH_ENABLE);
2888     OUT_BATCH(batch, 
2889               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
2890     OUT_BATCH(batch, 0); /* kernel 1 pointer */
2891     OUT_BATCH(batch, 0); /* kernel 2 pointer */
2892     ADVANCE_BATCH(batch);
2893 }
2894
2895 static void
2896 gen7_emit_vertex_element_state(VADriverContextP ctx)
2897 {
2898     struct i965_driver_data *i965 = i965_driver_data(ctx);
2899     struct intel_batchbuffer *batch = i965->batch;
2900
2901     /* Set up our vertex elements, sourced from the single vertex buffer. */
2902     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2903     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2904     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2905               GEN6_VE0_VALID |
2906               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2907               (0 << VE0_OFFSET_SHIFT));
2908     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2909               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2910               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2911               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2912     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2913     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2914               GEN6_VE0_VALID |
2915               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2916               (8 << VE0_OFFSET_SHIFT));
2917     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2918               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2919               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2920               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2921 }
2922
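/*
 * Same three-vertex RECTLIST as on the older gens; GEN7 additionally
 * requires GEN7_VB0_ADDRESS_MODIFYENABLE to be set before the start
 * and end address dwords (the two relocations below) take effect.
 */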
static void
gen7_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST |
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

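/*
 * Emit the complete Gen7 pipeline setup for one blit, in fixed order:
 * invariant state, state base addresses, viewport/URB/CC/sampler
 * pointers, the bypassed shader stages, VS/CLIP/SF/WM, binding table,
 * depth buffer, drawing rectangle, vertex elements and finally the
 * vertices themselves.  The MI_FLUSH plus the atomic batchbuffer
 * section keep the whole sequence from being split across batches.
 */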
static void
gen7_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_emit_invarient_states(ctx);
    gen7_emit_state_base_address(ctx);
    gen7_emit_viewport_state_pointers(ctx);
    gen7_emit_urb(ctx);
    gen7_emit_cc_state_pointers(ctx);
    gen7_emit_sampler_state_pointers(ctx);
    gen7_emit_bypass_state(ctx);
    gen7_emit_vs_state(ctx);
    gen7_emit_clip_state(ctx);
    gen7_emit_sf_state(ctx);
    gen7_emit_wm_state(ctx, kernel);
    gen7_emit_binding_table(ctx);
    gen7_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen7_emit_vertex_element_state(ctx);
    gen7_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}


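/*
 * Top-level Gen7 putsurface: refresh the per-frame state buffers,
 * program surface/sampler/blend state for the source, clear the part
 * of the drawable that dst_rect does not cover (that is what
 * i965_clear_dest_region() presumably does, judging by its name),
 * then emit the 3D pipeline and flush the batch to the kernel.
 */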
static void
gen7_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen7_render_initialize(ctx);
    gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_clear_dest_region(ctx);
    gen7_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}


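/*
 * Subpictures are composited over the video with standard "source
 * over" alpha blending: dst = src * src_alpha + dst * (1 - src_alpha),
 * clamped to [0, 1] both before and after blending.
 */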
static void
gen7_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_unmap(render_state->cc.state);
    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}

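/*
 * State setup for subpicture blits: the same structure as the plain
 * video path above, but sourcing from the subpicture image and with
 * alpha blending enabled.
 */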
static void
gen7_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_subpicture_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

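/*
 * Composite one subpicture over the surface; subpic_render_idx
 * selects which of the surface's subpictures to draw.  The image
 * palette is re-uploaded with 0xff as the alpha byte, presumably to
 * force opaque palette entries for paletted formats.
 */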
static void
gen7_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen7_render_initialize(ctx);
    gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}


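/*
 * Generation-independent entry point.  Post-processing may render
 * into a temporary surface first; if it did, that surface becomes
 * the render source, and if scaling was already performed there,
 * src_rect is replaced by the calibrated rectangle so the blit below
 * does not scale a second time.  The temporary surface is destroyed
 * once the generation-specific hook returns.
 */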
void
intel_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int has_done_scaling = 0;
    VARectangle calibrated_rect;
    VASurfaceID out_surface_id = i965_post_processing(ctx,
                                                      obj_surface,
                                                      src_rect,
                                                      dst_rect,
                                                      flags,
                                                      &has_done_scaling,
                                                      &calibrated_rect);

    assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));

    if (out_surface_id != VA_INVALID_ID) {
        struct object_surface *new_obj_surface = SURFACE(out_surface_id);

        if (new_obj_surface && new_obj_surface->bo)
            obj_surface = new_obj_surface;

        if (has_done_scaling)
            src_rect = &calibrated_rect;
    }

    render_state->render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);

    if (out_surface_id != VA_INVALID_ID)
        i965_DestroySurfaces(ctx, &out_surface_id, 1);
}

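/*
 * Generation-independent subpicture entry point; dispatches through
 * the hook installed by genx_render_init().
 */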
void
intel_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    render_state->render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
}

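/*
 * Drop every GPU buffer object owned by the render state: the CURBE,
 * the kernel binaries, the vertex buffer and all fixed-function state
 * buffers, plus the cached draw region if one was set.
 */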
static void
genx_render_terminate(VADriverContextP ctx)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    dri_bo_unreference(render_state->curbe.bo);
    render_state->curbe.bo = NULL;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;
    dri_bo_unreference(render_state->vs.state);
    render_state->vs.state = NULL;
    dri_bo_unreference(render_state->sf.state);
    render_state->sf.state = NULL;
    dri_bo_unreference(render_state->wm.sampler);
    render_state->wm.sampler = NULL;
    dri_bo_unreference(render_state->wm.state);
    render_state->wm.state = NULL;
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL; /* clear like the other BO pointers */
    dri_bo_unreference(render_state->cc.viewport);
    render_state->cc.viewport = NULL;
    dri_bo_unreference(render_state->cc.state);
    render_state->cc.state = NULL;
    dri_bo_unreference(render_state->cc.blend);
    render_state->cc.blend = NULL;
    dri_bo_unreference(render_state->cc.depth_stencil);
    render_state->cc.depth_stencil = NULL;

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }
}

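/*
 * Select the shader kernels and the put_surface/put_subpicture hooks
 * for the detected GPU generation (Gen4 through Gen7/Haswell), upload
 * the kernel binaries into buffer objects, and allocate the 4 KiB
 * buffer used for the CURBE constant data.
 */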
bool
genx_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;

    /* kernel */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
                                 sizeof(render_kernels_gen5[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
                                 sizeof(render_kernels_gen6[0])));

    if (IS_GEN7(i965->intel.device_info)) {
        memcpy(render_state->render_kernels,
               (IS_HASWELL(i965->intel.device_info) ? render_kernels_gen7_haswell : render_kernels_gen7),
               sizeof(render_state->render_kernels));
        render_state->render_put_surface = gen7_render_put_surface;
        render_state->render_put_subpicture = gen7_render_put_subpicture;
    } else if (IS_GEN6(i965->intel.device_info)) {
        memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
        render_state->render_put_surface = gen6_render_put_surface;
        render_state->render_put_subpicture = gen6_render_put_subpicture;
    } else if (IS_IRONLAKE(i965->intel.device_info)) {
        memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
        render_state->render_put_surface = i965_render_put_surface;
        render_state->render_put_subpicture = i965_render_put_subpicture;
    } else {
        memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
        render_state->render_put_surface = i965_render_put_surface;
        render_state->render_put_subpicture = i965_render_put_subpicture;
    }

    render_state->render_terminate = genx_render_terminate;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size, 0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* constant buffer */
    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
                                          "constant buffer",
                                          4096, 64);
    assert(render_state->curbe.bo);

    return true;
}

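/*
 * Thin wrapper; the actual init routine is selected per device
 * through codec_info->render_init.
 */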
bool
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    return i965->codec_info->render_init(ctx);
}

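/*
 * Counterpart to i965_render_init(); dispatches to the terminate hook
 * installed by the generation-specific init routine.
 */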
void
i965_render_terminate(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    render_state->render_terminate(ctx);
}