/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"
#include "i965_yuv_coefs.h"

#include "i965_render.h"
#include "i965_post_processing.h"

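/*
 * The .g4b/.g6b/.g7b files included below are precompiled GEN shader
 * binaries from shaders/render/; each record is one EU instruction,
 * stored as four dwords.
 */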
#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

#define I965_GRF_BLOCKS(nreg)   (((nreg) + 15) / 16 - 1)

static const uint32_t ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_color_balance.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
static const uint32_t ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IRONLAKE */
static const uint32_t sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] =
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_color_balance.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] =
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_color_balance.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Programs for Haswell */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)

#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

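/*
 * Surface states and the binding table share a single bo: MAX_RENDER_SURFACES
 * surface states, each padded to the larger of the gen6/gen7 sizes, are
 * followed by the binding table, whose entries hold the state offsets.
 */

/* Reinterpret a float's bit pattern as a uint32_t for batch emission. */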
static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

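/*
 * Static URB partition for the fixed-function pipeline: GS and CLIP are
 * disabled, so only the (pass-through) VS, the SF and the constant URB
 * (CS) receive entries.
 */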
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4

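/*
 * VS unit state: vertices arrive pre-transformed, so the VS proper is
 * disabled and the vertex cache is turned off.
 */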
static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_info))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

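/*
 * SF unit state: dispatches the SF kernel with viewport transform,
 * culling and scissoring disabled; it merely passes the rectangle on
 * to the windower.
 */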
static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

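/*
 * One bilinear, clamp-to-edge sampler is programmed for each source
 * plane bound by the surface-state code below.
 */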
static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}
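
/*
 * WM (windower) unit state for subpicture blending: dispatches the
 * PS_SUBPIC kernel and points at the shared sampler state.
 */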
static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_info))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

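/*
 * WM unit state for the main video pass; identical to the subpicture
 * setup except that it dispatches the PS kernel.
 */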
static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_info))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

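/* Depth clamping is effectively disabled by using a huge viewport range. */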
static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

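/*
 * Color calculator state for subpictures: color blending is enabled with
 * SRC_ALPHA/INV_SRC_ALPHA factors, so the subpicture is alpha-composited
 * over the video frame.
 */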
static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store the alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;  /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dest_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

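/*
 * Color calculator state for the main video pass: blending, alpha test
 * and depth test all stay disabled.
 */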
static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

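/* Translate the bo tiling mode into pre-gen7 SURFACE_STATE tiling bits. */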
static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (VA_TOP_FIELD|VA_BOTTOM_FIELD)) {
    case VA_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case VA_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}

static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* Set "Shader Channel Select" */
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (VA_TOP_FIELD|VA_BOTTOM_FIELD)) {
    case VA_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case VA_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}

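/*
 * Program one source SURFACE_STATE in the shared bo (gen4 or gen7 layout),
 * emit the relocation for its base address and enter its offset into the
 * binding table.
 */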
static void
i965_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_info)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        if (IS_HASWELL(i965->intel.device_info))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

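/*
 * Bind the source planes: the Y plane in binding table slots 1 and 2,
 * then either the interleaved UV plane (NV12) or separate U and V
 * planes, each bound twice as well.
 */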
static void
i965_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */
        return;

    if (obj_surface->fourcc == VA_FOURCC_NV12) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      struct object_surface *obj_surface)
{
    dri_bo *subpic_region;
    unsigned int index;
    struct object_subpic *obj_subpic;
    struct object_image *obj_image;

    /* check the surface before dereferencing it */
    assert(obj_surface);
    assert(obj_surface->bo);

    index = obj_surface->subpic_render_idx;
    obj_subpic = obj_surface->obj_subpic[index];
    obj_image = obj_subpic->obj_image;
    subpic_region = obj_image->bo;
    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_info)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        if (IS_HASWELL(i965->intel.device_info))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

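/*
 * Upload three vertices, each (u, v, x, y), for a RECTLIST primitive:
 * bottom-right, bottom-left and top-left corners of the destination.
 * The texture coordinates are permuted according to the rotation
 * attribute; the screen coordinates are not.
 */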
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}

static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

#define PI  3.1415926

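/*
 * CURBE layout consumed by the PS kernels (see the color balance and
 * yuv_rgb shaders): 16-bit word 0 selects the plane layout (0 = planar
 * YUV, 1 = NV12, 2 = grayscale), word 1 skips the color balance pass
 * when all attributes are at their defaults; the four floats starting
 * at float offset 4 are the color balance coefficients, and the
 * YUV-to-RGB matrix starts at float offset 8.
 */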
static void
i965_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    const float *yuv_coefs;
    size_t coefs_length;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC_Y800);

        constant_buffer[0] = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC_NV12)
            constant_buffer[0] = 1;
        else
            constant_buffer[0] = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    yuv_to_rgb = (float *)constant_buffer + 8;
    yuv_coefs = i915_color_standard_to_coefs(i915_filter_to_color_standard(flags & VA_SRC_COLOR_MASK),
                                             &coefs_length);
    memcpy(yuv_to_rgb, yuv_coefs, coefs_length);

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }

    dri_bo_map(render_state->curbe.bo, 1);

    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->curbe.bo);
}

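/*
 * Build all indirect state for one video frame blit before any batch
 * commands are emitted.
 */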
static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, obj_surface, flags);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

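/*
 * Everything below emits commands into the batchbuffer through the
 * BEGIN_BATCH/OUT_BATCH/OUT_RELOC/ADVANCE_BATCH macros.
 */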
static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

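/*
 * Fence off the URB according to the static partition defined at the top
 * of the file; each fence value is the end offset of that stage's region.
 */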
static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |  /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));           /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);
}

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}

static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    struct object_image *obj_image,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    assert(obj_image);

    if (!obj_image)
        return;

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* fill palette: bits 0-23 hold the color, bits 24-31 the alpha */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}

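/*
 * Point the hardware at the vertex buffer and kick off the draw: one
 * RECTLIST primitive of three vertices covering the destination rect.
 */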
1458 static void
1459 i965_render_startup(VADriverContextP ctx)
1460 {
1461     struct i965_driver_data *i965 = i965_driver_data(ctx);
1462     struct intel_batchbuffer *batch = i965->batch;
1463     struct i965_render_state *render_state = &i965->render_state;
1464
1465     BEGIN_BATCH(batch, 11);
1466     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
1467     OUT_BATCH(batch, 
1468               (0 << VB0_BUFFER_INDEX_SHIFT) |
1469               VB0_VERTEXDATA |
1470               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
1471     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
1472
1473     if (IS_IRONLAKE(i965->intel.device_info))
1474         OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
1475     else
1476         OUT_BATCH(batch, 3);
1477
1478     OUT_BATCH(batch, 0);
1479
1480     OUT_BATCH(batch, 
1481               CMD_3DPRIMITIVE |
1482               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
1483               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
1484               (0 << 9) |
1485               4);
1486     OUT_BATCH(batch, 3); /* vertex count per instance */
1487     OUT_BATCH(batch, 0); /* start vertex offset */
1488     OUT_BATCH(batch, 1); /* single instance */
1489     OUT_BATCH(batch, 0); /* start instance location */
1490     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
1491     ADVANCE_BATCH(batch);
1492 }
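
/*
 * _3DPRIM_RECTLIST draws the destination with only three vertices: the
 * hardware derives the fourth corner of the rectangle itself, which is
 * why the vertex count per instance is 3. Given the (4 * 4)-byte pitch
 * programmed above, each vertex is assumed to be four packed floats:
 *
 *     struct rect_vertex { float x, y, s, t; };   (hypothetical name)
 */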
1493
1494 static void 
1495 i965_clear_dest_region(VADriverContextP ctx)
1496 {
1497     struct i965_driver_data *i965 = i965_driver_data(ctx);
1498     struct intel_batchbuffer *batch = i965->batch;
1499     struct i965_render_state *render_state = &i965->render_state;
1500     struct intel_region *dest_region = render_state->draw_region;
1501     unsigned int blt_cmd, br13;
1502     int pitch;
1503
1504     blt_cmd = XY_COLOR_BLT_CMD;
1505     br13 = 0xf0 << 16;
1506     pitch = dest_region->pitch;
1507
1508     if (dest_region->cpp == 4) {
1509         br13 |= BR13_8888;
1510         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
1511     } else {
1512         assert(dest_region->cpp == 2);
1513         br13 |= BR13_565;
1514     }
1515
1516     if (dest_region->tiling != I915_TILING_NONE) {
1517         blt_cmd |= XY_COLOR_BLT_DST_TILED;
1518         pitch /= 4;
1519     }
1520
1521     br13 |= pitch;
1522
1523     if (IS_GEN6(i965->intel.device_info) ||
1524         IS_GEN7(i965->intel.device_info)) {
1525         intel_batchbuffer_start_atomic_blt(batch, 24);
1526         BEGIN_BLT_BATCH(batch, 6);
1527     } else {
1528         intel_batchbuffer_start_atomic(batch, 24);
1529         BEGIN_BATCH(batch, 6);
1530     }
1531
1532     OUT_BATCH(batch, blt_cmd);
1533     OUT_BATCH(batch, br13);
1534     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
1535     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
1536               (dest_region->x + dest_region->width));
1537     OUT_RELOC(batch, dest_region->bo, 
1538               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1539               0);
1540     OUT_BATCH(batch, 0x0);
1541     ADVANCE_BATCH(batch);
1542     intel_batchbuffer_end_atomic(batch);
1543 }
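
/*
 * BR13 packs the blit parameters: the raster operation lives in bits
 * 16-23 (0xf0 is PATCOPY, i.e. fill with the solid color supplied in
 * the final dword, here 0x0), the color-depth flags sit above it, and
 * the destination pitch occupies the low bits. Roughly:
 *
 *     br13 = (rop << 16) | format_flags | pitch;
 *
 * For tiled destinations the pitch field is in dwords, hence pitch /= 4.
 */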
1544
1545 static void
1546 i965_surface_render_pipeline_setup(VADriverContextP ctx)
1547 {
1548     struct i965_driver_data *i965 = i965_driver_data(ctx);
1549     struct intel_batchbuffer *batch = i965->batch;
1550
1551     i965_clear_dest_region(ctx);
1552     intel_batchbuffer_start_atomic(batch, 0x1000);
1553     intel_batchbuffer_emit_mi_flush(batch);
1554     i965_render_pipeline_select(ctx);
1555     i965_render_state_sip(ctx);
1556     i965_render_state_base_address(ctx);
1557     i965_render_binding_table_pointers(ctx);
1558     i965_render_constant_color(ctx);
1559     i965_render_pipelined_pointers(ctx);
1560     i965_render_urb_layout(ctx);
1561     i965_render_cs_urb_layout(ctx);
1562     i965_render_constant_buffer(ctx);
1563     i965_render_drawing_rectangle(ctx);
1564     i965_render_vertex_elements(ctx);
1565     i965_render_startup(ctx);
1566     intel_batchbuffer_end_atomic(batch);
1567 }
1568
1569 static void
1570 i965_subpic_render_pipeline_setup(VADriverContextP ctx)
1571 {
1572     struct i965_driver_data *i965 = i965_driver_data(ctx);
1573     struct intel_batchbuffer *batch = i965->batch;
1574
1575     intel_batchbuffer_start_atomic(batch, 0x1000);
1576     intel_batchbuffer_emit_mi_flush(batch);
1577     i965_render_pipeline_select(ctx);
1578     i965_render_state_sip(ctx);
1579     i965_render_state_base_address(ctx);
1580     i965_render_binding_table_pointers(ctx);
1581     i965_render_constant_color(ctx);
1582     i965_render_pipelined_pointers(ctx);
1583     i965_render_urb_layout(ctx);
1584     i965_render_cs_urb_layout(ctx);
1585     i965_render_constant_buffer(ctx);
1586     i965_render_drawing_rectangle(ctx);
1587     i965_render_vertex_elements(ctx);
1588     i965_render_startup(ctx);
1589     intel_batchbuffer_end_atomic(batch);
1590 }
1591
1592
1593 static void 
1594 i965_render_initialize(VADriverContextP ctx)
1595 {
1596     struct i965_driver_data *i965 = i965_driver_data(ctx);
1597     struct i965_render_state *render_state = &i965->render_state;
1598     dri_bo *bo;
1599
1600     /* VERTEX BUFFER */
1601     dri_bo_unreference(render_state->vb.vertex_buffer);
1602     bo = dri_bo_alloc(i965->intel.bufmgr,
1603                       "vertex buffer",
1604                       4096,
1605                       4096);
1606     assert(bo);
1607     render_state->vb.vertex_buffer = bo;
1608
1609     /* VS */
1610     dri_bo_unreference(render_state->vs.state);
1611     bo = dri_bo_alloc(i965->intel.bufmgr,
1612                       "vs state",
1613                       sizeof(struct i965_vs_unit_state),
1614                       64);
1615     assert(bo);
1616     render_state->vs.state = bo;
1617
1618     /* GS */
1619     /* CLIP */
1620     /* SF */
1621     dri_bo_unreference(render_state->sf.state);
1622     bo = dri_bo_alloc(i965->intel.bufmgr,
1623                       "sf state",
1624                       sizeof(struct i965_sf_unit_state),
1625                       64);
1626     assert(bo);
1627     render_state->sf.state = bo;
1628
1629     /* WM */
1630     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1631     bo = dri_bo_alloc(i965->intel.bufmgr,
1632                       "surface state & binding table",
1633                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1634                       4096);
1635     assert(bo);
1636     render_state->wm.surface_state_binding_table_bo = bo;
1637
1638     dri_bo_unreference(render_state->wm.sampler);
1639     bo = dri_bo_alloc(i965->intel.bufmgr,
1640                       "sampler state",
1641                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1642                       64);
1643     assert(bo);
1644     render_state->wm.sampler = bo;
1645     render_state->wm.sampler_count = 0;
1646
1647     dri_bo_unreference(render_state->wm.state);
1648     bo = dri_bo_alloc(i965->intel.bufmgr,
1649                       "wm state",
1650                       sizeof(struct i965_wm_unit_state),
1651                       64);
1652     assert(bo);
1653     render_state->wm.state = bo;
1654
1655     /* COLOR CALCULATOR */
1656     dri_bo_unreference(render_state->cc.state);
1657     bo = dri_bo_alloc(i965->intel.bufmgr,
1658                       "color calc state",
1659                       sizeof(struct i965_cc_unit_state),
1660                       64);
1661     assert(bo);
1662     render_state->cc.state = bo;
1663
1664     dri_bo_unreference(render_state->cc.viewport);
1665     bo = dri_bo_alloc(i965->intel.bufmgr,
1666                       "cc viewport",
1667                       sizeof(struct i965_cc_viewport),
1668                       64);
1669     assert(bo);
1670     render_state->cc.viewport = bo;
1671 }
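
/*
 * Every state buffer above follows the same re-allocation idiom, which
 * is safe on the first call because dri_bo_unreference() accepts NULL
 * ("unit" stands for any of the blocks above):
 *
 *     dri_bo_unreference(render_state->unit.state);   drop old cycle
 *     bo = dri_bo_alloc(bufmgr, "name", size, align);
 *     render_state->unit.state = bo;                  adopt fresh bo
 */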
1672
1673 static void
1674 i965_render_put_surface(
1675     VADriverContextP   ctx,
1676     struct object_surface *obj_surface,
1677     const VARectangle *src_rect,
1678     const VARectangle *dst_rect,
1679     unsigned int       flags
1680 )
1681 {
1682     struct i965_driver_data *i965 = i965_driver_data(ctx);
1683     struct intel_batchbuffer *batch = i965->batch;
1684
1685     i965_render_initialize(ctx);
1686     i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
1687     i965_surface_render_pipeline_setup(ctx);
1688     intel_batchbuffer_flush(batch);
1689 }
1690
1691 static void
1692 i965_render_put_subpicture(
1693     VADriverContextP   ctx,
1694     struct object_surface *obj_surface,
1695     const VARectangle *src_rect,
1696     const VARectangle *dst_rect
1697 )
1698 {
1699     struct i965_driver_data *i965 = i965_driver_data(ctx);
1700     struct intel_batchbuffer *batch = i965->batch;
1701     unsigned int index = obj_surface->subpic_render_idx;
1702     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1703
1704     assert(obj_subpic);
1705
1706     i965_render_initialize(ctx);
1707     i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
1708     i965_subpic_render_pipeline_setup(ctx);
1709     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
1710     intel_batchbuffer_flush(batch);
1711 }
1712
1713 /*
1714  * for GEN6+
1715  */
1716 static void 
1717 gen6_render_initialize(VADriverContextP ctx)
1718 {
1719     struct i965_driver_data *i965 = i965_driver_data(ctx);
1720     struct i965_render_state *render_state = &i965->render_state;
1721     dri_bo *bo;
1722
1723     /* VERTEX BUFFER */
1724     dri_bo_unreference(render_state->vb.vertex_buffer);
1725     bo = dri_bo_alloc(i965->intel.bufmgr,
1726                       "vertex buffer",
1727                       4096,
1728                       4096);
1729     assert(bo);
1730     render_state->vb.vertex_buffer = bo;
1731
1732     /* WM */
1733     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1734     bo = dri_bo_alloc(i965->intel.bufmgr,
1735                       "surface state & binding table",
1736                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1737                       4096);
1738     assert(bo);
1739     render_state->wm.surface_state_binding_table_bo = bo;
1740
1741     dri_bo_unreference(render_state->wm.sampler);
1742     bo = dri_bo_alloc(i965->intel.bufmgr,
1743                       "sampler state",
1744                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1745                       4096);
1746     assert(bo);
1747     render_state->wm.sampler = bo;
1748     render_state->wm.sampler_count = 0;
1749
1750     /* COLOR CALCULATOR */
1751     dri_bo_unreference(render_state->cc.state);
1752     bo = dri_bo_alloc(i965->intel.bufmgr,
1753                       "color calc state",
1754                       sizeof(struct gen6_color_calc_state),
1755                       4096);
1756     assert(bo);
1757     render_state->cc.state = bo;
1758
1759     /* CC VIEWPORT */
1760     dri_bo_unreference(render_state->cc.viewport);
1761     bo = dri_bo_alloc(i965->intel.bufmgr,
1762                       "cc viewport",
1763                       sizeof(struct i965_cc_viewport),
1764                       4096);
1765     assert(bo);
1766     render_state->cc.viewport = bo;
1767
1768     /* BLEND STATE */
1769     dri_bo_unreference(render_state->cc.blend);
1770     bo = dri_bo_alloc(i965->intel.bufmgr,
1771                       "blend state",
1772                       sizeof(struct gen6_blend_state),
1773                       4096);
1774     assert(bo);
1775     render_state->cc.blend = bo;
1776
1777     /* DEPTH & STENCIL STATE */
1778     dri_bo_unreference(render_state->cc.depth_stencil);
1779     bo = dri_bo_alloc(i965->intel.bufmgr,
1780                       "depth & stencil state",
1781                       sizeof(struct gen6_depth_stencil_state),
1782                       4096);
1783     assert(bo);
1784     render_state->cc.depth_stencil = bo;
1785 }
1786
1787 static void
1788 gen6_render_color_calc_state(VADriverContextP ctx)
1789 {
1790     struct i965_driver_data *i965 = i965_driver_data(ctx);
1791     struct i965_render_state *render_state = &i965->render_state;
1792     struct gen6_color_calc_state *color_calc_state;
1793     
1794     dri_bo_map(render_state->cc.state, 1);
1795     assert(render_state->cc.state->virtual);
1796     color_calc_state = render_state->cc.state->virtual;
1797     memset(color_calc_state, 0, sizeof(*color_calc_state));
1798     color_calc_state->constant_r = 1.0;
1799     color_calc_state->constant_g = 0.0;
1800     color_calc_state->constant_b = 1.0;
1801     color_calc_state->constant_a = 1.0;
1802     dri_bo_unmap(render_state->cc.state);
1803 }
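
/*
 * (r, g, b, a) = (1.0, 0.0, 1.0, 1.0) is magenta, presumably chosen so
 * that any accidental use of the CC constant color shows up as an
 * obviously wrong color rather than going unnoticed.
 */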
1804
1805 static void
1806 gen6_render_blend_state(VADriverContextP ctx)
1807 {
1808     struct i965_driver_data *i965 = i965_driver_data(ctx);
1809     struct i965_render_state *render_state = &i965->render_state;
1810     struct gen6_blend_state *blend_state;
1811     
1812     dri_bo_map(render_state->cc.blend, 1);
1813     assert(render_state->cc.blend->virtual);
1814     blend_state = render_state->cc.blend->virtual;
1815     memset(blend_state, 0, sizeof(*blend_state));
1816     blend_state->blend1.logic_op_enable = 1;
1817     blend_state->blend1.logic_op_func = 0xc;
1818     dri_bo_unmap(render_state->cc.blend);
1819 }
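
/*
 * logic_op_func 0xc is the COPY truth table (bit pattern 1100 over the
 * (src, dst) pair), i.e.
 *
 *     out = src;
 *
 * so the GEN6 surface path writes the shader output unmodified; real
 * blending is only enabled in the subpicture variant further down.
 */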
1820
1821 static void
1822 gen6_render_depth_stencil_state(VADriverContextP ctx)
1823 {
1824     struct i965_driver_data *i965 = i965_driver_data(ctx);
1825     struct i965_render_state *render_state = &i965->render_state;
1826     struct gen6_depth_stencil_state *depth_stencil_state;
1827     
1828     dri_bo_map(render_state->cc.depth_stencil, 1);
1829     assert(render_state->cc.depth_stencil->virtual);
1830     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1831     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
1832     dri_bo_unmap(render_state->cc.depth_stencil);
1833 }
1834
1835 static void
1836 gen6_render_setup_states(
1837     VADriverContextP   ctx,
1838     struct object_surface *obj_surface,
1839     const VARectangle *src_rect,
1840     const VARectangle *dst_rect,
1841     unsigned int       flags
1842 )
1843 {
1844     i965_render_dest_surface_state(ctx, 0);
1845     i965_render_src_surfaces_state(ctx, obj_surface, flags);
1846     i965_render_sampler(ctx);
1847     i965_render_cc_viewport(ctx);
1848     gen6_render_color_calc_state(ctx);
1849     gen6_render_blend_state(ctx);
1850     gen6_render_depth_stencil_state(ctx);
1851     i965_render_upload_constants(ctx, obj_surface, flags);
1852     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
1853 }
1854
1855 static void
1856 gen6_emit_invarient_states(VADriverContextP ctx)
1857 {
1858     struct i965_driver_data *i965 = i965_driver_data(ctx);
1859     struct intel_batchbuffer *batch = i965->batch;
1860
1861     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
1862
1863     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
1864     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
1865               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
1866     OUT_BATCH(batch, 0);
1867
1868     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
1869     OUT_BATCH(batch, 1);
1870
1871     /* Set system instruction pointer */
1872     OUT_BATCH(batch, CMD_STATE_SIP | 0);
1873     OUT_BATCH(batch, 0);
1874 }
1875
1876 static void
1877 gen6_emit_state_base_address(VADriverContextP ctx)
1878 {
1879     struct i965_driver_data *i965 = i965_driver_data(ctx);
1880     struct intel_batchbuffer *batch = i965->batch;
1881     struct i965_render_state *render_state = &i965->render_state;
1882
1883     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
1884     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
1885     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
1886     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
1887     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
1888     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
1889     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
1890     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
1891     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
1892     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
1893 }
1894
1895 static void
1896 gen6_emit_viewport_state_pointers(VADriverContextP ctx)
1897 {
1898     struct i965_driver_data *i965 = i965_driver_data(ctx);
1899     struct intel_batchbuffer *batch = i965->batch;
1900     struct i965_render_state *render_state = &i965->render_state;
1901
1902     OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
1903               GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
1904               (4 - 2));
1905     OUT_BATCH(batch, 0);
1906     OUT_BATCH(batch, 0);
1907     OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1908 }
1909
1910 static void
1911 gen6_emit_urb(VADriverContextP ctx)
1912 {
1913     struct i965_driver_data *i965 = i965_driver_data(ctx);
1914     struct intel_batchbuffer *batch = i965->batch;
1915
1916     OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
1917     OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
1918               (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
1919     OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
1920               (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
1921 }
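
/*
 * The VS entry size is encoded minus one, so ((1 - 1) << SIZE_SHIFT)
 * requests single-row entries, and 24 entries is the documented GEN6
 * minimum noted above. The GS row stays zero because the GS stage is
 * disabled later in this state sequence.
 */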
1922
1923 static void
1924 gen6_emit_cc_state_pointers(VADriverContextP ctx)
1925 {
1926     struct i965_driver_data *i965 = i965_driver_data(ctx);
1927     struct intel_batchbuffer *batch = i965->batch;
1928     struct i965_render_state *render_state = &i965->render_state;
1929
1930     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
1931     OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1932     OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1933     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1934 }
1935
1936 static void
1937 gen6_emit_sampler_state_pointers(VADriverContextP ctx)
1938 {
1939     struct i965_driver_data *i965 = i965_driver_data(ctx);
1940     struct intel_batchbuffer *batch = i965->batch;
1941     struct i965_render_state *render_state = &i965->render_state;
1942
1943     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
1944               GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
1945               (4 - 2));
1946     OUT_BATCH(batch, 0); /* VS */
1947     OUT_BATCH(batch, 0); /* GS */
1948     OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1949 }
1950
1951 static void
1952 gen6_emit_binding_table(VADriverContextP ctx)
1953 {
1954     struct i965_driver_data *i965 = i965_driver_data(ctx);
1955     struct intel_batchbuffer *batch = i965->batch;
1956
1957     /* Binding table pointers */
1958     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
1959               GEN6_BINDING_TABLE_MODIFY_PS |
1960               (4 - 2));
1961     OUT_BATCH(batch, 0);                /* vs */
1962     OUT_BATCH(batch, 0);                /* gs */
1963     /* Only the PS uses the binding table */
1964     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
1965 }
1966
1967 static void
1968 gen6_emit_depth_buffer_state(VADriverContextP ctx)
1969 {
1970     struct i965_driver_data *i965 = i965_driver_data(ctx);
1971     struct intel_batchbuffer *batch = i965->batch;
1972
1973     OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
1974     OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
1975               (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
1976     OUT_BATCH(batch, 0);
1977     OUT_BATCH(batch, 0);
1978     OUT_BATCH(batch, 0);
1979     OUT_BATCH(batch, 0);
1980     OUT_BATCH(batch, 0);
1981
1982     OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
1983     OUT_BATCH(batch, 0);
1984 }
1985
1986 static void
1987 gen6_emit_drawing_rectangle(VADriverContextP ctx)
1988 {
1989     i965_render_drawing_rectangle(ctx);
1990 }
1991
1992 static void 
1993 gen6_emit_vs_state(VADriverContextP ctx)
1994 {
1995     struct i965_driver_data *i965 = i965_driver_data(ctx);
1996     struct intel_batchbuffer *batch = i965->batch;
1997
1998     /* disable VS constant buffer */
1999     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
2000     OUT_BATCH(batch, 0);
2001     OUT_BATCH(batch, 0);
2002     OUT_BATCH(batch, 0);
2003     OUT_BATCH(batch, 0);
2004         
2005     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2006     OUT_BATCH(batch, 0); /* without VS kernel */
2007     OUT_BATCH(batch, 0);
2008     OUT_BATCH(batch, 0);
2009     OUT_BATCH(batch, 0);
2010     OUT_BATCH(batch, 0); /* pass-through */
2011 }
2012
2013 static void 
2014 gen6_emit_gs_state(VADriverContextP ctx)
2015 {
2016     struct i965_driver_data *i965 = i965_driver_data(ctx);
2017     struct intel_batchbuffer *batch = i965->batch;
2018
2019     /* disable GS constant buffer */
2020     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
2021     OUT_BATCH(batch, 0);
2022     OUT_BATCH(batch, 0);
2023     OUT_BATCH(batch, 0);
2024     OUT_BATCH(batch, 0);
2025         
2026     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2027     OUT_BATCH(batch, 0); /* without GS kernel */
2028     OUT_BATCH(batch, 0);
2029     OUT_BATCH(batch, 0);
2030     OUT_BATCH(batch, 0);
2031     OUT_BATCH(batch, 0);
2032     OUT_BATCH(batch, 0); /* pass-through */
2033 }
2034
2035 static void 
2036 gen6_emit_clip_state(VADriverContextP ctx)
2037 {
2038     struct i965_driver_data *i965 = i965_driver_data(ctx);
2039     struct intel_batchbuffer *batch = i965->batch;
2040
2041     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2042     OUT_BATCH(batch, 0);
2043     OUT_BATCH(batch, 0); /* pass-through */
2044     OUT_BATCH(batch, 0);
2045 }
2046
2047 static void 
2048 gen6_emit_sf_state(VADriverContextP ctx)
2049 {
2050     struct i965_driver_data *i965 = i965_driver_data(ctx);
2051     struct intel_batchbuffer *batch = i965->batch;
2052
2053     OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
2054     OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
2055               (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
2056               (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
2057     OUT_BATCH(batch, 0);
2058     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2059     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
2060     OUT_BATCH(batch, 0);
2061     OUT_BATCH(batch, 0);
2062     OUT_BATCH(batch, 0);
2063     OUT_BATCH(batch, 0);
2064     OUT_BATCH(batch, 0); /* DW9 */
2065     OUT_BATCH(batch, 0);
2066     OUT_BATCH(batch, 0);
2067     OUT_BATCH(batch, 0);
2068     OUT_BATCH(batch, 0);
2069     OUT_BATCH(batch, 0); /* DW14 */
2070     OUT_BATCH(batch, 0);
2071     OUT_BATCH(batch, 0);
2072     OUT_BATCH(batch, 0);
2073     OUT_BATCH(batch, 0);
2074     OUT_BATCH(batch, 0); /* DW19 */
2075 }
2076
2077 static void 
2078 gen6_emit_wm_state(VADriverContextP ctx, int kernel)
2079 {
2080     struct i965_driver_data *i965 = i965_driver_data(ctx);
2081     struct intel_batchbuffer *batch = i965->batch;
2082     struct i965_render_state *render_state = &i965->render_state;
2083
2084     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
2085               GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
2086               (5 - 2));
2087     OUT_RELOC(batch, 
2088               render_state->curbe.bo,
2089               I915_GEM_DOMAIN_INSTRUCTION, 0,
2090               (URB_CS_ENTRY_SIZE-1));
2091     OUT_BATCH(batch, 0);
2092     OUT_BATCH(batch, 0);
2093     OUT_BATCH(batch, 0);
2094
2095     OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
2096     OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
2097               I915_GEM_DOMAIN_INSTRUCTION, 0,
2098               0);
2099     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | /* sic: spelled this way in i965_defines.h */
2100               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2101     OUT_BATCH(batch, 0);
2102     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
2103     OUT_BATCH(batch, ((i965->intel.device_info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
2104               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
2105               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
2106     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
2107               GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2108     OUT_BATCH(batch, 0);
2109     OUT_BATCH(batch, 0);
2110 }
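
/*
 * The CONSTANT_PS reloc above folds the buffer length into the low
 * address bits: the CURBE bo is sufficiently aligned, so the reloc
 * delta (URB_CS_ENTRY_SIZE - 1) appears to encode length-minus-one,
 * roughly
 *
 *     dw1 = curbe_bo_gpu_address | (URB_CS_ENTRY_SIZE - 1);
 *
 * (the exact unit of the length field is assumed from the encoding).
 */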
2111
2112 static void
2113 gen6_emit_vertex_element_state(VADriverContextP ctx)
2114 {
2115     struct i965_driver_data *i965 = i965_driver_data(ctx);
2116     struct intel_batchbuffer *batch = i965->batch;
2117
2118     /* Set up our vertex elements, sourced from the single vertex buffer. */
2119     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2120     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2121     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2122               GEN6_VE0_VALID |
2123               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2124               (0 << VE0_OFFSET_SHIFT));
2125     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2126               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2127               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2128               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2129     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2130     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2131               GEN6_VE0_VALID |
2132               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2133               (8 << VE0_OFFSET_SHIFT));
2134     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2135               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2136               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2137               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2138 }
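
/*
 * Each 8-byte R32G32_FLOAT element is widened to a full 4-component
 * attribute by the STORE_1_FLT components:
 *
 *     position  = { x,  y,  1.0f, 1.0f }   read from offset 0
 *     texcoord0 = { s0, t0, 1.0f, 1.0f }   read from offset 8
 *
 * matching the 16-byte vertex stride programmed in the vertex buffer.
 */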
2139
2140 static void
2141 gen6_emit_vertices(VADriverContextP ctx)
2142 {
2143     struct i965_driver_data *i965 = i965_driver_data(ctx);
2144     struct intel_batchbuffer *batch = i965->batch;
2145     struct i965_render_state *render_state = &i965->render_state;
2146
2147     BEGIN_BATCH(batch, 11);
2148     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
2149     OUT_BATCH(batch, 
2150               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2151               GEN6_VB0_VERTEXDATA |
2152               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2153     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2154     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2155     OUT_BATCH(batch, 0);
2156
2157     OUT_BATCH(batch, 
2158               CMD_3DPRIMITIVE |
2159               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
2160               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
2161               (0 << 9) |
2162               4);
2163     OUT_BATCH(batch, 3); /* vertex count per instance */
2164     OUT_BATCH(batch, 0); /* start vertex offset */
2165     OUT_BATCH(batch, 1); /* single instance */
2166     OUT_BATCH(batch, 0); /* start instance location */
2167     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
2168     ADVANCE_BATCH(batch);
2169 }
2170
2171 static void
2172 gen6_render_emit_states(VADriverContextP ctx, int kernel)
2173 {
2174     struct i965_driver_data *i965 = i965_driver_data(ctx);
2175     struct intel_batchbuffer *batch = i965->batch;
2176
2177     intel_batchbuffer_start_atomic(batch, 0x1000);
2178     intel_batchbuffer_emit_mi_flush(batch);
2179     gen6_emit_invarient_states(ctx);
2180     gen6_emit_state_base_address(ctx);
2181     gen6_emit_viewport_state_pointers(ctx);
2182     gen6_emit_urb(ctx);
2183     gen6_emit_cc_state_pointers(ctx);
2184     gen6_emit_sampler_state_pointers(ctx);
2185     gen6_emit_vs_state(ctx);
2186     gen6_emit_gs_state(ctx);
2187     gen6_emit_clip_state(ctx);
2188     gen6_emit_sf_state(ctx);
2189     gen6_emit_wm_state(ctx, kernel);
2190     gen6_emit_binding_table(ctx);
2191     gen6_emit_depth_buffer_state(ctx);
2192     gen6_emit_drawing_rectangle(ctx);
2193     gen6_emit_vertex_element_state(ctx);
2194     gen6_emit_vertices(ctx);
2195     intel_batchbuffer_end_atomic(batch);
2196 }
2197
2198 static void
2199 gen6_render_put_surface(
2200     VADriverContextP   ctx,
2201     struct object_surface *obj_surface,
2202     const VARectangle *src_rect,
2203     const VARectangle *dst_rect,
2204     unsigned int       flags
2205 )
2206 {
2207     struct i965_driver_data *i965 = i965_driver_data(ctx);
2208     struct intel_batchbuffer *batch = i965->batch;
2209
2210     gen6_render_initialize(ctx);
2211     gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
2212     i965_clear_dest_region(ctx);
2213     gen6_render_emit_states(ctx, PS_KERNEL);
2214     intel_batchbuffer_flush(batch);
2215 }
2216
2217 static void
2218 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2219 {
2220     struct i965_driver_data *i965 = i965_driver_data(ctx);
2221     struct i965_render_state *render_state = &i965->render_state;
2222     struct gen6_blend_state *blend_state;
2223
2224     dri_bo_unmap(render_state->cc.state); /* cc.state is not mapped at this point; this unmap appears to be a leftover */
2225     dri_bo_map(render_state->cc.blend, 1);
2226     assert(render_state->cc.blend->virtual);
2227     blend_state = render_state->cc.blend->virtual;
2228     memset(blend_state, 0, sizeof(*blend_state));
2229     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2230     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2231     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2232     blend_state->blend0.blend_enable = 1;
2233     blend_state->blend1.post_blend_clamp_enable = 1;
2234     blend_state->blend1.pre_blend_clamp_enable = 1;
2235     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2236     dri_bo_unmap(render_state->cc.blend);
2237 }
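
/*
 * These factors program standard "over" compositing for subpictures:
 *
 *     out = src * src_alpha + dst * (1 - src_alpha)
 *
 * with both pre- and post-blend results clamped to [0, 1].
 */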
2238
2239 static void
2240 gen6_subpicture_render_setup_states(
2241     VADriverContextP   ctx,
2242     struct object_surface *obj_surface,
2243     const VARectangle *src_rect,
2244     const VARectangle *dst_rect
2245 )
2246 {
2247     i965_render_dest_surface_state(ctx, 0);
2248     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
2249     i965_render_sampler(ctx);
2250     i965_render_cc_viewport(ctx);
2251     gen6_render_color_calc_state(ctx);
2252     gen6_subpicture_render_blend_state(ctx);
2253     gen6_render_depth_stencil_state(ctx);
2254     i965_subpic_render_upload_constants(ctx, obj_surface);
2255     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
2256 }
2257
2258 static void
2259 gen6_render_put_subpicture(
2260     VADriverContextP   ctx,
2261     struct object_surface *obj_surface,
2262     const VARectangle *src_rect,
2263     const VARectangle *dst_rect
2264 )
2265 {
2266     struct i965_driver_data *i965 = i965_driver_data(ctx);
2267     struct intel_batchbuffer *batch = i965->batch;
2268     unsigned int index = obj_surface->subpic_render_idx;
2269     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
2270
2271     assert(obj_subpic);
2272     gen6_render_initialize(ctx);
2273     gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
2274     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2275     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
2276     intel_batchbuffer_flush(batch);
2277 }
2278
2279 /*
2280  * for GEN7
2281  */
2282 static void 
2283 gen7_render_initialize(VADriverContextP ctx)
2284 {
2285     struct i965_driver_data *i965 = i965_driver_data(ctx);
2286     struct i965_render_state *render_state = &i965->render_state;
2287     dri_bo *bo;
2288
2289     /* VERTEX BUFFER */
2290     dri_bo_unreference(render_state->vb.vertex_buffer);
2291     bo = dri_bo_alloc(i965->intel.bufmgr,
2292                       "vertex buffer",
2293                       4096,
2294                       4096);
2295     assert(bo);
2296     render_state->vb.vertex_buffer = bo;
2297
2298     /* WM */
2299     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2300     bo = dri_bo_alloc(i965->intel.bufmgr,
2301                       "surface state & binding table",
2302                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2303                       4096);
2304     assert(bo);
2305     render_state->wm.surface_state_binding_table_bo = bo;
2306
2307     dri_bo_unreference(render_state->wm.sampler);
2308     bo = dri_bo_alloc(i965->intel.bufmgr,
2309                       "sampler state",
2310                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
2311                       4096);
2312     assert(bo);
2313     render_state->wm.sampler = bo;
2314     render_state->wm.sampler_count = 0;
2315
2316     /* COLOR CALCULATOR */
2317     dri_bo_unreference(render_state->cc.state);
2318     bo = dri_bo_alloc(i965->intel.bufmgr,
2319                       "color calc state",
2320                       sizeof(struct gen6_color_calc_state),
2321                       4096);
2322     assert(bo);
2323     render_state->cc.state = bo;
2324
2325     /* CC VIEWPORT */
2326     dri_bo_unreference(render_state->cc.viewport);
2327     bo = dri_bo_alloc(i965->intel.bufmgr,
2328                       "cc viewport",
2329                       sizeof(struct i965_cc_viewport),
2330                       4096);
2331     assert(bo);
2332     render_state->cc.viewport = bo;
2333
2334     /* BLEND STATE */
2335     dri_bo_unreference(render_state->cc.blend);
2336     bo = dri_bo_alloc(i965->intel.bufmgr,
2337                       "blend state",
2338                       sizeof(struct gen6_blend_state),
2339                       4096);
2340     assert(bo);
2341     render_state->cc.blend = bo;
2342
2343     /* DEPTH & STENCIL STATE */
2344     dri_bo_unreference(render_state->cc.depth_stencil);
2345     bo = dri_bo_alloc(i965->intel.bufmgr,
2346                       "depth & stencil state",
2347                       sizeof(struct gen6_depth_stencil_state),
2348                       4096);
2349     assert(bo);
2350     render_state->cc.depth_stencil = bo;
2351 }
2352
2353 /*
2354  * for GEN8 (only this alignment constant; the GEN7 helpers follow)
2355  */
2356 #define ALIGNMENT       64
2357
2358 static void
2359 gen7_render_color_calc_state(VADriverContextP ctx)
2360 {
2361     struct i965_driver_data *i965 = i965_driver_data(ctx);
2362     struct i965_render_state *render_state = &i965->render_state;
2363     struct gen6_color_calc_state *color_calc_state;
2364     
2365     dri_bo_map(render_state->cc.state, 1);
2366     assert(render_state->cc.state->virtual);
2367     color_calc_state = render_state->cc.state->virtual;
2368     memset(color_calc_state, 0, sizeof(*color_calc_state));
2369     color_calc_state->constant_r = 1.0;
2370     color_calc_state->constant_g = 0.0;
2371     color_calc_state->constant_b = 1.0;
2372     color_calc_state->constant_a = 1.0;
2373     dri_bo_unmap(render_state->cc.state);
2374 }
2375
2376 static void
2377 gen7_render_blend_state(VADriverContextP ctx)
2378 {
2379     struct i965_driver_data *i965 = i965_driver_data(ctx);
2380     struct i965_render_state *render_state = &i965->render_state;
2381     struct gen6_blend_state *blend_state;
2382     
2383     dri_bo_map(render_state->cc.blend, 1);
2384     assert(render_state->cc.blend->virtual);
2385     blend_state = render_state->cc.blend->virtual;
2386     memset(blend_state, 0, sizeof(*blend_state));
2387     blend_state->blend1.logic_op_enable = 1;
2388     blend_state->blend1.logic_op_func = 0xc;
2389     blend_state->blend1.pre_blend_clamp_enable = 1;
2390     dri_bo_unmap(render_state->cc.blend);
2391 }
2392
2393 static void
2394 gen7_render_depth_stencil_state(VADriverContextP ctx)
2395 {
2396     struct i965_driver_data *i965 = i965_driver_data(ctx);
2397     struct i965_render_state *render_state = &i965->render_state;
2398     struct gen6_depth_stencil_state *depth_stencil_state;
2399     
2400     dri_bo_map(render_state->cc.depth_stencil, 1);
2401     assert(render_state->cc.depth_stencil->virtual);
2402     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2403     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2404     dri_bo_unmap(render_state->cc.depth_stencil);
2405 }
2406
2407 static void 
2408 gen7_render_sampler(VADriverContextP ctx)
2409 {
2410     struct i965_driver_data *i965 = i965_driver_data(ctx);
2411     struct i965_render_state *render_state = &i965->render_state;
2412     struct gen7_sampler_state *sampler_state;
2413     int i;
2414     
2415     assert(render_state->wm.sampler_count > 0);
2416     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2417
2418     dri_bo_map(render_state->wm.sampler, 1);
2419     assert(render_state->wm.sampler->virtual);
2420     sampler_state = render_state->wm.sampler->virtual;
2421     for (i = 0; i < render_state->wm.sampler_count; i++) {
2422         memset(sampler_state, 0, sizeof(*sampler_state));
2423         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2424         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2425         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2426         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2427         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2428         sampler_state++;
2429     }
2430
2431     dri_bo_unmap(render_state->wm.sampler);
2432 }
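
/*
 * All samplers are programmed identically: bilinear min/mag filtering
 * with the r/s/t coordinates clamped, which is the edge behavior a
 * scaled video blit wants at the surface borders.
 */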
2433
2434
2435 static void
2436 gen7_render_setup_states(
2437     VADriverContextP   ctx,
2438     struct object_surface *obj_surface,
2439     const VARectangle *src_rect,
2440     const VARectangle *dst_rect,
2441     unsigned int       flags
2442 )
2443 {
2444     i965_render_dest_surface_state(ctx, 0);
2445     i965_render_src_surfaces_state(ctx, obj_surface, flags);
2446     gen7_render_sampler(ctx);
2447     i965_render_cc_viewport(ctx);
2448     gen7_render_color_calc_state(ctx);
2449     gen7_render_blend_state(ctx);
2450     gen7_render_depth_stencil_state(ctx);
2451     i965_render_upload_constants(ctx, obj_surface, flags);
2452     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
2453 }
2454
2455
2456 static void
2457 gen7_emit_invarient_states(VADriverContextP ctx)
2458 {
2459     struct i965_driver_data *i965 = i965_driver_data(ctx);
2460     struct intel_batchbuffer *batch = i965->batch;
2461
2462     BEGIN_BATCH(batch, 1);
2463     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
2464     ADVANCE_BATCH(batch);
2465
2466     BEGIN_BATCH(batch, 4);
2467     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
2468     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
2469               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
2470     OUT_BATCH(batch, 0);
2471     OUT_BATCH(batch, 0);
2472     ADVANCE_BATCH(batch);
2473
2474     BEGIN_BATCH(batch, 2);
2475     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
2476     OUT_BATCH(batch, 1);
2477     ADVANCE_BATCH(batch);
2478
2479     /* Set system instruction pointer */
2480     BEGIN_BATCH(batch, 2);
2481     OUT_BATCH(batch, CMD_STATE_SIP | 0);
2482     OUT_BATCH(batch, 0);
2483     ADVANCE_BATCH(batch);
2484 }
2485
2486 static void
2487 gen7_emit_state_base_address(VADriverContextP ctx)
2488 {
2489     struct i965_driver_data *i965 = i965_driver_data(ctx);
2490     struct intel_batchbuffer *batch = i965->batch;
2491     struct i965_render_state *render_state = &i965->render_state;
2492
2493     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2494     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2495     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2496     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2497     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2498     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2499     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
2500     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2501     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2502     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2503 }
2504
2505 static void
2506 gen7_emit_viewport_state_pointers(VADriverContextP ctx)
2507 {
2508     struct i965_driver_data *i965 = i965_driver_data(ctx);
2509     struct intel_batchbuffer *batch = i965->batch;
2510     struct i965_render_state *render_state = &i965->render_state;
2511
2512     BEGIN_BATCH(batch, 2);
2513     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
2514     OUT_RELOC(batch,
2515               render_state->cc.viewport,
2516               I915_GEM_DOMAIN_INSTRUCTION, 0,
2517               0);
2518     ADVANCE_BATCH(batch);
2519
2520     BEGIN_BATCH(batch, 2);
2521     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
2522     OUT_BATCH(batch, 0);
2523     ADVANCE_BATCH(batch);
2524 }
2525
2526 /*
2527  * URB layout on GEN7 
2528  * ----------------------------------------
2529  * | PS Push Constants (8KB) | VS entries |
2530  * ----------------------------------------
2531  */
2532 static void
2533 gen7_emit_urb(VADriverContextP ctx)
2534 {
2535     struct i965_driver_data *i965 = i965_driver_data(ctx);
2536     struct intel_batchbuffer *batch = i965->batch;
2537     unsigned int num_urb_entries = 32;
2538
2539     if (IS_HASWELL(i965->intel.device_info))
2540         num_urb_entries = 64;
2541
2542     BEGIN_BATCH(batch, 2);
2543     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
2544     OUT_BATCH(batch, 8); /* in 1KBs */
2545     ADVANCE_BATCH(batch);
2546
2547     BEGIN_BATCH(batch, 2);
2548     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
2549     OUT_BATCH(batch, 
2550               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
2551               (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
2552               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2553     ADVANCE_BATCH(batch);
2554
2555     BEGIN_BATCH(batch, 2);
2556     OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
2557     OUT_BATCH(batch,
2558               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2559               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2560     ADVANCE_BATCH(batch);
2561
2562     BEGIN_BATCH(batch, 2);
2563     OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
2564     OUT_BATCH(batch,
2565               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2566               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2567     ADVANCE_BATCH(batch);
2568
2569     BEGIN_BATCH(batch, 2);
2570     OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
2571     OUT_BATCH(batch,
2572               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2573               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2574     ADVANCE_BATCH(batch);
2575 }
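
/*
 * The values above implement the layout sketched before this function:
 * the first 8KB of URB space is handed to PS push constants, so every
 * stage's starting address is placed past it (the starting-address
 * field appears to be in 8KB units, matching the 8KB allocation). Only
 * the VS reserves entries; GS/HS/DS are disabled but still need a
 * non-overlapping start.
 */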
2576
2577 static void
2578 gen7_emit_cc_state_pointers(VADriverContextP ctx)
2579 {
2580     struct i965_driver_data *i965 = i965_driver_data(ctx);
2581     struct intel_batchbuffer *batch = i965->batch;
2582     struct i965_render_state *render_state = &i965->render_state;
2583
2584     BEGIN_BATCH(batch, 2);
2585     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
2586     OUT_RELOC(batch,
2587               render_state->cc.state,
2588               I915_GEM_DOMAIN_INSTRUCTION, 0,
2589               1);
2590     ADVANCE_BATCH(batch);
2591
2592     BEGIN_BATCH(batch, 2);
2593     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
2594     OUT_RELOC(batch,
2595               render_state->cc.blend,
2596               I915_GEM_DOMAIN_INSTRUCTION, 0,
2597               1);
2598     ADVANCE_BATCH(batch);
2599
2600     BEGIN_BATCH(batch, 2);
2601     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
2602     OUT_RELOC(batch,
2603               render_state->cc.depth_stencil,
2604               I915_GEM_DOMAIN_INSTRUCTION, 0, 
2605               1);
2606     ADVANCE_BATCH(batch);
2607 }
2608
2609 static void
2610 gen7_emit_sampler_state_pointers(VADriverContextP ctx)
2611 {
2612     struct i965_driver_data *i965 = i965_driver_data(ctx);
2613     struct intel_batchbuffer *batch = i965->batch;
2614     struct i965_render_state *render_state = &i965->render_state;
2615
2616     BEGIN_BATCH(batch, 2);
2617     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
2618     OUT_RELOC(batch,
2619               render_state->wm.sampler,
2620               I915_GEM_DOMAIN_INSTRUCTION, 0,
2621               0);
2622     ADVANCE_BATCH(batch);
2623 }
2624
2625 static void
2626 gen7_emit_binding_table(VADriverContextP ctx)
2627 {
2628     struct i965_driver_data *i965 = i965_driver_data(ctx);
2629     struct intel_batchbuffer *batch = i965->batch;
2630
2631     BEGIN_BATCH(batch, 2);
2632     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
2633     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
2634     ADVANCE_BATCH(batch);
2635 }
2636
2637 static void
2638 gen7_emit_depth_buffer_state(VADriverContextP ctx)
2639 {
2640     struct i965_driver_data *i965 = i965_driver_data(ctx);
2641     struct intel_batchbuffer *batch = i965->batch;
2642
2643     BEGIN_BATCH(batch, 7);
2644     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
2645     OUT_BATCH(batch,
2646               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
2647               (I965_SURFACE_NULL << 29));
2648     OUT_BATCH(batch, 0);
2649     OUT_BATCH(batch, 0);
2650     OUT_BATCH(batch, 0);
2651     OUT_BATCH(batch, 0);
2652     OUT_BATCH(batch, 0);
2653     ADVANCE_BATCH(batch);
2654
2655     BEGIN_BATCH(batch, 3);
2656     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
2657     OUT_BATCH(batch, 0);
2658     OUT_BATCH(batch, 0);
2659     ADVANCE_BATCH(batch);
2660 }
2661
2662 static void
2663 gen7_emit_drawing_rectangle(VADriverContextP ctx)
2664 {
2665     i965_render_drawing_rectangle(ctx);
2666 }
2667
2668 static void 
2669 gen7_emit_vs_state(VADriverContextP ctx)
2670 {
2671     struct i965_driver_data *i965 = i965_driver_data(ctx);
2672     struct intel_batchbuffer *batch = i965->batch;
2673
2674     /* disable VS constant buffer */
2675     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
2676     OUT_BATCH(batch, 0);
2677     OUT_BATCH(batch, 0);
2678     OUT_BATCH(batch, 0);
2679     OUT_BATCH(batch, 0);
2680     OUT_BATCH(batch, 0);
2681     OUT_BATCH(batch, 0);
2682         
2683     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2684     OUT_BATCH(batch, 0); /* without VS kernel */
2685     OUT_BATCH(batch, 0);
2686     OUT_BATCH(batch, 0);
2687     OUT_BATCH(batch, 0);
2688     OUT_BATCH(batch, 0); /* pass-through */
2689 }
2690
2691 static void 
2692 gen7_emit_bypass_state(VADriverContextP ctx)
2693 {
2694     struct i965_driver_data *i965 = i965_driver_data(ctx);
2695     struct intel_batchbuffer *batch = i965->batch;
2696
2697     /* bypass GS */
2698     BEGIN_BATCH(batch, 7);
2699     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
2700     OUT_BATCH(batch, 0);
2701     OUT_BATCH(batch, 0);
2702     OUT_BATCH(batch, 0);
2703     OUT_BATCH(batch, 0);
2704     OUT_BATCH(batch, 0);
2705     OUT_BATCH(batch, 0);
2706     ADVANCE_BATCH(batch);
2707
2708     BEGIN_BATCH(batch, 7);      
2709     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2710     OUT_BATCH(batch, 0); /* without GS kernel */
2711     OUT_BATCH(batch, 0);
2712     OUT_BATCH(batch, 0);
2713     OUT_BATCH(batch, 0);
2714     OUT_BATCH(batch, 0);
2715     OUT_BATCH(batch, 0); /* pass-through */
2716     ADVANCE_BATCH(batch);
2717
2718     BEGIN_BATCH(batch, 2);
2719     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
2720     OUT_BATCH(batch, 0);
2721     ADVANCE_BATCH(batch);
2722
2723     /* disable HS */
2724     BEGIN_BATCH(batch, 7);
2725     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
2726     OUT_BATCH(batch, 0);
2727     OUT_BATCH(batch, 0);
2728     OUT_BATCH(batch, 0);
2729     OUT_BATCH(batch, 0);
2730     OUT_BATCH(batch, 0);
2731     OUT_BATCH(batch, 0);
2732     ADVANCE_BATCH(batch);
2733
2734     BEGIN_BATCH(batch, 7);
2735     OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
2736     OUT_BATCH(batch, 0);
2737     OUT_BATCH(batch, 0);
2738     OUT_BATCH(batch, 0);
2739     OUT_BATCH(batch, 0);
2740     OUT_BATCH(batch, 0);
2741     OUT_BATCH(batch, 0);
2742     ADVANCE_BATCH(batch);
2743
2744     BEGIN_BATCH(batch, 2);
2745     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
2746     OUT_BATCH(batch, 0);
2747     ADVANCE_BATCH(batch);
2748
2749     /* Disable TE */
2750     BEGIN_BATCH(batch, 4);
2751     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
2752     OUT_BATCH(batch, 0);
2753     OUT_BATCH(batch, 0);
2754     OUT_BATCH(batch, 0);
2755     ADVANCE_BATCH(batch);
2756
2757     /* Disable DS */
2758     BEGIN_BATCH(batch, 7);
2759     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
2760     OUT_BATCH(batch, 0);
2761     OUT_BATCH(batch, 0);
2762     OUT_BATCH(batch, 0);
2763     OUT_BATCH(batch, 0);
2764     OUT_BATCH(batch, 0);
2765     OUT_BATCH(batch, 0);
2766     ADVANCE_BATCH(batch);
2767
2768     BEGIN_BATCH(batch, 6);
2769     OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
2770     OUT_BATCH(batch, 0);
2771     OUT_BATCH(batch, 0);
2772     OUT_BATCH(batch, 0);
2773     OUT_BATCH(batch, 0);
2774     OUT_BATCH(batch, 0);
2775     ADVANCE_BATCH(batch);
2776
2777     BEGIN_BATCH(batch, 2);
2778     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
2779     OUT_BATCH(batch, 0);
2780     ADVANCE_BATCH(batch);
2781
2782     /* Disable STREAMOUT */
2783     BEGIN_BATCH(batch, 3);
2784     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
2785     OUT_BATCH(batch, 0);
2786     OUT_BATCH(batch, 0);
2787     ADVANCE_BATCH(batch);
2788 }
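
/*
 * Every stage between VS and SF is disabled explicitly rather than
 * left stale: each gets a zeroed CONSTANT_* packet, a zeroed unit
 * state, and a zeroed binding-table pointer, so state left behind by a
 * previous batch cannot leak into this pass-through pipeline.
 */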
2789
2790 static void 
2791 gen7_emit_clip_state(VADriverContextP ctx)
2792 {
2793     struct i965_driver_data *i965 = i965_driver_data(ctx);
2794     struct intel_batchbuffer *batch = i965->batch;
2795
2796     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2797     OUT_BATCH(batch, 0);
2798     OUT_BATCH(batch, 0); /* pass-through */
2799     OUT_BATCH(batch, 0);
2800 }
2801
2802 static void 
2803 gen7_emit_sf_state(VADriverContextP ctx)
2804 {
2805     struct i965_driver_data *i965 = i965_driver_data(ctx);
2806     struct intel_batchbuffer *batch = i965->batch;
2807
2808     BEGIN_BATCH(batch, 14);
2809     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
2810     OUT_BATCH(batch,
2811               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
2812               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
2813               (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
2814     OUT_BATCH(batch, 0);
2815     OUT_BATCH(batch, 0);
2816     OUT_BATCH(batch, 0); /* DW4 */
2817     OUT_BATCH(batch, 0);
2818     OUT_BATCH(batch, 0);
2819     OUT_BATCH(batch, 0);
2820     OUT_BATCH(batch, 0);
2821     OUT_BATCH(batch, 0); /* DW9 */
2822     OUT_BATCH(batch, 0);
2823     OUT_BATCH(batch, 0);
2824     OUT_BATCH(batch, 0);
2825     OUT_BATCH(batch, 0);
2826     ADVANCE_BATCH(batch);
2827
2828     BEGIN_BATCH(batch, 7);
2829     OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
2830     OUT_BATCH(batch, 0);
2831     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2832     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
2833     OUT_BATCH(batch, 0);
2834     OUT_BATCH(batch, 0);
2835     OUT_BATCH(batch, 0);
2836     ADVANCE_BATCH(batch);
2837 }
2838
2839 static void 
2840 gen7_emit_wm_state(VADriverContextP ctx, int kernel)
2841 {
2842     struct i965_driver_data *i965 = i965_driver_data(ctx);
2843     struct intel_batchbuffer *batch = i965->batch;
2844     struct i965_render_state *render_state = &i965->render_state;
2845     unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
2846     unsigned int num_samples = 0;
2847
2848     if (IS_HASWELL(i965->intel.device_info)) {
2849         max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
2850         num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
2851     }
2852
2853     BEGIN_BATCH(batch, 3);
2854     OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
2855     OUT_BATCH(batch,
2856               GEN7_WM_DISPATCH_ENABLE |
2857               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2858     OUT_BATCH(batch, 0);
2859     ADVANCE_BATCH(batch);
2860
2861     BEGIN_BATCH(batch, 7);
2862     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
2863     OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
2864     OUT_BATCH(batch, 0);
2865     OUT_RELOC(batch, 
2866               render_state->curbe.bo,
2867               I915_GEM_DOMAIN_INSTRUCTION, 0,
2868               0);
2869     OUT_BATCH(batch, 0);
2870     OUT_BATCH(batch, 0);
2871     OUT_BATCH(batch, 0);
2872     ADVANCE_BATCH(batch);
2873
2874     BEGIN_BATCH(batch, 8);
2875     OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
2876     OUT_RELOC(batch, 
2877               render_state->render_kernels[kernel].bo,
2878               I915_GEM_DOMAIN_INSTRUCTION, 0,
2879               0);
2880     OUT_BATCH(batch, 
2881               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
2882               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2883     OUT_BATCH(batch, 0); /* scratch space base offset */
2884     OUT_BATCH(batch, 
2885               ((i965->intel.device_info->max_wm_threads - 1) << max_threads_shift) | num_samples |
2886               GEN7_PS_PUSH_CONSTANT_ENABLE |
2887               GEN7_PS_ATTRIBUTE_ENABLE |
2888               GEN7_PS_16_DISPATCH_ENABLE);
2889     OUT_BATCH(batch, 
2890               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
2891     OUT_BATCH(batch, 0); /* kernel 1 pointer */
2892     OUT_BATCH(batch, 0); /* kernel 2 pointer */
2893     ADVANCE_BATCH(batch);
2894 }
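
/*
 * Haswell relocated the maximum-thread-count field within 3DSTATE_PS
 * and added a sample-mask field, hence the two variables chosen at the
 * top of this function; on Ivybridge num_samples stays 0 and the IVB
 * shift is used.
 */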
2895
2896 static void
2897 gen7_emit_vertex_element_state(VADriverContextP ctx)
2898 {
2899     struct i965_driver_data *i965 = i965_driver_data(ctx);
2900     struct intel_batchbuffer *batch = i965->batch;
2901
2902     /* Set up our vertex elements, sourced from the single vertex buffer. */
2903     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2904     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2905     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2906               GEN6_VE0_VALID |
2907               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2908               (0 << VE0_OFFSET_SHIFT));
2909     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2910               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2911               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2912               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2913     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2914     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2915               GEN6_VE0_VALID |
2916               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2917               (8 << VE0_OFFSET_SHIFT));
2918     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2919               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2920               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2921               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2922 }

static void
gen7_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0); /* start address */
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); /* end address: 3 vertices x 16 bytes */
    OUT_BATCH(batch, 0); /* instance data step rate */
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST |
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* base vertex location */
    ADVANCE_BATCH(batch);
}
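
/*
 * Editor's note (illustrative sketch, not driver code): _3DPRIM_RECTLIST
 * draws an axis-aligned rectangle from only three vertices; the hardware
 * infers the fourth corner.  With the 16-byte layout sketched earlier, the
 * 48-byte buffer (hence the 12 * 4 end-address relocation above) would be
 * filled roughly like this, assuming the customary lower-right, lower-left,
 * upper-left vertex order (fill_rect_vertices_sketch and its texture
 * coordinate parameters are hypothetical):
 */
#if 0
static void
fill_rect_vertices_sketch(float *vb, const VARectangle *dst,
                          float s0, float t0, float s1, float t1)
{
    float *p = vb;

    *p++ = dst->x + dst->width; *p++ = dst->y + dst->height; *p++ = s1; *p++ = t1;
    *p++ = dst->x;              *p++ = dst->y + dst->height; *p++ = s0; *p++ = t1;
    *p++ = dst->x;              *p++ = dst->y;               *p++ = s0; *p++ = t0;
}
#endif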

static void
gen7_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_emit_invarient_states(ctx);
    gen7_emit_state_base_address(ctx);
    gen7_emit_viewport_state_pointers(ctx);
    gen7_emit_urb(ctx);
    gen7_emit_cc_state_pointers(ctx);
    gen7_emit_sampler_state_pointers(ctx);
    gen7_emit_bypass_state(ctx);
    gen7_emit_vs_state(ctx);
    gen7_emit_clip_state(ctx);
    gen7_emit_sf_state(ctx);
    gen7_emit_wm_state(ctx, kernel);
    gen7_emit_binding_table(ctx);
    gen7_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen7_emit_vertex_element_state(ctx);
    gen7_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
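
/*
 * Editor's note (illustrative sketch, not driver code): the function above
 * brackets the whole state + draw sequence with the start_atomic()/
 * end_atomic() pair so the reserved space (0x1000 bytes here) guarantees no
 * implicit batch flush can separate the pipeline state from the 3DPRIMITIVE
 * that consumes it.  The bracketing pattern in isolation:
 */
#if 0
intel_batchbuffer_start_atomic(batch, 0x1000);
/* ... emit all pipeline state, then the primitive ... */
intel_batchbuffer_end_atomic(batch);
#endif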


static void
gen7_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen7_render_initialize(ctx);
    gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_clear_dest_region(ctx);
    gen7_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}


static void
gen7_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_unmap(render_state->cc.state);
    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}
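
/*
 * Editor's note (illustrative sketch, not driver code): the factor pair
 * above (SRC_ALPHA / INV_SRC_ALPHA with ADD) is classic non-premultiplied
 * "source over" blending, clamped to [0, 1] per channel:
 *
 *     out = src * src_alpha + dst * (1 - src_alpha)
 */
#if 0
static float
blend_over_sketch(float src, float dst, float src_alpha)
{
    float out = src * src_alpha + dst * (1.0f - src_alpha);

    /* mirror the pre/post blend clamps enabled above */
    return out < 0.0f ? 0.0f : (out > 1.0f ? 1.0f : out);
}
#endif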

static void
gen7_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_subpicture_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
gen7_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen7_render_initialize(ctx);
    gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}


void
intel_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int has_done_scaling = 0;
    VARectangle calibrated_rect;
    VASurfaceID out_surface_id = i965_post_processing(ctx,
                                                      obj_surface,
                                                      src_rect,
                                                      dst_rect,
                                                      flags,
                                                      &has_done_scaling,
                                                      &calibrated_rect);

    assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));

    /* If post-processing produced an intermediate surface, render from it. */
    if (out_surface_id != VA_INVALID_ID) {
        struct object_surface *new_obj_surface = SURFACE(out_surface_id);

        if (new_obj_surface && new_obj_surface->bo)
            obj_surface = new_obj_surface;

        if (has_done_scaling)
            src_rect = &calibrated_rect; /* scaling was already applied by PP */
    }

    render_state->render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);

    /* Drop the intermediate post-processing surface, if any. */
    if (out_surface_id != VA_INVALID_ID)
        i965_DestroySurfaces(ctx, &out_surface_id, 1);
}
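
/*
 * Editor's note (illustrative sketch, not driver code): a caller that wants
 * to blit a whole decoded surface to a same-sized drawable would invoke the
 * entry point above roughly as follows (put_whole_surface_sketch and its
 * width/height parameters are hypothetical):
 */
#if 0
static void
put_whole_surface_sketch(VADriverContextP ctx,
                         struct object_surface *obj_surface,
                         unsigned int width, unsigned int height)
{
    VARectangle rect = { .x = 0, .y = 0, .width = width, .height = height };

    /* identical src/dst rectangles: no scaling, default (frame) flags */
    intel_render_put_surface(ctx, obj_surface, &rect, &rect, VA_FRAME_PICTURE);
}
#endif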

void
intel_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    render_state->render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
}

static void
genx_render_terminate(VADriverContextP ctx)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    dri_bo_unreference(render_state->curbe.bo);
    render_state->curbe.bo = NULL;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;
    dri_bo_unreference(render_state->vs.state);
    render_state->vs.state = NULL;
    dri_bo_unreference(render_state->sf.state);
    render_state->sf.state = NULL;
    dri_bo_unreference(render_state->wm.sampler);
    render_state->wm.sampler = NULL;
    dri_bo_unreference(render_state->wm.state);
    render_state->wm.state = NULL;
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL; /* was left dangling; clear it like the other BO pointers */
    dri_bo_unreference(render_state->cc.viewport);
    render_state->cc.viewport = NULL;
    dri_bo_unreference(render_state->cc.state);
    render_state->cc.state = NULL;
    dri_bo_unreference(render_state->cc.blend);
    render_state->cc.blend = NULL;
    dri_bo_unreference(render_state->cc.depth_stencil);
    render_state->cc.depth_stencil = NULL;

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }
}

bool
genx_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;

    /* kernel */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
                                 sizeof(render_kernels_gen5[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
                                 sizeof(render_kernels_gen6[0])));

    if (IS_GEN7(i965->intel.device_info)) {
        memcpy(render_state->render_kernels,
               (IS_HASWELL(i965->intel.device_info) ? render_kernels_gen7_haswell : render_kernels_gen7),
               sizeof(render_state->render_kernels));
        render_state->render_put_surface = gen7_render_put_surface;
        render_state->render_put_subpicture = gen7_render_put_subpicture;
    } else if (IS_GEN6(i965->intel.device_info)) {
        memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
        render_state->render_put_surface = gen6_render_put_surface;
        render_state->render_put_subpicture = gen6_render_put_subpicture;
    } else if (IS_IRONLAKE(i965->intel.device_info)) {
        memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
        render_state->render_put_surface = i965_render_put_surface;
        render_state->render_put_subpicture = i965_render_put_subpicture;
    } else {
        memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
        render_state->render_put_surface = i965_render_put_surface;
        render_state->render_put_subpicture = i965_render_put_subpicture;
    }

    render_state->render_terminate = genx_render_terminate;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size, 0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* constant buffer */
    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
                                          "constant buffer",
                                          4096, 64);
    assert(render_state->curbe.bo);

    return true;
}
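
/*
 * Editor's note (illustrative sketch, not driver code): the upload pattern
 * genx_render_init() uses for the shader binaries -- allocate a page-aligned
 * buffer object, then copy the blob into it -- looks like this in isolation
 * (upload_blob_sketch and its parameters are hypothetical):
 */
#if 0
static dri_bo *
upload_blob_sketch(dri_bufmgr *bufmgr, const char *name,
                   const void *data, unsigned long size)
{
    dri_bo *bo = dri_bo_alloc(bufmgr, name, size, 0x1000);

    if (bo)
        dri_bo_subdata(bo, 0, size, data); /* copy the blob into the BO */

    return bo;
}
#endif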

bool
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    return i965->codec_info->render_init(ctx);
}

void
i965_render_terminate(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    render_state->render_terminate(ctx);
}