Increase the size of constant buffer for PS thread to pass more info
[android-x86/hardware-intel-common-vaapi.git] src/i965_render.c

/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"

#include "i965_render.h"

#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

static const uint32_t ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_color_balance.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

static const uint32_t ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IRONLAKE */
static const uint32_t sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

static const uint32_t ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] =
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_color_balance.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] =
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_color_balance.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Programs for Haswell */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

#define SURFACE_STATE_PADDED_SIZE_I965  ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7  ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

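/*
 * Type-pun a float through a union to get its IEEE-754 bit pattern as a
 * uint32_t, so float constants can be emitted into the batch buffer as
 * plain dwords (see i965_render_constant_color() below).
 */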
static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4

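/*
 * The VS unit is configured as a pass-through: vertex shading is disabled
 * and the vertex cache is turned off, so only the URB entry allocation set
 * up below matters.
 */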
static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_id))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

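/*
 * Set up the strips-and-fans unit to run the SF kernel (exa_sf), reading
 * one URB entry per vertex and skipping the viewport transform. The
 * relocation keeps thread0's kernel pointer valid if the kernel BO moves.
 */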
static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

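/*
 * One sampler state per source surface: bilinear min/mag filtering with
 * all three texture coordinates clamped at the edges.
 */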
static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0; /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0; /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

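/*
 * The CC viewport covers essentially the whole float range, so depth
 * clamping never rejects a pixel; the CC units below disable depth testing
 * as well.
 */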
static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;  /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dest_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

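/*
 * Translate the kernel's I915_TILING_* mode into the tiled_surface and
 * tile_walk fields of the pre-gen7 surface state.
 */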
static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD | I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}

static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* Set "Shader Channel Select" */
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD | I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}

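/*
 * Surface states and the binding table share one BO: MAX_RENDER_SURFACES
 * padded surface-state slots come first, and binding-table entry i simply
 * holds SURFACE_STATE_OFFSET(i).
 */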
static void
i965_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

static void
i965_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      struct object_surface *obj_surface)
{
    dri_bo *subpic_region;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
    struct object_image *obj_image = obj_subpic->obj_image;

    assert(obj_surface);
    assert(obj_surface->bo);
    subpic_region = obj_image->bo;
    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

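/*
 * Upload the three vertices of the RECTLIST primitive. Each vertex is four
 * floats (s, t, x, y); the rotation table selects which texture-coordinate
 * corners map to the bottom-right, bottom-left and top-left screen corners.
 */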
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}

static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

#define PI  3.1415926

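/*
 * Fill the CURBE constant buffer read by the PS thread: word 0 selects the
 * sampling path (2 = Y800, 1 = NV12, 0 = other planar formats), word 1
 * tells the shader to skip the color-balance transform when all attributes
 * are at their defaults, and four floats starting at byte offset 16 carry
 * the contrast/brightness/hue/saturation coefficients.
 */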
static void
i965_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0'));

        constant_buffer[0] = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
            constant_buffer[0] = 1;
        else
            constant_buffer[0] = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }

    dri_bo_map(render_state->curbe.bo, 1);

    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, obj_surface);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

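/*
 * Partition the URB between the fixed-function stages: consecutive regions
 * for VS, GS, CLIP, SF and CS, with each stage's fence set to the end of
 * its region.
 */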
static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |  /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));           /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);
}

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}

static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    struct object_image *obj_image,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    assert(obj_image);

    if (!obj_image)
        return;

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* fill the palette: bits 0-23 hold the color, bits 24-31 the alpha */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}

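/*
 * Point the hardware at the vertex buffer (pitch of 4 floats per vertex)
 * and kick off a three-vertex RECTLIST that draws the destination
 * rectangle.
 */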
1443 static void
1444 i965_render_startup(VADriverContextP ctx)
1445 {
1446     struct i965_driver_data *i965 = i965_driver_data(ctx);
1447     struct intel_batchbuffer *batch = i965->batch;
1448     struct i965_render_state *render_state = &i965->render_state;
1449
1450     BEGIN_BATCH(batch, 11);
1451     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
1452     OUT_BATCH(batch, 
1453               (0 << VB0_BUFFER_INDEX_SHIFT) |
1454               VB0_VERTEXDATA |
1455               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
1456     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
1457
1458     if (IS_IRONLAKE(i965->intel.device_id))
1459         OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
1460     else
1461         OUT_BATCH(batch, 3);
1462
1463     OUT_BATCH(batch, 0);
1464
1465     OUT_BATCH(batch, 
1466               CMD_3DPRIMITIVE |
1467               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
1468               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
1469               (0 << 9) |
1470               4);
1471     OUT_BATCH(batch, 3); /* vertex count per instance */
1472     OUT_BATCH(batch, 0); /* start vertex offset */
1473     OUT_BATCH(batch, 1); /* single instance */
1474     OUT_BATCH(batch, 0); /* start instance location */
1475     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
1476     ADVANCE_BATCH(batch);
1477 }
1478
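/*
 * Clear the destination region to black with the 2D BLT engine before
 * compositing.  0xf0 in bits 16-23 of BR13 is the PATCOPY raster
 * operation, and for tiled destinations XY_COLOR_BLT expects the pitch in
 * dwords, hence the divide by 4.
 */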
1479 static void 
1480 i965_clear_dest_region(VADriverContextP ctx)
1481 {
1482     struct i965_driver_data *i965 = i965_driver_data(ctx);
1483     struct intel_batchbuffer *batch = i965->batch;
1484     struct i965_render_state *render_state = &i965->render_state;
1485     struct intel_region *dest_region = render_state->draw_region;
1486     unsigned int blt_cmd, br13;
1487     int pitch;
1488
1489     blt_cmd = XY_COLOR_BLT_CMD;
1490     br13 = 0xf0 << 16;
1491     pitch = dest_region->pitch;
1492
1493     if (dest_region->cpp == 4) {
1494         br13 |= BR13_8888;
1495         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
1496     } else {
1497         assert(dest_region->cpp == 2);
1498         br13 |= BR13_565;
1499     }
1500
1501     if (dest_region->tiling != I915_TILING_NONE) {
1502         blt_cmd |= XY_COLOR_BLT_DST_TILED;
1503         pitch /= 4;
1504     }
1505
1506     br13 |= pitch;
1507
1508     if (IS_GEN6(i965->intel.device_id) ||
1509         IS_GEN7(i965->intel.device_id)) {
1510         intel_batchbuffer_start_atomic_blt(batch, 24);
1511         BEGIN_BLT_BATCH(batch, 6);
1512     } else {
1513         intel_batchbuffer_start_atomic(batch, 24);
1514         BEGIN_BATCH(batch, 6);
1515     }
1516
1517     OUT_BATCH(batch, blt_cmd);
1518     OUT_BATCH(batch, br13);
1519     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
1520     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
1521               (dest_region->x + dest_region->width));
1522     OUT_RELOC(batch, dest_region->bo, 
1523               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1524               0);
1525     OUT_BATCH(batch, 0x0);
1526     ADVANCE_BATCH(batch);
1527     intel_batchbuffer_end_atomic(batch);
1528 }
1529
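/*
 * Emit the full Gen4/5 3D pipeline state in its required order, followed
 * by the draw, inside one atomic batchbuffer section so the sequence is
 * never flushed part way through.
 */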
1530 static void
1531 i965_surface_render_pipeline_setup(VADriverContextP ctx)
1532 {
1533     struct i965_driver_data *i965 = i965_driver_data(ctx);
1534     struct intel_batchbuffer *batch = i965->batch;
1535
1536     i965_clear_dest_region(ctx);
1537     intel_batchbuffer_start_atomic(batch, 0x1000);
1538     intel_batchbuffer_emit_mi_flush(batch);
1539     i965_render_pipeline_select(ctx);
1540     i965_render_state_sip(ctx);
1541     i965_render_state_base_address(ctx);
1542     i965_render_binding_table_pointers(ctx);
1543     i965_render_constant_color(ctx);
1544     i965_render_pipelined_pointers(ctx);
1545     i965_render_urb_layout(ctx);
1546     i965_render_cs_urb_layout(ctx);
1547     i965_render_constant_buffer(ctx);
1548     i965_render_drawing_rectangle(ctx);
1549     i965_render_vertex_elements(ctx);
1550     i965_render_startup(ctx);
1551     intel_batchbuffer_end_atomic(batch);
1552 }
1553
1554 static void
1555 i965_subpic_render_pipeline_setup(VADriverContextP ctx)
1556 {
1557     struct i965_driver_data *i965 = i965_driver_data(ctx);
1558     struct intel_batchbuffer *batch = i965->batch;
1559
1560     intel_batchbuffer_start_atomic(batch, 0x1000);
1561     intel_batchbuffer_emit_mi_flush(batch);
1562     i965_render_pipeline_select(ctx);
1563     i965_render_state_sip(ctx);
1564     i965_render_state_base_address(ctx);
1565     i965_render_binding_table_pointers(ctx);
1566     i965_render_constant_color(ctx);
1567     i965_render_pipelined_pointers(ctx);
1568     i965_render_urb_layout(ctx);
1569     i965_render_cs_urb_layout(ctx);
1570     i965_render_drawing_rectangle(ctx);
1571     i965_render_vertex_elements(ctx);
1572     i965_render_startup(ctx);
1573     intel_batchbuffer_end_atomic(batch);
1574 }
1575
1576
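/*
 * (Re)allocate the buffer objects backing the per-stage state on Gen4/5.
 * Every slot is unreferenced before the new allocation, so calling this
 * once per putsurface simply drops the previous generation of state.
 */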
1577 static void 
1578 i965_render_initialize(VADriverContextP ctx)
1579 {
1580     struct i965_driver_data *i965 = i965_driver_data(ctx);
1581     struct i965_render_state *render_state = &i965->render_state;
1582     dri_bo *bo;
1583
1584     /* VERTEX BUFFER */
1585     dri_bo_unreference(render_state->vb.vertex_buffer);
1586     bo = dri_bo_alloc(i965->intel.bufmgr,
1587                       "vertex buffer",
1588                       4096,
1589                       4096);
1590     assert(bo);
1591     render_state->vb.vertex_buffer = bo;
1592
1593     /* VS */
1594     dri_bo_unreference(render_state->vs.state);
1595     bo = dri_bo_alloc(i965->intel.bufmgr,
1596                       "vs state",
1597                       sizeof(struct i965_vs_unit_state),
1598                       64);
1599     assert(bo);
1600     render_state->vs.state = bo;
1601
1602     /* GS */
1603     /* CLIP */
1604     /* SF */
1605     dri_bo_unreference(render_state->sf.state);
1606     bo = dri_bo_alloc(i965->intel.bufmgr,
1607                       "sf state",
1608                       sizeof(struct i965_sf_unit_state),
1609                       64);
1610     assert(bo);
1611     render_state->sf.state = bo;
1612
1613     /* WM */
1614     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1615     bo = dri_bo_alloc(i965->intel.bufmgr,
1616                       "surface state & binding table",
1617                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1618                       4096);
1619     assert(bo);
1620     render_state->wm.surface_state_binding_table_bo = bo;
1621
1622     dri_bo_unreference(render_state->wm.sampler);
1623     bo = dri_bo_alloc(i965->intel.bufmgr,
1624                       "sampler state",
1625                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1626                       64);
1627     assert(bo);
1628     render_state->wm.sampler = bo;
1629     render_state->wm.sampler_count = 0;
1630
1631     dri_bo_unreference(render_state->wm.state);
1632     bo = dri_bo_alloc(i965->intel.bufmgr,
1633                       "wm state",
1634                       sizeof(struct i965_wm_unit_state),
1635                       64);
1636     assert(bo);
1637     render_state->wm.state = bo;
1638
1639     /* COLOR CALCULATOR */
1640     dri_bo_unreference(render_state->cc.state);
1641     bo = dri_bo_alloc(i965->intel.bufmgr,
1642                       "color calc state",
1643                       sizeof(struct i965_cc_unit_state),
1644                       64);
1645     assert(bo);
1646     render_state->cc.state = bo;
1647
1648     dri_bo_unreference(render_state->cc.viewport);
1649     bo = dri_bo_alloc(i965->intel.bufmgr,
1650                       "cc viewport",
1651                       sizeof(struct i965_cc_viewport),
1652                       64);
1653     assert(bo);
1654     render_state->cc.viewport = bo;
1655 }
1656
1657 static void
1658 i965_render_put_surface(
1659     VADriverContextP   ctx,
1660     struct object_surface *obj_surface,
1661     const VARectangle *src_rect,
1662     const VARectangle *dst_rect,
1663     unsigned int       flags
1664 )
1665 {
1666     struct i965_driver_data *i965 = i965_driver_data(ctx);
1667     struct intel_batchbuffer *batch = i965->batch;
1668
1669     i965_render_initialize(ctx);
1670     i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
1671     i965_surface_render_pipeline_setup(ctx);
1672     intel_batchbuffer_flush(batch);
1673 }
1674
1675 static void
1676 i965_render_put_subpicture(
1677     VADriverContextP   ctx,
1678     struct object_surface *obj_surface,
1679     const VARectangle *src_rect,
1680     const VARectangle *dst_rect
1681 )
1682 {
1683     struct i965_driver_data *i965 = i965_driver_data(ctx);
1684     struct intel_batchbuffer *batch = i965->batch;
1685     unsigned int index = obj_surface->subpic_render_idx;
1686     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1687
1688     assert(obj_subpic);
1689
1690     i965_render_initialize(ctx);
1691     i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
1692     i965_subpic_render_pipeline_setup(ctx);
1693     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
1694     intel_batchbuffer_flush(batch);
1695 }
1696
1697 /*
1698  * for GEN6+
1699  */
1700 static void 
1701 gen6_render_initialize(VADriverContextP ctx)
1702 {
1703     struct i965_driver_data *i965 = i965_driver_data(ctx);
1704     struct i965_render_state *render_state = &i965->render_state;
1705     dri_bo *bo;
1706
1707     /* VERTEX BUFFER */
1708     dri_bo_unreference(render_state->vb.vertex_buffer);
1709     bo = dri_bo_alloc(i965->intel.bufmgr,
1710                       "vertex buffer",
1711                       4096,
1712                       4096);
1713     assert(bo);
1714     render_state->vb.vertex_buffer = bo;
1715
1716     /* WM */
1717     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1718     bo = dri_bo_alloc(i965->intel.bufmgr,
1719                       "surface state & binding table",
1720                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1721                       4096);
1722     assert(bo);
1723     render_state->wm.surface_state_binding_table_bo = bo;
1724
1725     dri_bo_unreference(render_state->wm.sampler);
1726     bo = dri_bo_alloc(i965->intel.bufmgr,
1727                       "sampler state",
1728                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1729                       4096);
1730     assert(bo);
1731     render_state->wm.sampler = bo;
1732     render_state->wm.sampler_count = 0;
1733
1734     /* COLOR CALCULATOR */
1735     dri_bo_unreference(render_state->cc.state);
1736     bo = dri_bo_alloc(i965->intel.bufmgr,
1737                       "color calc state",
1738                       sizeof(struct gen6_color_calc_state),
1739                       4096);
1740     assert(bo);
1741     render_state->cc.state = bo;
1742
1743     /* CC VIEWPORT */
1744     dri_bo_unreference(render_state->cc.viewport);
1745     bo = dri_bo_alloc(i965->intel.bufmgr,
1746                       "cc viewport",
1747                       sizeof(struct i965_cc_viewport),
1748                       4096);
1749     assert(bo);
1750     render_state->cc.viewport = bo;
1751
1752     /* BLEND STATE */
1753     dri_bo_unreference(render_state->cc.blend);
1754     bo = dri_bo_alloc(i965->intel.bufmgr,
1755                       "blend state",
1756                       sizeof(struct gen6_blend_state),
1757                       4096);
1758     assert(bo);
1759     render_state->cc.blend = bo;
1760
1761     /* DEPTH & STENCIL STATE */
1762     dri_bo_unreference(render_state->cc.depth_stencil);
1763     bo = dri_bo_alloc(i965->intel.bufmgr,
1764                       "depth & stencil state",
1765                       sizeof(struct gen6_depth_stencil_state),
1766                       4096);
1767     assert(bo);
1768     render_state->cc.depth_stencil = bo;
1769 }
1770
1771 static void
1772 gen6_render_color_calc_state(VADriverContextP ctx)
1773 {
1774     struct i965_driver_data *i965 = i965_driver_data(ctx);
1775     struct i965_render_state *render_state = &i965->render_state;
1776     struct gen6_color_calc_state *color_calc_state;
1777     
1778     dri_bo_map(render_state->cc.state, 1);
1779     assert(render_state->cc.state->virtual);
1780     color_calc_state = render_state->cc.state->virtual;
1781     memset(color_calc_state, 0, sizeof(*color_calc_state));
1782     color_calc_state->constant_r = 1.0;
1783     color_calc_state->constant_g = 0.0;
1784     color_calc_state->constant_b = 1.0;
1785     color_calc_state->constant_a = 1.0;
1786     dri_bo_unmap(render_state->cc.state);
1787 }
1788
1789 static void
1790 gen6_render_blend_state(VADriverContextP ctx)
1791 {
1792     struct i965_driver_data *i965 = i965_driver_data(ctx);
1793     struct i965_render_state *render_state = &i965->render_state;
1794     struct gen6_blend_state *blend_state;
1795     
1796     dri_bo_map(render_state->cc.blend, 1);
1797     assert(render_state->cc.blend->virtual);
1798     blend_state = render_state->cc.blend->virtual;
1799     memset(blend_state, 0, sizeof(*blend_state));
1800     blend_state->blend1.logic_op_enable = 1;
1801     blend_state->blend1.logic_op_func = 0xc;
1802     dri_bo_unmap(render_state->cc.blend);
1803 }
1804
1805 static void
1806 gen6_render_depth_stencil_state(VADriverContextP ctx)
1807 {
1808     struct i965_driver_data *i965 = i965_driver_data(ctx);
1809     struct i965_render_state *render_state = &i965->render_state;
1810     struct gen6_depth_stencil_state *depth_stencil_state;
1811     
1812     dri_bo_map(render_state->cc.depth_stencil, 1);
1813     assert(render_state->cc.depth_stencil->virtual);
1814     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1815     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
1816     dri_bo_unmap(render_state->cc.depth_stencil);
1817 }
1818
1819 static void
1820 gen6_render_setup_states(
1821     VADriverContextP   ctx,
1822     struct object_surface *obj_surface,
1823     const VARectangle *src_rect,
1824     const VARectangle *dst_rect,
1825     unsigned int       flags
1826 )
1827 {
1828     i965_render_dest_surface_state(ctx, 0);
1829     i965_render_src_surfaces_state(ctx, obj_surface, flags);
1830     i965_render_sampler(ctx);
1831     i965_render_cc_viewport(ctx);
1832     gen6_render_color_calc_state(ctx);
1833     gen6_render_blend_state(ctx);
1834     gen6_render_depth_stencil_state(ctx);
1835     i965_render_upload_constants(ctx, obj_surface);
1836     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
1837 }
1838
1839 static void
1840 gen6_emit_invarient_states(VADriverContextP ctx)
1841 {
1842     struct i965_driver_data *i965 = i965_driver_data(ctx);
1843     struct intel_batchbuffer *batch = i965->batch;
1844
1845     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
1846
1847     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
1848     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
1849               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
1850     OUT_BATCH(batch, 0);
1851
1852     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
1853     OUT_BATCH(batch, 1);
1854
1855     /* Set system instruction pointer */
1856     OUT_BATCH(batch, CMD_STATE_SIP | 0);
1857     OUT_BATCH(batch, 0);
1858 }
1859
1860 static void
1861 gen6_emit_state_base_address(VADriverContextP ctx)
1862 {
1863     struct i965_driver_data *i965 = i965_driver_data(ctx);
1864     struct intel_batchbuffer *batch = i965->batch;
1865     struct i965_render_state *render_state = &i965->render_state;
1866
1867     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
1868     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
1869     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
1870     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
1871     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
1872     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
1873     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
1874     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
1875     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
1876     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
1877 }
1878
1879 static void
1880 gen6_emit_viewport_state_pointers(VADriverContextP ctx)
1881 {
1882     struct i965_driver_data *i965 = i965_driver_data(ctx);
1883     struct intel_batchbuffer *batch = i965->batch;
1884     struct i965_render_state *render_state = &i965->render_state;
1885
1886     OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
1887               GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
1888               (4 - 2));
1889     OUT_BATCH(batch, 0);
1890     OUT_BATCH(batch, 0);
1891     OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1892 }
1893
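/*
 * Minimal URB layout for GEN6: the pass-through VS still needs URB entries
 * (the hardware minimum is 24), while the unused GS gets none.
 */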
1894 static void
1895 gen6_emit_urb(VADriverContextP ctx)
1896 {
1897     struct i965_driver_data *i965 = i965_driver_data(ctx);
1898     struct intel_batchbuffer *batch = i965->batch;
1899
1900     OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
1901     OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
1902               (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
1903     OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
1904               (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
1905 }
1906
1907 static void
1908 gen6_emit_cc_state_pointers(VADriverContextP ctx)
1909 {
1910     struct i965_driver_data *i965 = i965_driver_data(ctx);
1911     struct intel_batchbuffer *batch = i965->batch;
1912     struct i965_render_state *render_state = &i965->render_state;
1913
1914     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
1915     OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1916     OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1917     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1918 }
1919
1920 static void
1921 gen6_emit_sampler_state_pointers(VADriverContextP ctx)
1922 {
1923     struct i965_driver_data *i965 = i965_driver_data(ctx);
1924     struct intel_batchbuffer *batch = i965->batch;
1925     struct i965_render_state *render_state = &i965->render_state;
1926
1927     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
1928               GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
1929               (4 - 2));
1930     OUT_BATCH(batch, 0); /* VS */
1931     OUT_BATCH(batch, 0); /* GS */
1932     OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1933 }
1934
1935 static void
1936 gen6_emit_binding_table(VADriverContextP ctx)
1937 {
1938     struct i965_driver_data *i965 = i965_driver_data(ctx);
1939     struct intel_batchbuffer *batch = i965->batch;
1940
1941     /* Binding table pointers */
1942     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
1943               GEN6_BINDING_TABLE_MODIFY_PS |
1944               (4 - 2));
1945     OUT_BATCH(batch, 0);                /* vs */
1946     OUT_BATCH(batch, 0);                /* gs */
1947     /* Only the PS uses the binding table */
1948     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
1949 }
1950
1951 static void
1952 gen6_emit_depth_buffer_state(VADriverContextP ctx)
1953 {
1954     struct i965_driver_data *i965 = i965_driver_data(ctx);
1955     struct intel_batchbuffer *batch = i965->batch;
1956
1957     OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
1958     OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
1959               (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
1960     OUT_BATCH(batch, 0);
1961     OUT_BATCH(batch, 0);
1962     OUT_BATCH(batch, 0);
1963     OUT_BATCH(batch, 0);
1964     OUT_BATCH(batch, 0);
1965
1966     OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
1967     OUT_BATCH(batch, 0);
1968 }
1969
1970 static void
1971 gen6_emit_drawing_rectangle(VADriverContextP ctx)
1972 {
1973     i965_render_drawing_rectangle(ctx);
1974 }
1975
1976 static void 
1977 gen6_emit_vs_state(VADriverContextP ctx)
1978 {
1979     struct i965_driver_data *i965 = i965_driver_data(ctx);
1980     struct intel_batchbuffer *batch = i965->batch;
1981
1982     /* disable VS constant buffer */
1983     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
1984     OUT_BATCH(batch, 0);
1985     OUT_BATCH(batch, 0);
1986     OUT_BATCH(batch, 0);
1987     OUT_BATCH(batch, 0);
1988         
1989     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
1990     OUT_BATCH(batch, 0); /* without VS kernel */
1991     OUT_BATCH(batch, 0);
1992     OUT_BATCH(batch, 0);
1993     OUT_BATCH(batch, 0);
1994     OUT_BATCH(batch, 0); /* pass-through */
1995 }
1996
1997 static void 
1998 gen6_emit_gs_state(VADriverContextP ctx)
1999 {
2000     struct i965_driver_data *i965 = i965_driver_data(ctx);
2001     struct intel_batchbuffer *batch = i965->batch;
2002
2003     /* disable GS constant buffer */
2004     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
2005     OUT_BATCH(batch, 0);
2006     OUT_BATCH(batch, 0);
2007     OUT_BATCH(batch, 0);
2008     OUT_BATCH(batch, 0);
2009         
2010     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2011     OUT_BATCH(batch, 0); /* without GS kernel */
2012     OUT_BATCH(batch, 0);
2013     OUT_BATCH(batch, 0);
2014     OUT_BATCH(batch, 0);
2015     OUT_BATCH(batch, 0);
2016     OUT_BATCH(batch, 0); /* pass-through */
2017 }
2018
2019 static void 
2020 gen6_emit_clip_state(VADriverContextP ctx)
2021 {
2022     struct i965_driver_data *i965 = i965_driver_data(ctx);
2023     struct intel_batchbuffer *batch = i965->batch;
2024
2025     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2026     OUT_BATCH(batch, 0);
2027     OUT_BATCH(batch, 0); /* pass-through */
2028     OUT_BATCH(batch, 0);
2029 }
2030
2031 static void 
2032 gen6_emit_sf_state(VADriverContextP ctx)
2033 {
2034     struct i965_driver_data *i965 = i965_driver_data(ctx);
2035     struct intel_batchbuffer *batch = i965->batch;
2036
2037     OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
2038     OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
2039               (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
2040               (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
2041     OUT_BATCH(batch, 0);
2042     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2043     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
2044     OUT_BATCH(batch, 0);
2045     OUT_BATCH(batch, 0);
2046     OUT_BATCH(batch, 0);
2047     OUT_BATCH(batch, 0);
2048     OUT_BATCH(batch, 0); /* DW9 */
2049     OUT_BATCH(batch, 0);
2050     OUT_BATCH(batch, 0);
2051     OUT_BATCH(batch, 0);
2052     OUT_BATCH(batch, 0);
2053     OUT_BATCH(batch, 0); /* DW14 */
2054     OUT_BATCH(batch, 0);
2055     OUT_BATCH(batch, 0);
2056     OUT_BATCH(batch, 0);
2057     OUT_BATCH(batch, 0);
2058     OUT_BATCH(batch, 0); /* DW19 */
2059 }
2060
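/*
 * Program the PS push-constant buffer and the WM dispatch state.  On GEN6
 * the 3DSTATE_CONSTANT_PS pointer dword also carries the buffer read
 * length in its low bits, which is what the (URB_CS_ENTRY_SIZE - 1)
 * relocation delta encodes (the length field is programmed minus one).
 */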
2061 static void 
2062 gen6_emit_wm_state(VADriverContextP ctx, int kernel)
2063 {
2064     struct i965_driver_data *i965 = i965_driver_data(ctx);
2065     struct intel_batchbuffer *batch = i965->batch;
2066     struct i965_render_state *render_state = &i965->render_state;
2067
2068     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
2069               GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
2070               (5 - 2));
2071     OUT_RELOC(batch, 
2072               render_state->curbe.bo,
2073               I915_GEM_DOMAIN_INSTRUCTION, 0,
2074               (URB_CS_ENTRY_SIZE-1));
2075     OUT_BATCH(batch, 0);
2076     OUT_BATCH(batch, 0);
2077     OUT_BATCH(batch, 0);
2078
2079     OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
2080     OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
2081               I915_GEM_DOMAIN_INSTRUCTION, 0,
2082               0);
2083     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
2084               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2085     OUT_BATCH(batch, 0);
2086     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
2087     OUT_BATCH(batch, ((render_state->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
2088               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
2089               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
2090     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
2091               GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2092     OUT_BATCH(batch, 0);
2093     OUT_BATCH(batch, 0);
2094 }
2095
2096 static void
2097 gen6_emit_vertex_element_state(VADriverContextP ctx)
2098 {
2099     struct i965_driver_data *i965 = i965_driver_data(ctx);
2100     struct intel_batchbuffer *batch = i965->batch;
2101
2102     /* Set up our vertex elements, sourced from the single vertex buffer. */
2103     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2104     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2105     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2106               GEN6_VE0_VALID |
2107               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2108               (0 << VE0_OFFSET_SHIFT));
2109     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2110               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2111               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2112               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2113     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2114     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2115               GEN6_VE0_VALID |
2116               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2117               (8 << VE0_OFFSET_SHIFT));
2118     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2119               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2120               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2121               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2122 }
2123
2124 static void
2125 gen6_emit_vertices(VADriverContextP ctx)
2126 {
2127     struct i965_driver_data *i965 = i965_driver_data(ctx);
2128     struct intel_batchbuffer *batch = i965->batch;
2129     struct i965_render_state *render_state = &i965->render_state;
2130
2131     BEGIN_BATCH(batch, 11);
2132     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
2133     OUT_BATCH(batch, 
2134               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2135               GEN6_VB0_VERTEXDATA |
2136               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2137     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2138     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2139     OUT_BATCH(batch, 0);
2140
2141     OUT_BATCH(batch, 
2142               CMD_3DPRIMITIVE |
2143               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
2144               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
2145               (0 << 9) |
2146               4);
2147     OUT_BATCH(batch, 3); /* vertex count per instance */
2148     OUT_BATCH(batch, 0); /* start vertex offset */
2149     OUT_BATCH(batch, 1); /* single instance */
2150     OUT_BATCH(batch, 0); /* start instance location */
2151     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
2152     ADVANCE_BATCH(batch);
2153 }
2154
2155 static void
2156 gen6_render_emit_states(VADriverContextP ctx, int kernel)
2157 {
2158     struct i965_driver_data *i965 = i965_driver_data(ctx);
2159     struct intel_batchbuffer *batch = i965->batch;
2160
2161     intel_batchbuffer_start_atomic(batch, 0x1000);
2162     intel_batchbuffer_emit_mi_flush(batch);
2163     gen6_emit_invarient_states(ctx);
2164     gen6_emit_state_base_address(ctx);
2165     gen6_emit_viewport_state_pointers(ctx);
2166     gen6_emit_urb(ctx);
2167     gen6_emit_cc_state_pointers(ctx);
2168     gen6_emit_sampler_state_pointers(ctx);
2169     gen6_emit_vs_state(ctx);
2170     gen6_emit_gs_state(ctx);
2171     gen6_emit_clip_state(ctx);
2172     gen6_emit_sf_state(ctx);
2173     gen6_emit_wm_state(ctx, kernel);
2174     gen6_emit_binding_table(ctx);
2175     gen6_emit_depth_buffer_state(ctx);
2176     gen6_emit_drawing_rectangle(ctx);
2177     gen6_emit_vertex_element_state(ctx);
2178     gen6_emit_vertices(ctx);
2179     intel_batchbuffer_end_atomic(batch);
2180 }
2181
2182 static void
2183 gen6_render_put_surface(
2184     VADriverContextP   ctx,
2185     struct object_surface *obj_surface,
2186     const VARectangle *src_rect,
2187     const VARectangle *dst_rect,
2188     unsigned int       flags
2189 )
2190 {
2191     struct i965_driver_data *i965 = i965_driver_data(ctx);
2192     struct intel_batchbuffer *batch = i965->batch;
2193
2194     gen6_render_initialize(ctx);
2195     gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
2196     i965_clear_dest_region(ctx);
2197     gen6_render_emit_states(ctx, PS_KERNEL);
2198     intel_batchbuffer_flush(batch);
2199 }
2200
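/*
 * Unlike the plain copy used for surfaces (logic op 0xc == COPY),
 * subpictures are composited with source-over alpha blending:
 * dst = src * src.alpha + dst * (1 - src.alpha), clamped to [0, 1].
 */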
2201 static void
2202 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2203 {
2204     struct i965_driver_data *i965 = i965_driver_data(ctx);
2205     struct i965_render_state *render_state = &i965->render_state;
2206     struct gen6_blend_state *blend_state;
2207
2209     dri_bo_map(render_state->cc.blend, 1);
2210     assert(render_state->cc.blend->virtual);
2211     blend_state = render_state->cc.blend->virtual;
2212     memset(blend_state, 0, sizeof(*blend_state));
2213     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2214     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2215     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2216     blend_state->blend0.blend_enable = 1;
2217     blend_state->blend1.post_blend_clamp_enable = 1;
2218     blend_state->blend1.pre_blend_clamp_enable = 1;
2219     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2220     dri_bo_unmap(render_state->cc.blend);
2221 }
2222
2223 static void
2224 gen6_subpicture_render_setup_states(
2225     VADriverContextP   ctx,
2226     struct object_surface *obj_surface,
2227     const VARectangle *src_rect,
2228     const VARectangle *dst_rect
2229 )
2230 {
2231     i965_render_dest_surface_state(ctx, 0);
2232     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
2233     i965_render_sampler(ctx);
2234     i965_render_cc_viewport(ctx);
2235     gen6_render_color_calc_state(ctx);
2236     gen6_subpicture_render_blend_state(ctx);
2237     gen6_render_depth_stencil_state(ctx);
2238     i965_subpic_render_upload_constants(ctx, obj_surface);
2239     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
2240 }
2241
2242 static void
2243 gen6_render_put_subpicture(
2244     VADriverContextP   ctx,
2245     struct object_surface *obj_surface,
2246     const VARectangle *src_rect,
2247     const VARectangle *dst_rect
2248 )
2249 {
2250     struct i965_driver_data *i965 = i965_driver_data(ctx);
2251     struct intel_batchbuffer *batch = i965->batch;
2252     unsigned int index = obj_surface->subpic_render_idx;
2253     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
2254
2255     assert(obj_subpic);
2256     gen6_render_initialize(ctx);
2257     gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
2258     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2259     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
2260     intel_batchbuffer_flush(batch);
2261 }
2262
2263 /*
2264  * for GEN7
2265  */
2266 static void 
2267 gen7_render_initialize(VADriverContextP ctx)
2268 {
2269     struct i965_driver_data *i965 = i965_driver_data(ctx);
2270     struct i965_render_state *render_state = &i965->render_state;
2271     dri_bo *bo;
2272
2273     /* VERTEX BUFFER */
2274     dri_bo_unreference(render_state->vb.vertex_buffer);
2275     bo = dri_bo_alloc(i965->intel.bufmgr,
2276                       "vertex buffer",
2277                       4096,
2278                       4096);
2279     assert(bo);
2280     render_state->vb.vertex_buffer = bo;
2281
2282     /* WM */
2283     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2284     bo = dri_bo_alloc(i965->intel.bufmgr,
2285                       "surface state & binding table",
2286                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2287                       4096);
2288     assert(bo);
2289     render_state->wm.surface_state_binding_table_bo = bo;
2290
2291     dri_bo_unreference(render_state->wm.sampler);
2292     bo = dri_bo_alloc(i965->intel.bufmgr,
2293                       "sampler state",
2294                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
2295                       4096);
2296     assert(bo);
2297     render_state->wm.sampler = bo;
2298     render_state->wm.sampler_count = 0;
2299
2300     /* COLOR CALCULATOR */
2301     dri_bo_unreference(render_state->cc.state);
2302     bo = dri_bo_alloc(i965->intel.bufmgr,
2303                       "color calc state",
2304                       sizeof(struct gen6_color_calc_state),
2305                       4096);
2306     assert(bo);
2307     render_state->cc.state = bo;
2308
2309     /* CC VIEWPORT */
2310     dri_bo_unreference(render_state->cc.viewport);
2311     bo = dri_bo_alloc(i965->intel.bufmgr,
2312                       "cc viewport",
2313                       sizeof(struct i965_cc_viewport),
2314                       4096);
2315     assert(bo);
2316     render_state->cc.viewport = bo;
2317
2318     /* BLEND STATE */
2319     dri_bo_unreference(render_state->cc.blend);
2320     bo = dri_bo_alloc(i965->intel.bufmgr,
2321                       "blend state",
2322                       sizeof(struct gen6_blend_state),
2323                       4096);
2324     assert(bo);
2325     render_state->cc.blend = bo;
2326
2327     /* DEPTH & STENCIL STATE */
2328     dri_bo_unreference(render_state->cc.depth_stencil);
2329     bo = dri_bo_alloc(i965->intel.bufmgr,
2330                       "depth & stencil state",
2331                       sizeof(struct gen6_depth_stencil_state),
2332                       4096);
2333     assert(bo);
2334     render_state->cc.depth_stencil = bo;
2335 }
2336
2337 static void
2338 gen7_render_color_calc_state(VADriverContextP ctx)
2339 {
2340     struct i965_driver_data *i965 = i965_driver_data(ctx);
2341     struct i965_render_state *render_state = &i965->render_state;
2342     struct gen6_color_calc_state *color_calc_state;
2343     
2344     dri_bo_map(render_state->cc.state, 1);
2345     assert(render_state->cc.state->virtual);
2346     color_calc_state = render_state->cc.state->virtual;
2347     memset(color_calc_state, 0, sizeof(*color_calc_state));
2348     color_calc_state->constant_r = 1.0;
2349     color_calc_state->constant_g = 0.0;
2350     color_calc_state->constant_b = 1.0;
2351     color_calc_state->constant_a = 1.0;
2352     dri_bo_unmap(render_state->cc.state);
2353 }
2354
2355 static void
2356 gen7_render_blend_state(VADriverContextP ctx)
2357 {
2358     struct i965_driver_data *i965 = i965_driver_data(ctx);
2359     struct i965_render_state *render_state = &i965->render_state;
2360     struct gen6_blend_state *blend_state;
2361     
2362     dri_bo_map(render_state->cc.blend, 1);
2363     assert(render_state->cc.blend->virtual);
2364     blend_state = render_state->cc.blend->virtual;
2365     memset(blend_state, 0, sizeof(*blend_state));
2366     blend_state->blend1.logic_op_enable = 1;
2367     blend_state->blend1.logic_op_func = 0xc;
2368     blend_state->blend1.pre_blend_clamp_enable = 1;
2369     dri_bo_unmap(render_state->cc.blend);
2370 }
2371
2372 static void
2373 gen7_render_depth_stencil_state(VADriverContextP ctx)
2374 {
2375     struct i965_driver_data *i965 = i965_driver_data(ctx);
2376     struct i965_render_state *render_state = &i965->render_state;
2377     struct gen6_depth_stencil_state *depth_stencil_state;
2378     
2379     dri_bo_map(render_state->cc.depth_stencil, 1);
2380     assert(render_state->cc.depth_stencil->virtual);
2381     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2382     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2383     dri_bo_unmap(render_state->cc.depth_stencil);
2384 }
2385
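/*
 * GEN7 rearranged the sampler state layout (struct gen7_sampler_state), so
 * the common i965_render_sampler() cannot be reused; the policy is the
 * same, bilinear filtering with all three coordinates clamped.
 */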
2386 static void 
2387 gen7_render_sampler(VADriverContextP ctx)
2388 {
2389     struct i965_driver_data *i965 = i965_driver_data(ctx);
2390     struct i965_render_state *render_state = &i965->render_state;
2391     struct gen7_sampler_state *sampler_state;
2392     int i;
2393     
2394     assert(render_state->wm.sampler_count > 0);
2395     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2396
2397     dri_bo_map(render_state->wm.sampler, 1);
2398     assert(render_state->wm.sampler->virtual);
2399     sampler_state = render_state->wm.sampler->virtual;
2400     for (i = 0; i < render_state->wm.sampler_count; i++) {
2401         memset(sampler_state, 0, sizeof(*sampler_state));
2402         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2403         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2404         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2405         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2406         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2407         sampler_state++;
2408     }
2409
2410     dri_bo_unmap(render_state->wm.sampler);
2411 }
2412
2413 static void
2414 gen7_render_setup_states(
2415     VADriverContextP   ctx,
2416     struct object_surface *obj_surface,
2417     const VARectangle *src_rect,
2418     const VARectangle *dst_rect,
2419     unsigned int       flags
2420 )
2421 {
2422     i965_render_dest_surface_state(ctx, 0);
2423     i965_render_src_surfaces_state(ctx, obj_surface, flags);
2424     gen7_render_sampler(ctx);
2425     i965_render_cc_viewport(ctx);
2426     gen7_render_color_calc_state(ctx);
2427     gen7_render_blend_state(ctx);
2428     gen7_render_depth_stencil_state(ctx);
2429     i965_render_upload_constants(ctx, obj_surface);
2430     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
2431 }
2432
2433 static void
2434 gen7_emit_invarient_states(VADriverContextP ctx)
2435 {
2436     struct i965_driver_data *i965 = i965_driver_data(ctx);
2437     struct intel_batchbuffer *batch = i965->batch;
2438
2439     BEGIN_BATCH(batch, 1);
2440     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
2441     ADVANCE_BATCH(batch);
2442
2443     BEGIN_BATCH(batch, 4);
2444     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
2445     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
2446               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
2447     OUT_BATCH(batch, 0);
2448     OUT_BATCH(batch, 0);
2449     ADVANCE_BATCH(batch);
2450
2451     BEGIN_BATCH(batch, 2);
2452     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
2453     OUT_BATCH(batch, 1);
2454     ADVANCE_BATCH(batch);
2455
2456     /* Set system instruction pointer */
2457     BEGIN_BATCH(batch, 2);
2458     OUT_BATCH(batch, CMD_STATE_SIP | 0);
2459     OUT_BATCH(batch, 0);
2460     ADVANCE_BATCH(batch);
2461 }
2462
2463 static void
2464 gen7_emit_state_base_address(VADriverContextP ctx)
2465 {
2466     struct i965_driver_data *i965 = i965_driver_data(ctx);
2467     struct intel_batchbuffer *batch = i965->batch;
2468     struct i965_render_state *render_state = &i965->render_state;
2469
2470     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2471     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2472     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2473     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2474     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2475     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2476     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
2477     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2478     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2479     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2480 }
2481
2482 static void
2483 gen7_emit_viewport_state_pointers(VADriverContextP ctx)
2484 {
2485     struct i965_driver_data *i965 = i965_driver_data(ctx);
2486     struct intel_batchbuffer *batch = i965->batch;
2487     struct i965_render_state *render_state = &i965->render_state;
2488
2489     BEGIN_BATCH(batch, 2);
2490     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
2491     OUT_RELOC(batch,
2492               render_state->cc.viewport,
2493               I915_GEM_DOMAIN_INSTRUCTION, 0,
2494               0);
2495     ADVANCE_BATCH(batch);
2496
2497     BEGIN_BATCH(batch, 2);
2498     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
2499     OUT_BATCH(batch, 0);
2500     ADVANCE_BATCH(batch);
2501 }
2502
2503 /*
2504  * URB layout on GEN7 
2505  * ----------------------------------------
2506  * | PS Push Constants (8KB) | VS entries |
2507  * ----------------------------------------
2508  */
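/*
 * The 8KB PS push-constant space is carved out of the front of the URB
 * (3DSTATE_PUSH_CONSTANT_ALLOC_PS below, sized in 1KB units), and the VS
 * entry pool is programmed to start right behind it.  Haswell has a larger
 * URB and gets twice the VS entries.
 */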
2509 static void
2510 gen7_emit_urb(VADriverContextP ctx)
2511 {
2512     struct i965_driver_data *i965 = i965_driver_data(ctx);
2513     struct intel_batchbuffer *batch = i965->batch;
2514     unsigned int num_urb_entries = 32;
2515
2516     if (IS_HASWELL(i965->intel.device_id))
2517         num_urb_entries = 64;
2518
2519     BEGIN_BATCH(batch, 2);
2520     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
2521     OUT_BATCH(batch, 8); /* PS push-constant size, in 1KB units */
2522     ADVANCE_BATCH(batch);
2523
2524     BEGIN_BATCH(batch, 2);
2525     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
2526     OUT_BATCH(batch, 
2527               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
2528               (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
2529               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2530     ADVANCE_BATCH(batch);
2531
2532     BEGIN_BATCH(batch, 2);
2533     OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
2534     OUT_BATCH(batch,
2535               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2536               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2537     ADVANCE_BATCH(batch);
2538
2539     BEGIN_BATCH(batch, 2);
2540     OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
2541     OUT_BATCH(batch,
2542               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2543               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2544     ADVANCE_BATCH(batch);
2545
2546     BEGIN_BATCH(batch, 2);
2547     OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
2548     OUT_BATCH(batch,
2549               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2550               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2551     ADVANCE_BATCH(batch);
2552 }
2553
2554 static void
2555 gen7_emit_cc_state_pointers(VADriverContextP ctx)
2556 {
2557     struct i965_driver_data *i965 = i965_driver_data(ctx);
2558     struct intel_batchbuffer *batch = i965->batch;
2559     struct i965_render_state *render_state = &i965->render_state;
2560
2561     BEGIN_BATCH(batch, 2);
2562     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
2563     OUT_RELOC(batch,
2564               render_state->cc.state,
2565               I915_GEM_DOMAIN_INSTRUCTION, 0,
2566               1);
2567     ADVANCE_BATCH(batch);
2568
2569     BEGIN_BATCH(batch, 2);
2570     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
2571     OUT_RELOC(batch,
2572               render_state->cc.blend,
2573               I915_GEM_DOMAIN_INSTRUCTION, 0,
2574               1);
2575     ADVANCE_BATCH(batch);
2576
2577     BEGIN_BATCH(batch, 2);
2578     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
2579     OUT_RELOC(batch,
2580               render_state->cc.depth_stencil,
2581               I915_GEM_DOMAIN_INSTRUCTION, 0, 
2582               1);
2583     ADVANCE_BATCH(batch);
2584 }
2585
2586 static void
2587 gen7_emit_sampler_state_pointers(VADriverContextP ctx)
2588 {
2589     struct i965_driver_data *i965 = i965_driver_data(ctx);
2590     struct intel_batchbuffer *batch = i965->batch;
2591     struct i965_render_state *render_state = &i965->render_state;
2592
2593     BEGIN_BATCH(batch, 2);
2594     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
2595     OUT_RELOC(batch,
2596               render_state->wm.sampler,
2597               I915_GEM_DOMAIN_INSTRUCTION, 0,
2598               0);
2599     ADVANCE_BATCH(batch);
2600 }
2601
2602 static void
2603 gen7_emit_binding_table(VADriverContextP ctx)
2604 {
2605     struct i965_driver_data *i965 = i965_driver_data(ctx);
2606     struct intel_batchbuffer *batch = i965->batch;
2607
2608     BEGIN_BATCH(batch, 2);
2609     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
2610     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
2611     ADVANCE_BATCH(batch);
2612 }
2613
2614 static void
2615 gen7_emit_depth_buffer_state(VADriverContextP ctx)
2616 {
2617     struct i965_driver_data *i965 = i965_driver_data(ctx);
2618     struct intel_batchbuffer *batch = i965->batch;
2619
2620     BEGIN_BATCH(batch, 7);
2621     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
2622     OUT_BATCH(batch,
2623               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
2624               (I965_SURFACE_NULL << 29));
2625     OUT_BATCH(batch, 0);
2626     OUT_BATCH(batch, 0);
2627     OUT_BATCH(batch, 0);
2628     OUT_BATCH(batch, 0);
2629     OUT_BATCH(batch, 0);
2630     ADVANCE_BATCH(batch);
2631
2632     BEGIN_BATCH(batch, 3);
2633     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
2634     OUT_BATCH(batch, 0);
2635     OUT_BATCH(batch, 0);
2636     ADVANCE_BATCH(batch);
2637 }
2638
2639 static void
2640 gen7_emit_drawing_rectangle(VADriverContextP ctx)
2641 {
2642     i965_render_drawing_rectangle(ctx);
2643 }
2644
2645 static void 
2646 gen7_emit_vs_state(VADriverContextP ctx)
2647 {
2648     struct i965_driver_data *i965 = i965_driver_data(ctx);
2649     struct intel_batchbuffer *batch = i965->batch;
2650
2651     /* disable VS constant buffer */
2652     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
2653     OUT_BATCH(batch, 0);
2654     OUT_BATCH(batch, 0);
2655     OUT_BATCH(batch, 0);
2656     OUT_BATCH(batch, 0);
2657     OUT_BATCH(batch, 0);
2658     OUT_BATCH(batch, 0);
2659         
2660     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2661     OUT_BATCH(batch, 0); /* without VS kernel */
2662     OUT_BATCH(batch, 0);
2663     OUT_BATCH(batch, 0);
2664     OUT_BATCH(batch, 0);
2665     OUT_BATCH(batch, 0); /* pass-through */
2666 }
2667
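/*
 * Only the pass-through VS and the PS do real work here, so every other
 * geometry stage GEN7 knows about (GS, HS, TE, DS, stream output) is
 * explicitly disabled, and the GS/HS/DS binding tables are zeroed.
 */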
2668 static void 
2669 gen7_emit_bypass_state(VADriverContextP ctx)
2670 {
2671     struct i965_driver_data *i965 = i965_driver_data(ctx);
2672     struct intel_batchbuffer *batch = i965->batch;
2673
2674     /* bypass GS */
2675     BEGIN_BATCH(batch, 7);
2676     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
2677     OUT_BATCH(batch, 0);
2678     OUT_BATCH(batch, 0);
2679     OUT_BATCH(batch, 0);
2680     OUT_BATCH(batch, 0);
2681     OUT_BATCH(batch, 0);
2682     OUT_BATCH(batch, 0);
2683     ADVANCE_BATCH(batch);
2684
2685     BEGIN_BATCH(batch, 7);      
2686     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2687     OUT_BATCH(batch, 0); /* without GS kernel */
2688     OUT_BATCH(batch, 0);
2689     OUT_BATCH(batch, 0);
2690     OUT_BATCH(batch, 0);
2691     OUT_BATCH(batch, 0);
2692     OUT_BATCH(batch, 0); /* pass-through */
2693     ADVANCE_BATCH(batch);
2694
2695     BEGIN_BATCH(batch, 2);
2696     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
2697     OUT_BATCH(batch, 0);
2698     ADVANCE_BATCH(batch);
2699
2700     /* disable HS */
2701     BEGIN_BATCH(batch, 7);
2702     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
2703     OUT_BATCH(batch, 0);
2704     OUT_BATCH(batch, 0);
2705     OUT_BATCH(batch, 0);
2706     OUT_BATCH(batch, 0);
2707     OUT_BATCH(batch, 0);
2708     OUT_BATCH(batch, 0);
2709     ADVANCE_BATCH(batch);
2710
2711     BEGIN_BATCH(batch, 7);
2712     OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
2713     OUT_BATCH(batch, 0);
2714     OUT_BATCH(batch, 0);
2715     OUT_BATCH(batch, 0);
2716     OUT_BATCH(batch, 0);
2717     OUT_BATCH(batch, 0);
2718     OUT_BATCH(batch, 0);
2719     ADVANCE_BATCH(batch);
2720
2721     BEGIN_BATCH(batch, 2);
2722     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
2723     OUT_BATCH(batch, 0);
2724     ADVANCE_BATCH(batch);
2725
2726     /* Disable TE */
2727     BEGIN_BATCH(batch, 4);
2728     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
2729     OUT_BATCH(batch, 0);
2730     OUT_BATCH(batch, 0);
2731     OUT_BATCH(batch, 0);
2732     ADVANCE_BATCH(batch);
2733
2734     /* Disable DS */
2735     BEGIN_BATCH(batch, 7);
2736     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
2737     OUT_BATCH(batch, 0);
2738     OUT_BATCH(batch, 0);
2739     OUT_BATCH(batch, 0);
2740     OUT_BATCH(batch, 0);
2741     OUT_BATCH(batch, 0);
2742     OUT_BATCH(batch, 0);
2743     ADVANCE_BATCH(batch);
2744
2745     BEGIN_BATCH(batch, 6);
2746     OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
2747     OUT_BATCH(batch, 0);
2748     OUT_BATCH(batch, 0);
2749     OUT_BATCH(batch, 0);
2750     OUT_BATCH(batch, 0);
2751     OUT_BATCH(batch, 0);
2752     ADVANCE_BATCH(batch);
2753
2754     BEGIN_BATCH(batch, 2);
2755     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
2756     OUT_BATCH(batch, 0);
2757     ADVANCE_BATCH(batch);
2758
2759     /* Disable STREAMOUT */
2760     BEGIN_BATCH(batch, 3);
2761     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
2762     OUT_BATCH(batch, 0);
2763     OUT_BATCH(batch, 0);
2764     ADVANCE_BATCH(batch);
2765 }
2766
2767 static void 
2768 gen7_emit_clip_state(VADriverContextP ctx)
2769 {
2770     struct i965_driver_data *i965 = i965_driver_data(ctx);
2771     struct intel_batchbuffer *batch = i965->batch;
2772
2773     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2774     OUT_BATCH(batch, 0);
2775     OUT_BATCH(batch, 0); /* pass-through */
2776     OUT_BATCH(batch, 0);
2777 }
2778
2779 static void 
2780 gen7_emit_sf_state(VADriverContextP ctx)
2781 {
2782     struct i965_driver_data *i965 = i965_driver_data(ctx);
2783     struct intel_batchbuffer *batch = i965->batch;
2784
2785     BEGIN_BATCH(batch, 14);
2786     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
2787     OUT_BATCH(batch,
2788               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
2789               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
2790               (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
2791     OUT_BATCH(batch, 0);
2792     OUT_BATCH(batch, 0);
2793     OUT_BATCH(batch, 0); /* DW4 */
2794     OUT_BATCH(batch, 0);
2795     OUT_BATCH(batch, 0);
2796     OUT_BATCH(batch, 0);
2797     OUT_BATCH(batch, 0);
2798     OUT_BATCH(batch, 0); /* DW9 */
2799     OUT_BATCH(batch, 0);
2800     OUT_BATCH(batch, 0);
2801     OUT_BATCH(batch, 0);
2802     OUT_BATCH(batch, 0);
2803     ADVANCE_BATCH(batch);
2804
2805     BEGIN_BATCH(batch, 7);
2806     OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
2807     OUT_BATCH(batch, 0);
2808     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2809     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
2810     OUT_BATCH(batch, 0);
2811     OUT_BATCH(batch, 0);
2812     OUT_BATCH(batch, 0);
2813     ADVANCE_BATCH(batch);
2814 }
2815
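/*
 * GEN7 splits the old WM setup into 3DSTATE_WM plus 3DSTATE_PS, and the
 * constant buffer moves out of the pointer dword: DW1 of
 * 3DSTATE_CONSTANT_PS holds the read length and DW3 the buffer address.
 * Haswell moved the maximum-thread-count field and requires an explicit
 * sample mask, hence the two IS_HASWELL overrides below.
 */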
2816 static void 
2817 gen7_emit_wm_state(VADriverContextP ctx, int kernel)
2818 {
2819     struct i965_driver_data *i965 = i965_driver_data(ctx);
2820     struct intel_batchbuffer *batch = i965->batch;
2821     struct i965_render_state *render_state = &i965->render_state;
2822     unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
2823     unsigned int num_samples = 0;
2824
2825     if (IS_HASWELL(i965->intel.device_id)) {
2826         max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
2827         num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
2828     }
2829
2830     BEGIN_BATCH(batch, 3);
2831     OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
2832     OUT_BATCH(batch,
2833               GEN7_WM_DISPATCH_ENABLE |
2834               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2835     OUT_BATCH(batch, 0);
2836     ADVANCE_BATCH(batch);
2837
2838     BEGIN_BATCH(batch, 7);
2839     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
2840     OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
2841     OUT_BATCH(batch, 0);
2842     OUT_RELOC(batch, 
2843               render_state->curbe.bo,
2844               I915_GEM_DOMAIN_INSTRUCTION, 0,
2845               0);
2846     OUT_BATCH(batch, 0);
2847     OUT_BATCH(batch, 0);
2848     OUT_BATCH(batch, 0);
2849     ADVANCE_BATCH(batch);
2850
2851     BEGIN_BATCH(batch, 8);
2852     OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
2853     OUT_RELOC(batch, 
2854               render_state->render_kernels[kernel].bo,
2855               I915_GEM_DOMAIN_INSTRUCTION, 0,
2856               0);
2857     OUT_BATCH(batch, 
2858               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
2859               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2860     OUT_BATCH(batch, 0); /* scratch space base offset */
2861     OUT_BATCH(batch, 
2862               ((render_state->max_wm_threads - 1) << max_threads_shift) | num_samples |
2863               GEN7_PS_PUSH_CONSTANT_ENABLE |
2864               GEN7_PS_ATTRIBUTE_ENABLE |
2865               GEN7_PS_16_DISPATCH_ENABLE);
2866     OUT_BATCH(batch, 
2867               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
2868     OUT_BATCH(batch, 0); /* kernel 1 pointer */
2869     OUT_BATCH(batch, 0); /* kernel 2 pointer */
2870     ADVANCE_BATCH(batch);
2871 }
2872
2873 static void
2874 gen7_emit_vertex_element_state(VADriverContextP ctx)
2875 {
2876     struct i965_driver_data *i965 = i965_driver_data(ctx);
2877     struct intel_batchbuffer *batch = i965->batch;
2878
2879     /* Set up our vertex elements, sourced from the single vertex buffer. */
2880     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2881     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2882     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2883               GEN6_VE0_VALID |
2884               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2885               (0 << VE0_OFFSET_SHIFT));
2886     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2887               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2888               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2889               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2890     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2891     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2892               GEN6_VE0_VALID |
2893               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2894               (8 << VE0_OFFSET_SHIFT));
2895     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2896               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2897               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2898               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2899 }
2900
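/*
 * Same two-element vertex layout as on GEN6, but GEN7 requires
 * GEN7_VB0_ADDRESS_MODIFYENABLE for the start/end address pair to take
 * effect, and 3DPRIMITIVE now carries the topology in its own dword rather
 * than in the command header.
 */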
2901 static void
2902 gen7_emit_vertices(VADriverContextP ctx)
2903 {
2904     struct i965_driver_data *i965 = i965_driver_data(ctx);
2905     struct intel_batchbuffer *batch = i965->batch;
2906     struct i965_render_state *render_state = &i965->render_state;
2907
2908     BEGIN_BATCH(batch, 5);
2909     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
2910     OUT_BATCH(batch, 
2911               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2912               GEN6_VB0_VERTEXDATA |
2913               GEN7_VB0_ADDRESS_MODIFYENABLE |
2914               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2915     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2916     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2917     OUT_BATCH(batch, 0);
2918     ADVANCE_BATCH(batch);
2919
2920     BEGIN_BATCH(batch, 7);
2921     OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
2922     OUT_BATCH(batch,
2923               _3DPRIM_RECTLIST |
2924               GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
2925     OUT_BATCH(batch, 3); /* vertex count per instance */
2926     OUT_BATCH(batch, 0); /* start vertex offset */
2927     OUT_BATCH(batch, 1); /* single instance */
2928     OUT_BATCH(batch, 0); /* start instance location */
2929     OUT_BATCH(batch, 0);
2930     ADVANCE_BATCH(batch);
2931 }
2932
static void
gen7_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_emit_invarient_states(ctx);
    gen7_emit_state_base_address(ctx);
    gen7_emit_viewport_state_pointers(ctx);
    gen7_emit_urb(ctx);
    gen7_emit_cc_state_pointers(ctx);
    gen7_emit_sampler_state_pointers(ctx);
    gen7_emit_bypass_state(ctx);
    gen7_emit_vs_state(ctx);
    gen7_emit_clip_state(ctx);
    gen7_emit_sf_state(ctx);
    gen7_emit_wm_state(ctx, kernel);
    gen7_emit_binding_table(ctx);
    gen7_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen7_emit_vertex_element_state(ctx);
    gen7_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
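
/*
 * Top-level Gen7 surface rendering: set up the render states for the
 * YUV->RGB pixel kernel, clear the destination region, then emit the states
 * and submit the batch.
 */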
static void
gen7_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen7_render_initialize(ctx);
    gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_clear_dest_region(ctx);
    gen7_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}
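
/*
 * Enable alpha blending for subpictures.  With source factor SRC_ALPHA and
 * destination factor INV_SRC_ALPHA, the ADD blend function computes the
 * classic (non-premultiplied) "over" operator:
 *
 *     result = src * src.alpha + dst * (1 - src.alpha)
 *
 * Pre- and post-blend clamping to [0, 1] keeps the result in range.
 */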
static void
gen7_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_unmap(render_state->cc.state);
    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}
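
/*
 * Subpicture variant of the state setup: the source is the ARGB subpicture
 * image rather than the planar video surface, and blending is enabled so
 * the subpicture is composited over the already-rendered frame.
 */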
static void
gen7_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_subpicture_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}
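
/*
 * Composite one subpicture (selected by obj_surface->subpic_render_idx)
 * over the destination.  For palettized subpicture formats the image
 * palette is uploaded too; the 0xff argument is the alpha value merged
 * into each palette entry.
 */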
static void
gen7_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen7_render_initialize(ctx);
    gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}


/*
 * global functions
 */
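/* Forward declaration; the function itself lives in i965_drv_video.c and is
 * used below to release the temporary post-processing surface.
 */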
VAStatus
i965_DestroySurfaces(VADriverContextP ctx,
                     VASurfaceID *surface_list,
                     int num_surfaces);
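
/*
 * Common entry point for putsurface rendering.  The surface is first run
 * through i965_post_processing(); if that returns a new surface (for color
 * conversion and/or scaling), rendering targets the intermediate surface
 * instead.  When scaling has already been done there, src_rect is replaced
 * with dst_rect so the blit below is 1:1.  The intermediate surface is
 * destroyed again once the batch has been built.
 */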
void
intel_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int has_done_scaling = 0;
    VASurfaceID out_surface_id = i965_post_processing(ctx,
                                                      obj_surface,
                                                      src_rect,
                                                      dst_rect,
                                                      flags,
                                                      &has_done_scaling);

    assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));

    if (out_surface_id != VA_INVALID_ID) {
        struct object_surface *new_obj_surface = SURFACE(out_surface_id);

        if (new_obj_surface && new_obj_surface->bo)
            obj_surface = new_obj_surface;

        if (has_done_scaling)
            src_rect = dst_rect;
    }

    if (IS_GEN7(i965->intel.device_id))
        gen7_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
    else if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
    else
        i965_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);

    if (out_surface_id != VA_INVALID_ID)
        i965_DestroySurfaces(ctx, &out_surface_id, 1);
}
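
/*
 * Common entry point for subpicture rendering: dispatch to the
 * generation-specific implementation, mirroring intel_render_put_surface().
 */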
void
intel_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_GEN7(i965->intel.device_id))
        gen7_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
    else if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
    else
        i965_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
}
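
/*
 * One-time render-state initialization: pick the kernel binaries for the
 * detected generation, upload them into buffer objects, allocate the 4KB
 * constant (CURBE) buffer, and record the maximum number of WM/PS threads
 * for this device.  The thread count is programmed biased by one; e.g. on
 * Ivybridge GT2:
 *
 *     render_state->max_wm_threads = 172;
 *     ((render_state->max_wm_threads - 1) << max_threads_shift)
 *         -> 171 goes into the 3DSTATE_PS maximum-threads field
 */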
bool
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;

    /* kernel */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
                                 sizeof(render_kernels_gen5[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
                                 sizeof(render_kernels_gen6[0])));

    if (IS_GEN7(i965->intel.device_id))
        memcpy(render_state->render_kernels,
               (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7),
               sizeof(render_state->render_kernels));
    else if (IS_GEN6(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
    else if (IS_IRONLAKE(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
    else
        memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size, 0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* constant buffer */
    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
                                          "constant buffer",
                                          4096, 64);
    assert(render_state->curbe.bo);

    if (IS_HSW_GT1(i965->intel.device_id)) {
        render_state->max_wm_threads = 102;
    } else if (IS_HSW_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 204;
    } else if (IS_HSW_GT3(i965->intel.device_id)) {
        render_state->max_wm_threads = 408;
    } else if (IS_IVB_GT1(i965->intel.device_id) || IS_BAYTRAIL(i965->intel.device_id)) {
        render_state->max_wm_threads = 48;
    } else if (IS_IVB_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 172;
    } else if (IS_SNB_GT1(i965->intel.device_id)) {
        render_state->max_wm_threads = 40;
    } else if (IS_SNB_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 80;
    } else if (IS_IRONLAKE(i965->intel.device_id)) {
        render_state->max_wm_threads = 72; /* 12 * 6 */
    } else if (IS_G4X(i965->intel.device_id)) {
        render_state->max_wm_threads = 50; /* 10 * 5 */
    } else {
        /* should never get here !!! */
        assert(0);
    }

    return true;
}
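
/*
 * Release every buffer object owned by the render state.  Buffers that were
 * never allocated are NULL here, which dri_bo_unreference() accepts, so no
 * existence checks are needed.
 */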
void
i965_render_terminate(VADriverContextP ctx)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    dri_bo_unreference(render_state->curbe.bo);
    render_state->curbe.bo = NULL;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;
    dri_bo_unreference(render_state->vs.state);
    render_state->vs.state = NULL;
    dri_bo_unreference(render_state->sf.state);
    render_state->sf.state = NULL;
    dri_bo_unreference(render_state->wm.sampler);
    render_state->wm.sampler = NULL;
    dri_bo_unreference(render_state->wm.state);
    render_state->wm.state = NULL;
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL;
    dri_bo_unreference(render_state->cc.viewport);
    render_state->cc.viewport = NULL;
    dri_bo_unreference(render_state->cc.state);
    render_state->cc.state = NULL;
    dri_bo_unreference(render_state->cc.blend);
    render_state->cc.blend = NULL;
    dri_bo_unreference(render_state->cc.depth_stencil);
    render_state->cc.depth_stencil = NULL;

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }
}