Use the BDW surface/sampler state and memory address allocation for rendering
[android-x86/hardware-intel-common-vaapi.git] / src / i965_render.c
/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"

#include "i965_render.h"

#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

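/* The hardware encodes GRF usage as the number of 16-register blocks minus one. */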
#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

static const uint32_t ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_color_balance.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
static const uint32_t ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* Programs for Ironlake */
static const uint32_t sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* Programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] =
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_color_balance.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* Programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] =
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_color_balance.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Programs for Haswell */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/*
 * TODO: Write dedicated shaders for GEN8.
 * For now the gen7/haswell shaders are reused.
 */
/* Programs for Gen8 */
static const uint32_t sf_kernel_static_gen8[][4] =
{
};
static const uint32_t ps_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

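/*
 * Pad every surface state entry to the largest size across the supported
 * generations, so a single buffer layout (surface states followed by the
 * binding table) works everywhere.
 */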
#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_GEN8, \
                                MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))

#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

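/* Type-pun a float to its IEEE-754 bit pattern for emission into the batch. */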
static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen8[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen8,
        sizeof(sf_kernel_static_gen8),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen8,
        sizeof(ps_kernel_static_gen8),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen8,
        sizeof(ps_subpic_kernel_static_gen8),
        NULL
    }
};

#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4

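/*
 * BT.601/BT.709/SMPTE-240M YCbCr -> RGB coefficients for the shaders above,
 * in studio-swing form (1.164 = 255/219); the fourth column carries the
 * -16/255 (-0.06275) and -128/255 (-0.50196) input biases.
 */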
static float yuv_to_rgb_bt601[3][4] = {
    {1.164,   0,       1.596,   -0.06275,},
    {1.164,  -0.392,  -0.813,   -0.50196,},
    {1.164,   2.017,   0,       -0.50196,},
};

static float yuv_to_rgb_bt709[3][4] = {
    {1.164,   0,       1.793,   -0.06275,},
    {1.164,  -0.213,  -0.533,   -0.50196,},
    {1.164,   2.112,   0,       -0.50196,},
};

static float yuv_to_rgb_smpte_240[3][4] = {
    {1.164,   0,       1.794,   -0.06275,},
    {1.164,  -0.258,  -0.5425,  -0.50196,},
    {1.164,   2.078,   0,       -0.50196,},
};

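/* The VS unit is configured as a pass-through: vertex shading is disabled
 * and vertices go straight on to the SF stage. */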
static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_id))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

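/* One sampler per bound source plane: bilinear filtering, coordinates
 * clamped at the edges in all directions. */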
static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;   /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dst_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

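/*
 * Surface geometry below is programmed minus one (width/height/pitch);
 * single-field rendering halves the height while doubling the effective
 * line stride.
 */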
static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}

static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen8_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* Set "Shader Channel Select" */
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

/* Set "Shader Channel Select" for GEN8+ */
void
gen8_render_set_surface_scs(struct gen8_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}

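/*
 * On Gen8 the surface base address moved to SS8 (48-bit addressing), so the
 * relocations elsewhere in this file target ss8 rather than ss1.
 */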
static void
gen8_render_set_surface_state(
    struct gen8_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss8.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen8_render_set_surface_tiling(ss, tiling);
}

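/*
 * Write the surface state for one source plane into its padded slot, point
 * the matching binding table entry at it, and account for one sampler.
 */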
static void
i965_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN8(i965->intel.device_id)) {
        gen8_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        gen8_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                          region);
    } else if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

static void
i965_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      struct object_surface *obj_surface)
{
    dri_bo *subpic_region;
    unsigned int index;
    struct object_subpic *obj_subpic;
    struct object_image *obj_image;

    assert(obj_surface);
    assert(obj_surface->bo);

    index = obj_surface->subpic_render_idx;
    obj_subpic = obj_surface->obj_subpic[index];
    obj_image = obj_subpic->obj_image;
    subpic_region = obj_image->bo;
    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN8(i965->intel.device_id)) {
        gen8_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        gen8_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                          dest_region->bo);
    } else if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

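/*
 * Emit three {s, t, x, y} vertices for a RECTLIST primitive; the fourth
 * corner is derived by the hardware.  Rotation only permutes the texture
 * coordinates.
 */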
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}

static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

#define PI  3.1415926

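/*
 * CURBE layout consumed by the PS kernels above:
 *   ushort[0]: sampling mode (0 = planar Y/U/V, 1 = NV12, 2 = Y800)
 *   ushort[1]: non-zero skips the color-balance transform
 *   float[4..7]:  contrast, brightness, cos(hue)*contrast*saturation,
 *                 sin(hue)*contrast*saturation
 *   float[8..19]: 3x4 YUV -> RGB matrix selected by the VA_SRC_* flag
 */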
static void
i965_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0'));

        constant_buffer[0] = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
            constant_buffer[0] = 1;
        else
            constant_buffer[0] = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    color_flag = flags & VA_SRC_COLOR_MASK;
    yuv_to_rgb = (float *)constant_buffer + 8;
    if (color_flag == VA_SRC_BT709)
        memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
    else if (color_flag == VA_SRC_SMPTE_240)
        memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
    else
        memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }

    dri_bo_map(render_state->curbe.bo, 1);

    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, obj_surface, flags);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

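/*
 * The surface state base address points at the combined surface state /
 * binding table buffer, so the binding table pointer programmed later is an
 * offset into that buffer.
 */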
static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

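/*
 * URB_FENCE takes cumulative upper bounds: each section (VS, GS, CLIP, SF,
 * CS) starts where the previous one ends, sized by the URB_* defines above.
 */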
static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |      /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));               /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}

1498 static void
1499 i965_render_constant_buffer(VADriverContextP ctx)
1500 {
1501     struct i965_driver_data *i965 = i965_driver_data(ctx);
1502     struct intel_batchbuffer *batch = i965->batch;
1503     struct i965_render_state *render_state = &i965->render_state;
1504
1505     BEGIN_BATCH(batch, 2);
1506     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1507     OUT_RELOC(batch, render_state->curbe.bo,
1508               I915_GEM_DOMAIN_INSTRUCTION, 0,
1509               URB_CS_ENTRY_SIZE - 1);
1510     ADVANCE_BATCH(batch);    
1511 }
1512
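/*
 * The drawing rectangle clamps all rendering to the destination region.
 * DW2 packs the inclusive bottom-right corner as (height - 1) << 16 in
 * the high half-word and (width - 1) in the low one; e.g. for a
 * 1920x1080 target DW2 would be (1079 << 16) | 1919 = 0x0437077f.
 */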
1513 static void
1514 i965_render_drawing_rectangle(VADriverContextP ctx)
1515 {
1516     struct i965_driver_data *i965 = i965_driver_data(ctx);
1517     struct intel_batchbuffer *batch = i965->batch;
1518     struct i965_render_state *render_state = &i965->render_state;
1519     struct intel_region *dest_region = render_state->draw_region;
1520
1521     BEGIN_BATCH(batch, 4);
1522     OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
1523     OUT_BATCH(batch, 0x00000000);
1524     OUT_BATCH(batch, (dest_region->width - 1) | ((dest_region->height - 1) << 16));
1525     OUT_BATCH(batch, 0x00000000);         
1526     ADVANCE_BATCH(batch);
1527 }
1528
1529 static void
1530 i965_render_vertex_elements(VADriverContextP ctx)
1531 {
1532     struct i965_driver_data *i965 = i965_driver_data(ctx);
1533     struct intel_batchbuffer *batch = i965->batch;
1534
1535     if (IS_IRONLAKE(i965->intel.device_id)) {
1536         BEGIN_BATCH(batch, 5);
1537         OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
1538         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
1539         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1540                   VE0_VALID |
1541                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1542                   (0 << VE0_OFFSET_SHIFT));
1543         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1544                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1545                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1546                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1547         /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
1548         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1549                   VE0_VALID |
1550                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1551                   (8 << VE0_OFFSET_SHIFT));
1552         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1553                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1554                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1555                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1556         ADVANCE_BATCH(batch);
1557     } else {
1558         BEGIN_BATCH(batch, 5);
1559         OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
1560         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
1561         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1562                   VE0_VALID |
1563                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1564                   (0 << VE0_OFFSET_SHIFT));
1565         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1566                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1567                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1568                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
1569                   (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
1570         /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
1571         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1572                   VE0_VALID |
1573                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1574                   (8 << VE0_OFFSET_SHIFT));
1575         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1576                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1577                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1578                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
1579                   (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
1580         ADVANCE_BATCH(batch);
1581     }
1582 }
1583
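/*
 * Loads the texture palette used by paletted subpicture formats (e.g.
 * AI44). Each entry packs the caller-supplied alpha over the stored
 * 24-bit color, so alpha = 0xff and color 0x336699 become 0xff336699.
 */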
1584 static void
1585 i965_render_upload_image_palette(
1586     VADriverContextP ctx,
1587     struct object_image *obj_image,
1588     unsigned int     alpha
1589 )
1590 {
1591     struct i965_driver_data *i965 = i965_driver_data(ctx);
1592     struct intel_batchbuffer *batch = i965->batch;
1593     unsigned int i;
1594
1595     assert(obj_image);
1596
1597     if (!obj_image)
1598         return;
1599
1600     if (obj_image->image.num_palette_entries == 0)
1601         return;
1602
1603     BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
1604     OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
1605     /* fill palette: alpha in bits 31:24, color in bits 23:0 */
1606
1607     for (i = 0; i < obj_image->image.num_palette_entries; i++)
1608         OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
1609     ADVANCE_BATCH(batch);
1610 }
1611
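/*
 * Final packets of the Gen4/5 pipeline: bind the vertex buffer (16-byte
 * stride: x, y, s, t as floats) and fire a 3-vertex RECTLIST; the
 * hardware infers the fourth corner of the rectangle. DW3 of the vertex
 * buffer packet differs by generation: Ironlake wants an end-address
 * relocation, earlier parts take a max-index value instead.
 */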
1612 static void
1613 i965_render_startup(VADriverContextP ctx)
1614 {
1615     struct i965_driver_data *i965 = i965_driver_data(ctx);
1616     struct intel_batchbuffer *batch = i965->batch;
1617     struct i965_render_state *render_state = &i965->render_state;
1618
1619     BEGIN_BATCH(batch, 11);
1620     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
1621     OUT_BATCH(batch, 
1622               (0 << VB0_BUFFER_INDEX_SHIFT) |
1623               VB0_VERTEXDATA |
1624               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
1625     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
1626
1627     if (IS_IRONLAKE(i965->intel.device_id))
1628         OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
1629     else
1630         OUT_BATCH(batch, 3);
1631
1632     OUT_BATCH(batch, 0);
1633
1634     OUT_BATCH(batch, 
1635               CMD_3DPRIMITIVE |
1636               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
1637               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
1638               (0 << 9) |
1639               4);
1640     OUT_BATCH(batch, 3); /* vertex count per instance */
1641     OUT_BATCH(batch, 0); /* start vertex offset */
1642     OUT_BATCH(batch, 1); /* single instance */
1643     OUT_BATCH(batch, 0); /* start instance location */
1644     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
1645     ADVANCE_BATCH(batch);
1646 }
1647
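/*
 * Clears the destination with the 2D blitter before compositing:
 * XY_COLOR_BLT with ROP 0xf0 (PATCOPY) fills the rectangle with the
 * solid color in the last dword (black here). Tiled targets switch the
 * pitch field to dword units, and Gen6+ parts issue the blit on the
 * BLT ring, hence the two start_atomic variants below.
 */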
1648 static void 
1649 i965_clear_dest_region(VADriverContextP ctx)
1650 {
1651     struct i965_driver_data *i965 = i965_driver_data(ctx);
1652     struct intel_batchbuffer *batch = i965->batch;
1653     struct i965_render_state *render_state = &i965->render_state;
1654     struct intel_region *dest_region = render_state->draw_region;
1655     unsigned int blt_cmd, br13;
1656     int pitch;
1657
1658     blt_cmd = XY_COLOR_BLT_CMD;
1659     br13 = 0xf0 << 16;
1660     pitch = dest_region->pitch;
1661
1662     if (dest_region->cpp == 4) {
1663         br13 |= BR13_8888;
1664         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
1665     } else {
1666         assert(dest_region->cpp == 2);
1667         br13 |= BR13_565;
1668     }
1669
1670     if (dest_region->tiling != I915_TILING_NONE) {
1671         blt_cmd |= XY_COLOR_BLT_DST_TILED;
1672         pitch /= 4;
1673     }
1674
1675     br13 |= pitch;
1676
1677     if (IS_GEN6(i965->intel.device_id) ||
1678         IS_GEN7(i965->intel.device_id) ||
1679         IS_GEN8(i965->intel.device_id)) {
1680         intel_batchbuffer_start_atomic_blt(batch, 24);
1681         BEGIN_BLT_BATCH(batch, 6);
1682     } else {
1683         intel_batchbuffer_start_atomic(batch, 24);
1684         BEGIN_BATCH(batch, 6);
1685     }
1686
1687     OUT_BATCH(batch, blt_cmd);
1688     OUT_BATCH(batch, br13);
1689     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
1690     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
1691               (dest_region->x + dest_region->width));
1692     OUT_RELOC(batch, dest_region->bo, 
1693               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1694               0);
1695     OUT_BATCH(batch, 0x0);
1696     ADVANCE_BATCH(batch);
1697     intel_batchbuffer_end_atomic(batch);
1698 }
1699
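/*
 * Full Gen4/5 pipeline for a surface blit, emitted as one atomic batch:
 * pipeline select and SIP, state base addresses, binding table, constant
 * color, unit state pointers, URB and CURBE layout, drawing rectangle,
 * vertex elements, and finally the primitive itself.
 */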
1700 static void
1701 i965_surface_render_pipeline_setup(VADriverContextP ctx)
1702 {
1703     struct i965_driver_data *i965 = i965_driver_data(ctx);
1704     struct intel_batchbuffer *batch = i965->batch;
1705
1706     i965_clear_dest_region(ctx);
1707     intel_batchbuffer_start_atomic(batch, 0x1000);
1708     intel_batchbuffer_emit_mi_flush(batch);
1709     i965_render_pipeline_select(ctx);
1710     i965_render_state_sip(ctx);
1711     i965_render_state_base_address(ctx);
1712     i965_render_binding_table_pointers(ctx);
1713     i965_render_constant_color(ctx);
1714     i965_render_pipelined_pointers(ctx);
1715     i965_render_urb_layout(ctx);
1716     i965_render_cs_urb_layout(ctx);
1717     i965_render_constant_buffer(ctx);
1718     i965_render_drawing_rectangle(ctx);
1719     i965_render_vertex_elements(ctx);
1720     i965_render_startup(ctx);
1721     intel_batchbuffer_end_atomic(batch);
1722 }
1723
1724 static void
1725 i965_subpic_render_pipeline_setup(VADriverContextP ctx)
1726 {
1727     struct i965_driver_data *i965 = i965_driver_data(ctx);
1728     struct intel_batchbuffer *batch = i965->batch;
1729
1730     intel_batchbuffer_start_atomic(batch, 0x1000);
1731     intel_batchbuffer_emit_mi_flush(batch);
1732     i965_render_pipeline_select(ctx);
1733     i965_render_state_sip(ctx);
1734     i965_render_state_base_address(ctx);
1735     i965_render_binding_table_pointers(ctx);
1736     i965_render_constant_color(ctx);
1737     i965_render_pipelined_pointers(ctx);
1738     i965_render_urb_layout(ctx);
1739     i965_render_cs_urb_layout(ctx);
1740     i965_render_constant_buffer(ctx);
1741     i965_render_drawing_rectangle(ctx);
1742     i965_render_vertex_elements(ctx);
1743     i965_render_startup(ctx);
1744     intel_batchbuffer_end_atomic(batch);
1745 }
1746
1747
1748 static void 
1749 i965_render_initialize(VADriverContextP ctx)
1750 {
1751     struct i965_driver_data *i965 = i965_driver_data(ctx);
1752     struct i965_render_state *render_state = &i965->render_state;
1753     dri_bo *bo;
1754
1755     /* VERTEX BUFFER */
1756     dri_bo_unreference(render_state->vb.vertex_buffer);
1757     bo = dri_bo_alloc(i965->intel.bufmgr,
1758                       "vertex buffer",
1759                       4096,
1760                       4096);
1761     assert(bo);
1762     render_state->vb.vertex_buffer = bo;
1763
1764     /* VS */
1765     dri_bo_unreference(render_state->vs.state);
1766     bo = dri_bo_alloc(i965->intel.bufmgr,
1767                       "vs state",
1768                       sizeof(struct i965_vs_unit_state),
1769                       64);
1770     assert(bo);
1771     render_state->vs.state = bo;
1772
1773     /* GS */
1774     /* CLIP */
1775     /* SF */
1776     dri_bo_unreference(render_state->sf.state);
1777     bo = dri_bo_alloc(i965->intel.bufmgr,
1778                       "sf state",
1779                       sizeof(struct i965_sf_unit_state),
1780                       64);
1781     assert(bo);
1782     render_state->sf.state = bo;
1783
1784     /* WM */
1785     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1786     bo = dri_bo_alloc(i965->intel.bufmgr,
1787                       "surface state & binding table",
1788                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1789                       4096);
1790     assert(bo);
1791     render_state->wm.surface_state_binding_table_bo = bo;
1792
1793     dri_bo_unreference(render_state->wm.sampler);
1794     bo = dri_bo_alloc(i965->intel.bufmgr,
1795                       "sampler state",
1796                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1797                       64);
1798     assert(bo);
1799     render_state->wm.sampler = bo;
1800     render_state->wm.sampler_count = 0;
1801
1802     dri_bo_unreference(render_state->wm.state);
1803     bo = dri_bo_alloc(i965->intel.bufmgr,
1804                       "wm state",
1805                       sizeof(struct i965_wm_unit_state),
1806                       64);
1807     assert(bo);
1808     render_state->wm.state = bo;
1809
1810     /* COLOR CALCULATOR */
1811     dri_bo_unreference(render_state->cc.state);
1812     bo = dri_bo_alloc(i965->intel.bufmgr,
1813                       "color calc state",
1814                       sizeof(struct i965_cc_unit_state),
1815                       64);
1816     assert(bo);
1817     render_state->cc.state = bo;
1818
1819     dri_bo_unreference(render_state->cc.viewport);
1820     bo = dri_bo_alloc(i965->intel.bufmgr,
1821                       "cc viewport",
1822                       sizeof(struct i965_cc_viewport),
1823                       64);
1824     assert(bo);
1825     render_state->cc.viewport = bo;
1826 }
1827
1828 static void
1829 i965_render_put_surface(
1830     VADriverContextP   ctx,
1831     struct object_surface *obj_surface,
1832     const VARectangle *src_rect,
1833     const VARectangle *dst_rect,
1834     unsigned int       flags
1835 )
1836 {
1837     struct i965_driver_data *i965 = i965_driver_data(ctx);
1838     struct intel_batchbuffer *batch = i965->batch;
1839
1840     i965_render_initialize(ctx);
1841     i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
1842     i965_surface_render_pipeline_setup(ctx);
1843     intel_batchbuffer_flush(batch);
1844 }
1845
1846 static void
1847 i965_render_put_subpicture(
1848     VADriverContextP   ctx,
1849     struct object_surface *obj_surface,
1850     const VARectangle *src_rect,
1851     const VARectangle *dst_rect
1852 )
1853 {
1854     struct i965_driver_data *i965 = i965_driver_data(ctx);
1855     struct intel_batchbuffer *batch = i965->batch;
1856     unsigned int index = obj_surface->subpic_render_idx;
1857     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1858
1859     assert(obj_subpic);
1860
1861     i965_render_initialize(ctx);
1862     i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
1863     i965_subpic_render_pipeline_setup(ctx);
1864     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
1865     intel_batchbuffer_flush(batch);
1866 }
1867
1868 /*
1869  * for GEN6+
1870  */
1871 static void 
1872 gen6_render_initialize(VADriverContextP ctx)
1873 {
1874     struct i965_driver_data *i965 = i965_driver_data(ctx);
1875     struct i965_render_state *render_state = &i965->render_state;
1876     dri_bo *bo;
1877
1878     /* VERTEX BUFFER */
1879     dri_bo_unreference(render_state->vb.vertex_buffer);
1880     bo = dri_bo_alloc(i965->intel.bufmgr,
1881                       "vertex buffer",
1882                       4096,
1883                       4096);
1884     assert(bo);
1885     render_state->vb.vertex_buffer = bo;
1886
1887     /* WM */
1888     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1889     bo = dri_bo_alloc(i965->intel.bufmgr,
1890                       "surface state & binding table",
1891                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1892                       4096);
1893     assert(bo);
1894     render_state->wm.surface_state_binding_table_bo = bo;
1895
1896     dri_bo_unreference(render_state->wm.sampler);
1897     bo = dri_bo_alloc(i965->intel.bufmgr,
1898                       "sampler state",
1899                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1900                       4096);
1901     assert(bo);
1902     render_state->wm.sampler = bo;
1903     render_state->wm.sampler_count = 0;
1904
1905     /* COLOR CALCULATOR */
1906     dri_bo_unreference(render_state->cc.state);
1907     bo = dri_bo_alloc(i965->intel.bufmgr,
1908                       "color calc state",
1909                       sizeof(struct gen6_color_calc_state),
1910                       4096);
1911     assert(bo);
1912     render_state->cc.state = bo;
1913
1914     /* CC VIEWPORT */
1915     dri_bo_unreference(render_state->cc.viewport);
1916     bo = dri_bo_alloc(i965->intel.bufmgr,
1917                       "cc viewport",
1918                       sizeof(struct i965_cc_viewport),
1919                       4096);
1920     assert(bo);
1921     render_state->cc.viewport = bo;
1922
1923     /* BLEND STATE */
1924     dri_bo_unreference(render_state->cc.blend);
1925     bo = dri_bo_alloc(i965->intel.bufmgr,
1926                       "blend state",
1927                       sizeof(struct gen6_blend_state),
1928                       4096);
1929     assert(bo);
1930     render_state->cc.blend = bo;
1931
1932     /* DEPTH & STENCIL STATE */
1933     dri_bo_unreference(render_state->cc.depth_stencil);
1934     bo = dri_bo_alloc(i965->intel.bufmgr,
1935                       "depth & stencil state",
1936                       sizeof(struct gen6_depth_stencil_state),
1937                       4096);
1938     assert(bo);
1939     render_state->cc.depth_stencil = bo;
1940 }
1941
1942 static void
1943 gen6_render_color_calc_state(VADriverContextP ctx)
1944 {
1945     struct i965_driver_data *i965 = i965_driver_data(ctx);
1946     struct i965_render_state *render_state = &i965->render_state;
1947     struct gen6_color_calc_state *color_calc_state;
1948     
1949     dri_bo_map(render_state->cc.state, 1);
1950     assert(render_state->cc.state->virtual);
1951     color_calc_state = render_state->cc.state->virtual;
1952     memset(color_calc_state, 0, sizeof(*color_calc_state));
1953     color_calc_state->constant_r = 1.0;
1954     color_calc_state->constant_g = 0.0;
1955     color_calc_state->constant_b = 1.0;
1956     color_calc_state->constant_a = 1.0;
1957     dri_bo_unmap(render_state->cc.state);
1958 }
1959
1960 static void
1961 gen6_render_blend_state(VADriverContextP ctx)
1962 {
1963     struct i965_driver_data *i965 = i965_driver_data(ctx);
1964     struct i965_render_state *render_state = &i965->render_state;
1965     struct gen6_blend_state *blend_state;
1966     
1967     dri_bo_map(render_state->cc.blend, 1);
1968     assert(render_state->cc.blend->virtual);
1969     blend_state = render_state->cc.blend->virtual;
1970     memset(blend_state, 0, sizeof(*blend_state));
1971     blend_state->blend1.logic_op_enable = 1;
1972     blend_state->blend1.logic_op_func = 0xc;
1973     dri_bo_unmap(render_state->cc.blend);
1974 }
1975
1976 static void
1977 gen6_render_depth_stencil_state(VADriverContextP ctx)
1978 {
1979     struct i965_driver_data *i965 = i965_driver_data(ctx);
1980     struct i965_render_state *render_state = &i965->render_state;
1981     struct gen6_depth_stencil_state *depth_stencil_state;
1982     
1983     dri_bo_map(render_state->cc.depth_stencil, 1);
1984     assert(render_state->cc.depth_stencil->virtual);
1985     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1986     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
1987     dri_bo_unmap(render_state->cc.depth_stencil);
1988 }
1989
1990 static void
1991 gen6_render_setup_states(
1992     VADriverContextP   ctx,
1993     struct object_surface *obj_surface,
1994     const VARectangle *src_rect,
1995     const VARectangle *dst_rect,
1996     unsigned int       flags
1997 )
1998 {
1999     i965_render_dest_surface_state(ctx, 0);
2000     i965_render_src_surfaces_state(ctx, obj_surface, flags);
2001     i965_render_sampler(ctx);
2002     i965_render_cc_viewport(ctx);
2003     gen6_render_color_calc_state(ctx);
2004     gen6_render_blend_state(ctx);
2005     gen6_render_depth_stencil_state(ctx);
2006     i965_render_upload_constants(ctx, obj_surface, flags);
2007     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
2008 }
2009
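/*
 * States that never change between blits on Gen6: select the 3D
 * pipeline, force 1x multisampling with a sample mask of 1, and zero
 * the system instruction pointer.
 */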
2010 static void
2011 gen6_emit_invarient_states(VADriverContextP ctx)
2012 {
2013     struct i965_driver_data *i965 = i965_driver_data(ctx);
2014     struct intel_batchbuffer *batch = i965->batch;
2015
2016     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
2017
2018     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
2019     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
2020               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
2021     OUT_BATCH(batch, 0);
2022
2023     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
2024     OUT_BATCH(batch, 1);
2025
2026     /* Set system instruction pointer */
2027     OUT_BATCH(batch, CMD_STATE_SIP | 0);
2028     OUT_BATCH(batch, 0);
2029 }
2030
2031 static void
2032 gen6_emit_state_base_address(VADriverContextP ctx)
2033 {
2034     struct i965_driver_data *i965 = i965_driver_data(ctx);
2035     struct intel_batchbuffer *batch = i965->batch;
2036     struct i965_render_state *render_state = &i965->render_state;
2037
2038     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2039     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2040     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2041     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2042     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2043     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2044     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
2045     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2046     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2047     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2048 }
2049
2050 static void
2051 gen6_emit_viewport_state_pointers(VADriverContextP ctx)
2052 {
2053     struct i965_driver_data *i965 = i965_driver_data(ctx);
2054     struct intel_batchbuffer *batch = i965->batch;
2055     struct i965_render_state *render_state = &i965->render_state;
2056
2057     OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
2058               GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
2059               (4 - 2));
2060     OUT_BATCH(batch, 0);
2061     OUT_BATCH(batch, 0);
2062     OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
2063 }
2064
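/*
 * Minimal URB configuration for Gen6: only the VS is given entries
 * (24, the hardware minimum noted below); the GS gets none because the
 * stage stays disabled.
 */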
2065 static void
2066 gen6_emit_urb(VADriverContextP ctx)
2067 {
2068     struct i965_driver_data *i965 = i965_driver_data(ctx);
2069     struct intel_batchbuffer *batch = i965->batch;
2070
2071     OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
2072     OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
2073               (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
2074     OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
2075               (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
2076 }
2077
2078 static void
2079 gen6_emit_cc_state_pointers(VADriverContextP ctx)
2080 {
2081     struct i965_driver_data *i965 = i965_driver_data(ctx);
2082     struct intel_batchbuffer *batch = i965->batch;
2083     struct i965_render_state *render_state = &i965->render_state;
2084
2085     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
2086     OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
2087     OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
2088     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
2089 }
2090
2091 static void
2092 gen6_emit_sampler_state_pointers(VADriverContextP ctx)
2093 {
2094     struct i965_driver_data *i965 = i965_driver_data(ctx);
2095     struct intel_batchbuffer *batch = i965->batch;
2096     struct i965_render_state *render_state = &i965->render_state;
2097
2098     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
2099               GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
2100               (4 - 2));
2101     OUT_BATCH(batch, 0); /* VS */
2102     OUT_BATCH(batch, 0); /* GS */
2103     OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
2104 }
2105
2106 static void
2107 gen6_emit_binding_table(VADriverContextP ctx)
2108 {
2109     struct i965_driver_data *i965 = i965_driver_data(ctx);
2110     struct intel_batchbuffer *batch = i965->batch;
2111
2112     /* Binding table pointers */
2113     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
2114               GEN6_BINDING_TABLE_MODIFY_PS |
2115               (4 - 2));
2116     OUT_BATCH(batch, 0);                /* vs */
2117     OUT_BATCH(batch, 0);                /* gs */
2118     /* Only the PS uses the binding table */
2119     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
2120 }
2121
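/*
 * The blit needs no depth or stencil test, so a null depth buffer with
 * a don't-care D32_FLOAT format keeps that part of the pipeline in a
 * well-defined, disabled state.
 */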
2122 static void
2123 gen6_emit_depth_buffer_state(VADriverContextP ctx)
2124 {
2125     struct i965_driver_data *i965 = i965_driver_data(ctx);
2126     struct intel_batchbuffer *batch = i965->batch;
2127
2128     OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
2129     OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
2130               (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
2131     OUT_BATCH(batch, 0);
2132     OUT_BATCH(batch, 0);
2133     OUT_BATCH(batch, 0);
2134     OUT_BATCH(batch, 0);
2135     OUT_BATCH(batch, 0);
2136
2137     OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
2138     OUT_BATCH(batch, 0);
2139 }
2140
2141 static void
2142 gen6_emit_drawing_rectangle(VADriverContextP ctx)
2143 {
2144     i965_render_drawing_rectangle(ctx);
2145 }
2146
2147 static void 
2148 gen6_emit_vs_state(VADriverContextP ctx)
2149 {
2150     struct i965_driver_data *i965 = i965_driver_data(ctx);
2151     struct intel_batchbuffer *batch = i965->batch;
2152
2153     /* disable VS constant buffer */
2154     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
2155     OUT_BATCH(batch, 0);
2156     OUT_BATCH(batch, 0);
2157     OUT_BATCH(batch, 0);
2158     OUT_BATCH(batch, 0);
2159         
2160     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2161     OUT_BATCH(batch, 0); /* without VS kernel */
2162     OUT_BATCH(batch, 0);
2163     OUT_BATCH(batch, 0);
2164     OUT_BATCH(batch, 0);
2165     OUT_BATCH(batch, 0); /* pass-through */
2166 }
2167
2168 static void 
2169 gen6_emit_gs_state(VADriverContextP ctx)
2170 {
2171     struct i965_driver_data *i965 = i965_driver_data(ctx);
2172     struct intel_batchbuffer *batch = i965->batch;
2173
2174     /* disable GS constant buffer */
2175     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
2176     OUT_BATCH(batch, 0);
2177     OUT_BATCH(batch, 0);
2178     OUT_BATCH(batch, 0);
2179     OUT_BATCH(batch, 0);
2180         
2181     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2182     OUT_BATCH(batch, 0); /* without GS kernel */
2183     OUT_BATCH(batch, 0);
2184     OUT_BATCH(batch, 0);
2185     OUT_BATCH(batch, 0);
2186     OUT_BATCH(batch, 0);
2187     OUT_BATCH(batch, 0); /* pass-through */
2188 }
2189
2190 static void 
2191 gen6_emit_clip_state(VADriverContextP ctx)
2192 {
2193     struct i965_driver_data *i965 = i965_driver_data(ctx);
2194     struct intel_batchbuffer *batch = i965->batch;
2195
2196     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2197     OUT_BATCH(batch, 0);
2198     OUT_BATCH(batch, 0); /* pass-through */
2199     OUT_BATCH(batch, 0);
2200 }
2201
2202 static void 
2203 gen6_emit_sf_state(VADriverContextP ctx)
2204 {
2205     struct i965_driver_data *i965 = i965_driver_data(ctx);
2206     struct intel_batchbuffer *batch = i965->batch;
2207
2208     OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
2209     OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
2210               (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
2211               (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
2212     OUT_BATCH(batch, 0);
2213     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2214     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
2215     OUT_BATCH(batch, 0);
2216     OUT_BATCH(batch, 0);
2217     OUT_BATCH(batch, 0);
2218     OUT_BATCH(batch, 0);
2219     OUT_BATCH(batch, 0); /* DW9 */
2220     OUT_BATCH(batch, 0);
2221     OUT_BATCH(batch, 0);
2222     OUT_BATCH(batch, 0);
2223     OUT_BATCH(batch, 0);
2224     OUT_BATCH(batch, 0); /* DW14 */
2225     OUT_BATCH(batch, 0);
2226     OUT_BATCH(batch, 0);
2227     OUT_BATCH(batch, 0);
2228     OUT_BATCH(batch, 0);
2229     OUT_BATCH(batch, 0); /* DW19 */
2230 }
2231
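/*
 * WM/pixel-shader setup for Gen6: bind the push constants uploaded to
 * the CURBE buffer, point the hardware at the chosen PS kernel, and
 * enable SIMD16 dispatch with the per-generation maximum thread count.
 */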
2232 static void 
2233 gen6_emit_wm_state(VADriverContextP ctx, int kernel)
2234 {
2235     struct i965_driver_data *i965 = i965_driver_data(ctx);
2236     struct intel_batchbuffer *batch = i965->batch;
2237     struct i965_render_state *render_state = &i965->render_state;
2238
2239     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
2240               GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
2241               (5 - 2));
2242     OUT_RELOC(batch, 
2243               render_state->curbe.bo,
2244               I915_GEM_DOMAIN_INSTRUCTION, 0,
2245               (URB_CS_ENTRY_SIZE-1));
2246     OUT_BATCH(batch, 0);
2247     OUT_BATCH(batch, 0);
2248     OUT_BATCH(batch, 0);
2249
2250     OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
2251     OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
2252               I915_GEM_DOMAIN_INSTRUCTION, 0,
2253               0);
2254     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
2255               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2256     OUT_BATCH(batch, 0);
2257     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
2258     OUT_BATCH(batch, ((render_state->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
2259               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
2260               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
2261     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
2262               GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2263     OUT_BATCH(batch, 0);
2264     OUT_BATCH(batch, 0);
2265 }
2266
2267 static void
2268 gen6_emit_vertex_element_state(VADriverContextP ctx)
2269 {
2270     struct i965_driver_data *i965 = i965_driver_data(ctx);
2271     struct intel_batchbuffer *batch = i965->batch;
2272
2273     /* Set up our vertex elements, sourced from the single vertex buffer. */
2274     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2275     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2276     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2277               GEN6_VE0_VALID |
2278               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2279               (0 << VE0_OFFSET_SHIFT));
2280     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2281               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2282               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2283               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2284     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2285     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2286               GEN6_VE0_VALID |
2287               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2288               (8 << VE0_OFFSET_SHIFT));
2289     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2290               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2291               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2292               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2293 }
2294
2295 static void
2296 gen6_emit_vertices(VADriverContextP ctx)
2297 {
2298     struct i965_driver_data *i965 = i965_driver_data(ctx);
2299     struct intel_batchbuffer *batch = i965->batch;
2300     struct i965_render_state *render_state = &i965->render_state;
2301
2302     BEGIN_BATCH(batch, 11);
2303     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
2304     OUT_BATCH(batch, 
2305               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2306               GEN6_VB0_VERTEXDATA |
2307               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2308     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2309     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2310     OUT_BATCH(batch, 0);
2311
2312     OUT_BATCH(batch, 
2313               CMD_3DPRIMITIVE |
2314               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
2315               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
2316               (0 << 9) |
2317               4);
2318     OUT_BATCH(batch, 3); /* vertex count per instance */
2319     OUT_BATCH(batch, 0); /* start vertex offset */
2320     OUT_BATCH(batch, 1); /* single instance */
2321     OUT_BATCH(batch, 0); /* start instance location */
2322     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
2323     ADVANCE_BATCH(batch);
2324 }
2325
2326 static void
2327 gen6_render_emit_states(VADriverContextP ctx, int kernel)
2328 {
2329     struct i965_driver_data *i965 = i965_driver_data(ctx);
2330     struct intel_batchbuffer *batch = i965->batch;
2331
2332     intel_batchbuffer_start_atomic(batch, 0x1000);
2333     intel_batchbuffer_emit_mi_flush(batch);
2334     gen6_emit_invarient_states(ctx);
2335     gen6_emit_state_base_address(ctx);
2336     gen6_emit_viewport_state_pointers(ctx);
2337     gen6_emit_urb(ctx);
2338     gen6_emit_cc_state_pointers(ctx);
2339     gen6_emit_sampler_state_pointers(ctx);
2340     gen6_emit_vs_state(ctx);
2341     gen6_emit_gs_state(ctx);
2342     gen6_emit_clip_state(ctx);
2343     gen6_emit_sf_state(ctx);
2344     gen6_emit_wm_state(ctx, kernel);
2345     gen6_emit_binding_table(ctx);
2346     gen6_emit_depth_buffer_state(ctx);
2347     gen6_emit_drawing_rectangle(ctx);
2348     gen6_emit_vertex_element_state(ctx);
2349     gen6_emit_vertices(ctx);
2350     intel_batchbuffer_end_atomic(batch);
2351 }
2352
2353 static void
2354 gen6_render_put_surface(
2355     VADriverContextP   ctx,
2356     struct object_surface *obj_surface,
2357     const VARectangle *src_rect,
2358     const VARectangle *dst_rect,
2359     unsigned int       flags
2360 )
2361 {
2362     struct i965_driver_data *i965 = i965_driver_data(ctx);
2363     struct intel_batchbuffer *batch = i965->batch;
2364
2365     gen6_render_initialize(ctx);
2366     gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
2367     i965_clear_dest_region(ctx);
2368     gen6_render_emit_states(ctx, PS_KERNEL);
2369     intel_batchbuffer_flush(batch);
2370 }
2371
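/*
 * Unlike the main surface path (logic-op copy), subpictures are alpha
 * blended over the frame: the classic src_alpha / inv_src_alpha "over"
 * operator, with pre- and post-blend clamping to [0, 1].
 */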
2372 static void
2373 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2374 {
2375     struct i965_driver_data *i965 = i965_driver_data(ctx);
2376     struct i965_render_state *render_state = &i965->render_state;
2377     struct gen6_blend_state *blend_state;
2378
2379
2380     dri_bo_map(render_state->cc.blend, 1);
2381     assert(render_state->cc.blend->virtual);
2382     blend_state = render_state->cc.blend->virtual;
2383     memset(blend_state, 0, sizeof(*blend_state));
2384     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2385     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2386     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2387     blend_state->blend0.blend_enable = 1;
2388     blend_state->blend1.post_blend_clamp_enable = 1;
2389     blend_state->blend1.pre_blend_clamp_enable = 1;
2390     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2391     dri_bo_unmap(render_state->cc.blend);
2392 }
2393
2394 static void
2395 gen6_subpicture_render_setup_states(
2396     VADriverContextP   ctx,
2397     struct object_surface *obj_surface,
2398     const VARectangle *src_rect,
2399     const VARectangle *dst_rect
2400 )
2401 {
2402     i965_render_dest_surface_state(ctx, 0);
2403     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
2404     i965_render_sampler(ctx);
2405     i965_render_cc_viewport(ctx);
2406     gen6_render_color_calc_state(ctx);
2407     gen6_subpicture_render_blend_state(ctx);
2408     gen6_render_depth_stencil_state(ctx);
2409     i965_subpic_render_upload_constants(ctx, obj_surface);
2410     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
2411 }
2412
2413 static void
2414 gen6_render_put_subpicture(
2415     VADriverContextP   ctx,
2416     struct object_surface *obj_surface,
2417     const VARectangle *src_rect,
2418     const VARectangle *dst_rect
2419 )
2420 {
2421     struct i965_driver_data *i965 = i965_driver_data(ctx);
2422     struct intel_batchbuffer *batch = i965->batch;
2423     unsigned int index = obj_surface->subpic_render_idx;
2424     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
2425
2426     assert(obj_subpic);
2427     gen6_render_initialize(ctx);
2428     gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
2429     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2430     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
2431     intel_batchbuffer_flush(batch);
2432 }
2433
2434 /*
2435  * for GEN7
2436  */
2437 static void 
2438 gen7_render_initialize(VADriverContextP ctx)
2439 {
2440     struct i965_driver_data *i965 = i965_driver_data(ctx);
2441     struct i965_render_state *render_state = &i965->render_state;
2442     dri_bo *bo;
2443
2444     /* VERTEX BUFFER */
2445     dri_bo_unreference(render_state->vb.vertex_buffer);
2446     bo = dri_bo_alloc(i965->intel.bufmgr,
2447                       "vertex buffer",
2448                       4096,
2449                       4096);
2450     assert(bo);
2451     render_state->vb.vertex_buffer = bo;
2452
2453     /* WM */
2454     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2455     bo = dri_bo_alloc(i965->intel.bufmgr,
2456                       "surface state & binding table",
2457                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2458                       4096);
2459     assert(bo);
2460     render_state->wm.surface_state_binding_table_bo = bo;
2461
2462     dri_bo_unreference(render_state->wm.sampler);
2463     bo = dri_bo_alloc(i965->intel.bufmgr,
2464                       "sampler state",
2465                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
2466                       4096);
2467     assert(bo);
2468     render_state->wm.sampler = bo;
2469     render_state->wm.sampler_count = 0;
2470
2471     /* COLOR CALCULATOR */
2472     dri_bo_unreference(render_state->cc.state);
2473     bo = dri_bo_alloc(i965->intel.bufmgr,
2474                       "color calc state",
2475                       sizeof(struct gen6_color_calc_state),
2476                       4096);
2477     assert(bo);
2478     render_state->cc.state = bo;
2479
2480     /* CC VIEWPORT */
2481     dri_bo_unreference(render_state->cc.viewport);
2482     bo = dri_bo_alloc(i965->intel.bufmgr,
2483                       "cc viewport",
2484                       sizeof(struct i965_cc_viewport),
2485                       4096);
2486     assert(bo);
2487     render_state->cc.viewport = bo;
2488
2489     /* BLEND STATE */
2490     dri_bo_unreference(render_state->cc.blend);
2491     bo = dri_bo_alloc(i965->intel.bufmgr,
2492                       "blend state",
2493                       sizeof(struct gen6_blend_state),
2494                       4096);
2495     assert(bo);
2496     render_state->cc.blend = bo;
2497
2498     /* DEPTH & STENCIL STATE */
2499     dri_bo_unreference(render_state->cc.depth_stencil);
2500     bo = dri_bo_alloc(i965->intel.bufmgr,
2501                       "depth & stencil state",
2502                       sizeof(struct gen6_depth_stencil_state),
2503                       4096);
2504     assert(bo);
2505     render_state->cc.depth_stencil = bo;
2506 }
2507
2508 /*
2509  * for GEN8
2510  */
2511 static void 
2512 gen8_render_initialize(VADriverContextP ctx)
2513 {
2514     struct i965_driver_data *i965 = i965_driver_data(ctx);
2515     struct i965_render_state *render_state = &i965->render_state;
2516     dri_bo *bo;
2517
2518     /* VERTEX BUFFER */
2519     dri_bo_unreference(render_state->vb.vertex_buffer);
2520     bo = dri_bo_alloc(i965->intel.bufmgr,
2521                       "vertex buffer",
2522                       4096,
2523                       4096);
2524     assert(bo);
2525     render_state->vb.vertex_buffer = bo;
2526
2527     /* WM */
2528     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2529     bo = dri_bo_alloc(i965->intel.bufmgr,
2530                       "surface state & binding table",
2531                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2532                       4096);
2533     assert(bo);
2534     render_state->wm.surface_state_binding_table_bo = bo;
2535
2536     dri_bo_unreference(render_state->wm.sampler);
2537     bo = dri_bo_alloc(i965->intel.bufmgr,
2538                       "sampler state",
2539                       MAX_SAMPLERS * sizeof(struct gen8_sampler_state),
2540                       4096);
2541     assert(bo);
2542     render_state->wm.sampler = bo;
2543     render_state->wm.sampler_count = 0;
2544
2545     /* COLOR CALCULATOR */
2546     dri_bo_unreference(render_state->cc.state);
2547     bo = dri_bo_alloc(i965->intel.bufmgr,
2548                       "color calc state",
2549                       sizeof(struct gen6_color_calc_state),
2550                       4096);
2551     assert(bo);
2552     render_state->cc.state = bo;
2553
2554     /* CC VIEWPORT */
2555     dri_bo_unreference(render_state->cc.viewport);
2556     bo = dri_bo_alloc(i965->intel.bufmgr,
2557                       "cc viewport",
2558                       sizeof(struct i965_cc_viewport),
2559                       4096);
2560     assert(bo);
2561     render_state->cc.viewport = bo;
2562
2563     /* BLEND STATE */
2564     dri_bo_unreference(render_state->cc.blend);
2565     bo = dri_bo_alloc(i965->intel.bufmgr,
2566                       "blend state",
2567                       sizeof(struct gen6_blend_state),
2568                       4096);
2569     assert(bo);
2570     render_state->cc.blend = bo;
2571
2572     /* DEPTH & STENCIL STATE */
2573     dri_bo_unreference(render_state->cc.depth_stencil);
2574     bo = dri_bo_alloc(i965->intel.bufmgr,
2575                       "depth & stencil state",
2576                       sizeof(struct gen6_depth_stencil_state),
2577                       4096);
2578     assert(bo);
2579     render_state->cc.depth_stencil = bo;
2580 }
2581
2582 static void
2583 gen7_render_color_calc_state(VADriverContextP ctx)
2584 {
2585     struct i965_driver_data *i965 = i965_driver_data(ctx);
2586     struct i965_render_state *render_state = &i965->render_state;
2587     struct gen6_color_calc_state *color_calc_state;
2588     
2589     dri_bo_map(render_state->cc.state, 1);
2590     assert(render_state->cc.state->virtual);
2591     color_calc_state = render_state->cc.state->virtual;
2592     memset(color_calc_state, 0, sizeof(*color_calc_state));
2593     color_calc_state->constant_r = 1.0;
2594     color_calc_state->constant_g = 0.0;
2595     color_calc_state->constant_b = 1.0;
2596     color_calc_state->constant_a = 1.0;
2597     dri_bo_unmap(render_state->cc.state);
2598 }
2599
2600 static void
2601 gen7_render_blend_state(VADriverContextP ctx)
2602 {
2603     struct i965_driver_data *i965 = i965_driver_data(ctx);
2604     struct i965_render_state *render_state = &i965->render_state;
2605     struct gen6_blend_state *blend_state;
2606     
2607     dri_bo_map(render_state->cc.blend, 1);
2608     assert(render_state->cc.blend->virtual);
2609     blend_state = render_state->cc.blend->virtual;
2610     memset(blend_state, 0, sizeof(*blend_state));
2611     blend_state->blend1.logic_op_enable = 1;
2612     blend_state->blend1.logic_op_func = 0xc;
2613     blend_state->blend1.pre_blend_clamp_enable = 1;
2614     dri_bo_unmap(render_state->cc.blend);
2615 }
2616
2617 static void
2618 gen7_render_depth_stencil_state(VADriverContextP ctx)
2619 {
2620     struct i965_driver_data *i965 = i965_driver_data(ctx);
2621     struct i965_render_state *render_state = &i965->render_state;
2622     struct gen6_depth_stencil_state *depth_stencil_state;
2623     
2624     dri_bo_map(render_state->cc.depth_stencil, 1);
2625     assert(render_state->cc.depth_stencil->virtual);
2626     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2627     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2628     dri_bo_unmap(render_state->cc.depth_stencil);
2629 }
2630
2631 static void 
2632 gen7_render_sampler(VADriverContextP ctx)
2633 {
2634     struct i965_driver_data *i965 = i965_driver_data(ctx);
2635     struct i965_render_state *render_state = &i965->render_state;
2636     struct gen7_sampler_state *sampler_state;
2637     int i;
2638     
2639     assert(render_state->wm.sampler_count > 0);
2640     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2641
2642     dri_bo_map(render_state->wm.sampler, 1);
2643     assert(render_state->wm.sampler->virtual);
2644     sampler_state = render_state->wm.sampler->virtual;
2645     for (i = 0; i < render_state->wm.sampler_count; i++) {
2646         memset(sampler_state, 0, sizeof(*sampler_state));
2647         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2648         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2649         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2650         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2651         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2652         sampler_state++;
2653     }
2654
2655     dri_bo_unmap(render_state->wm.sampler);
2656 }
2657
2658 static void 
2659 gen8_render_sampler(VADriverContextP ctx)
2660 {
2661     struct i965_driver_data *i965 = i965_driver_data(ctx);
2662     struct i965_render_state *render_state = &i965->render_state;
2663     struct gen8_sampler_state *sampler_state;
2664     int i;
2665     
2666     assert(render_state->wm.sampler_count > 0);
2667     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2668
2669     dri_bo_map(render_state->wm.sampler, 1);
2670     assert(render_state->wm.sampler->virtual);
2671     sampler_state = render_state->wm.sampler->virtual;
2672     for (i = 0; i < render_state->wm.sampler_count; i++) {
2673         memset(sampler_state, 0, sizeof(*sampler_state));
2674         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2675         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2676         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2677         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2678         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2679         sampler_state++;
2680     }
2681
2682     dri_bo_unmap(render_state->wm.sampler);
2683 }
2684
2685
2686 static void
2687 gen7_render_setup_states(
2688     VADriverContextP   ctx,
2689     struct object_surface *obj_surface,
2690     const VARectangle *src_rect,
2691     const VARectangle *dst_rect,
2692     unsigned int       flags
2693 )
2694 {
2695     i965_render_dest_surface_state(ctx, 0);
2696     i965_render_src_surfaces_state(ctx, obj_surface, flags);
2697     gen7_render_sampler(ctx);
2698     i965_render_cc_viewport(ctx);
2699     gen7_render_color_calc_state(ctx);
2700     gen7_render_blend_state(ctx);
2701     gen7_render_depth_stencil_state(ctx);
2702     i965_render_upload_constants(ctx, obj_surface, flags);
2703     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
2704 }
2705
2706 static void
2707 gen8_render_setup_states(
2708     VADriverContextP   ctx,
2709     struct object_surface *obj_surface,
2710     const VARectangle *src_rect,
2711     const VARectangle *dst_rect,
2712     unsigned int       flags
2713 )
2714 {
2715     i965_render_dest_surface_state(ctx, 0);
2716     i965_render_src_surfaces_state(ctx, obj_surface, flags);
2717     gen8_render_sampler(ctx);
2718     i965_render_cc_viewport(ctx);
2719     gen7_render_color_calc_state(ctx);
2720     gen7_render_blend_state(ctx);
2721     gen7_render_depth_stencil_state(ctx);
2722     i965_render_upload_constants(ctx, obj_surface, flags);
2723     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
2724 }
2725
2726 static void
2727 gen7_emit_invarient_states(VADriverContextP ctx)
2728 {
2729     struct i965_driver_data *i965 = i965_driver_data(ctx);
2730     struct intel_batchbuffer *batch = i965->batch;
2731
2732     BEGIN_BATCH(batch, 1);
2733     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
2734     ADVANCE_BATCH(batch);
2735
2736     BEGIN_BATCH(batch, 4);
2737     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
2738     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
2739               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
2740     OUT_BATCH(batch, 0);
2741     OUT_BATCH(batch, 0);
2742     ADVANCE_BATCH(batch);
2743
2744     BEGIN_BATCH(batch, 2);
2745     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
2746     OUT_BATCH(batch, 1);
2747     ADVANCE_BATCH(batch);
2748
2749     /* Set system instruction pointer */
2750     BEGIN_BATCH(batch, 2);
2751     OUT_BATCH(batch, CMD_STATE_SIP | 0);
2752     OUT_BATCH(batch, 0);
2753     ADVANCE_BATCH(batch);
2754 }
2755
2756 static void
2757 gen7_emit_state_base_address(VADriverContextP ctx)
2758 {
2759     struct i965_driver_data *i965 = i965_driver_data(ctx);
2760     struct intel_batchbuffer *batch = i965->batch;
2761     struct i965_render_state *render_state = &i965->render_state;
2762
2763     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2764     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2765     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2766     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2767     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2768     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2769     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
2770     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2771     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2772     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2773 }
2774
2775 static void
2776 gen8_emit_state_base_address(VADriverContextP ctx)
2777 {
2778     struct i965_driver_data *i965 = i965_driver_data(ctx);
2779     struct intel_batchbuffer *batch = i965->batch;
2780     struct i965_render_state *render_state = &i965->render_state;
2781
2782     BEGIN_BATCH(batch, 16);
2783     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));
2784     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2785     OUT_BATCH(batch, 0);
2786     OUT_BATCH(batch, 0);
2787     /* DW4 */
2788     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2789     OUT_BATCH(batch, 0);
2790
2791     /* DW6 */
2792     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2793     OUT_BATCH(batch, 0);
2794
2795     /* DW8 */
2796     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2797     OUT_BATCH(batch, 0);
2798
2799     /* DW10 */
2800     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2801     OUT_BATCH(batch, 0);
2802
2803     /* DW12 */
2804     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */
2805     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2806     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2807     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2808     ADVANCE_BATCH(batch);
2809 }
2810
2811 static void
2812 gen7_emit_viewport_state_pointers(VADriverContextP ctx)
2813 {
2814     struct i965_driver_data *i965 = i965_driver_data(ctx);
2815     struct intel_batchbuffer *batch = i965->batch;
2816     struct i965_render_state *render_state = &i965->render_state;
2817
2818     BEGIN_BATCH(batch, 2);
2819     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
2820     OUT_RELOC(batch,
2821               render_state->cc.viewport,
2822               I915_GEM_DOMAIN_INSTRUCTION, 0,
2823               0);
2824     ADVANCE_BATCH(batch);
2825
2826     BEGIN_BATCH(batch, 2);
2827     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
2828     OUT_BATCH(batch, 0);
2829     ADVANCE_BATCH(batch);
2830 }
2831
2832 /*
2833  * URB layout on GEN7 
2834  * ----------------------------------------
2835  * | PS Push Constants (8KB) | VS entries |
2836  * ----------------------------------------
2837  */
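/*
 * The values below assume the 3DSTATE_URB_* starting addresses are
 * expressed in 8KB blocks: the VS region then begins at offset 1,
 * directly after the 8KB push-constant allocation, while GS/HS/DS are
 * handed zero entries since those stages are bypassed.
 */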
2838 static void
2839 gen7_emit_urb(VADriverContextP ctx)
2840 {
2841     struct i965_driver_data *i965 = i965_driver_data(ctx);
2842     struct intel_batchbuffer *batch = i965->batch;
2843     unsigned int num_urb_entries = 32;
2844
2845     if (IS_HASWELL(i965->intel.device_id))
2846         num_urb_entries = 64;
2847
2848     BEGIN_BATCH(batch, 2);
2849     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
2850     OUT_BATCH(batch, 8); /* 8KB of PS push-constant space, in 1KB units */
2851     ADVANCE_BATCH(batch);
2852
2853     BEGIN_BATCH(batch, 2);
2854     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
2855     OUT_BATCH(batch, 
2856               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
2857               ((2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) |
2858               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2859     ADVANCE_BATCH(batch);
2860
2861     BEGIN_BATCH(batch, 2);
2862     OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
2863     OUT_BATCH(batch,
2864               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2865               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2866     ADVANCE_BATCH(batch);
2867
2868     BEGIN_BATCH(batch, 2);
2869     OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
2870     OUT_BATCH(batch,
2871               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2872               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2873     ADVANCE_BATCH(batch);
2874
2875     BEGIN_BATCH(batch, 2);
2876     OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
2877     OUT_BATCH(batch,
2878               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2879               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2880     ADVANCE_BATCH(batch);
2881 }
2882
2883 static void
2884 gen7_emit_cc_state_pointers(VADriverContextP ctx)
2885 {
2886     struct i965_driver_data *i965 = i965_driver_data(ctx);
2887     struct intel_batchbuffer *batch = i965->batch;
2888     struct i965_render_state *render_state = &i965->render_state;
2889
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.state,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.blend,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.depth_stencil,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_RELOC(batch,
              render_state->wm.sampler,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

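    /*
     * No depth test is needed for this 2D blit, so a null depth surface
     * (I965_SURFACE_NULL) is programmed; the format field still has to name
     * a valid depth format, hence D32_FLOAT.
     */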
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}

static void
gen7_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}

static void
gen7_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

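    /*
     * Only a pass-through VS and the pixel shader are needed for this blit,
     * so each intermediate geometry stage (GS, HS, TE, DS) and stream
     * output is turned off by emitting its state packet with a zero payload.
     */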
    /* bypass GS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}

static void
gen7_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 14);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
    OUT_BATCH(batch,
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
    unsigned int num_samples = 0;

    if (IS_HASWELL(i965->intel.device_id)) {
        max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
        num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
    }
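    /*
     * Haswell moved the PS maximum-thread-count field to a different bit
     * position and added a per-PS sample mask; a mask of 1 selects the
     * single-sample case (field names as defined for IVB/HSW above).
     */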

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
    OUT_BATCH(batch,
              GEN7_WM_DISPATCH_ENABLE |
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
    OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch,
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
    OUT_RELOC(batch,
              render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch,
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch,
              ((render_state->max_wm_threads - 1) << max_threads_shift) | num_samples |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_ATTRIBUTE_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    OUT_BATCH(batch,
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Set up our vertex elements, sourced from the single vertex buffer. */
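    /*
     * Each vertex is four packed floats { x, y, s0, t0 }, matching the
     * 16-byte pitch programmed in gen7_emit_vertices(): element 0 fetches
     * (x, y) at offset 0, element 1 fetches (s0, t0) at offset 8, and the
     * unread Z/W components are synthesized as 1.0.
     */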
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}

static void
gen7_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

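    /*
     * A RECTLIST needs only three corner vertices (3 x 16 bytes, hence the
     * 12 * 4 end-address delta above); the hardware infers the fourth
     * corner of the rectangle.
     */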
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST |
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

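    /*
     * The emission order loosely follows the fixed-function pipeline
     * (shared state pointers first, then VS -> clip -> SF -> WM, with the
     * draw last); the leading MI_FLUSH drains earlier rendering before the
     * pipeline is reprogrammed.
     */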
    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_emit_invarient_states(ctx);
    gen7_emit_state_base_address(ctx);
    gen7_emit_viewport_state_pointers(ctx);
    gen7_emit_urb(ctx);
    gen7_emit_cc_state_pointers(ctx);
    gen7_emit_sampler_state_pointers(ctx);
    gen7_emit_bypass_state(ctx);
    gen7_emit_vs_state(ctx);
    gen7_emit_clip_state(ctx);
    gen7_emit_sf_state(ctx);
    gen7_emit_wm_state(ctx, kernel);
    gen7_emit_binding_table(ctx);
    gen7_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen7_emit_vertex_element_state(ctx);
    gen7_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
gen8_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_emit_invarient_states(ctx);
    gen8_emit_state_base_address(ctx);
    gen7_emit_viewport_state_pointers(ctx);
    gen7_emit_urb(ctx);
    gen7_emit_cc_state_pointers(ctx);
    gen7_emit_sampler_state_pointers(ctx);
    gen7_emit_bypass_state(ctx);
    gen7_emit_vs_state(ctx);
    gen7_emit_clip_state(ctx);
    gen7_emit_sf_state(ctx);
    gen7_emit_wm_state(ctx, kernel);
    gen7_emit_binding_table(ctx);
    gen7_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen7_emit_vertex_element_state(ctx);
    gen7_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
gen7_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen7_render_initialize(ctx);
    gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_clear_dest_region(ctx);
    gen7_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}

static void
gen8_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen8_render_initialize(ctx);
    gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_clear_dest_region(ctx);
    gen8_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}

static void
gen7_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_unmap(render_state->cc.state);
    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
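    /* Standard source-over alpha blending for the subpicture overlay:
     * dst = src * src_alpha + dst * (1 - src_alpha)
     */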
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}

static void
gen7_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_subpicture_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
gen8_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    gen8_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_subpicture_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
gen7_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen7_render_initialize(ctx);
    gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}

static void
gen8_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen8_render_initialize(ctx);
    gen8_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen8_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}

/*
 * global functions
 */
VAStatus
i965_DestroySurfaces(VADriverContextP ctx,
                     VASurfaceID *surface_list,
                     int num_surfaces);
void
intel_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int has_done_scaling = 0;
    VASurfaceID out_surface_id = i965_post_processing(ctx,
                                                      obj_surface,
                                                      src_rect,
                                                      dst_rect,
                                                      flags,
                                                      &has_done_scaling);

    assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));

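    /*
     * If post-processing produced a usable intermediate surface, render
     * from that instead; and if it already scaled to the destination size,
     * the final blit should sample the whole intermediate surface, so the
     * source rectangle becomes dst_rect.
     */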
    if (out_surface_id != VA_INVALID_ID) {
        struct object_surface *new_obj_surface = SURFACE(out_surface_id);

        if (new_obj_surface && new_obj_surface->bo)
            obj_surface = new_obj_surface;

        if (has_done_scaling)
            src_rect = dst_rect;
    }

    if (IS_GEN8(i965->intel.device_id))
        gen8_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
    else if (IS_GEN7(i965->intel.device_id))
        gen7_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
    else if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
    else
        i965_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);

    if (out_surface_id != VA_INVALID_ID)
        i965_DestroySurfaces(ctx, &out_surface_id, 1);
}

void
intel_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_GEN8(i965->intel.device_id))
        gen8_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
    else if (IS_GEN7(i965->intel.device_id))
        gen7_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
    else if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
    else
        i965_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
}

bool
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;

    /* kernel */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
                                 sizeof(render_kernels_gen5[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
                                 sizeof(render_kernels_gen6[0])));

    if (IS_GEN8(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen8,
               sizeof(render_state->render_kernels));
    else if (IS_GEN7(i965->intel.device_id))
        memcpy(render_state->render_kernels,
               (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7),
               sizeof(render_state->render_kernels));
    else if (IS_GEN6(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
    else if (IS_IRONLAKE(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
    else
        memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size, 0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* constant buffer */
    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
                                          "constant buffer",
                                          4096, 64);
    assert(render_state->curbe.bo);

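    /*
     * Per-GT ceilings on the pixel-shader thread count; the WM/PS state
     * emitters (e.g. gen7_emit_wm_state()) program (max_wm_threads - 1)
     * into the maximum-thread field of the corresponding state packet.
     */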
    if (IS_HSW_GT1(i965->intel.device_id)) {
        render_state->max_wm_threads = 102;
    } else if (IS_HSW_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 204;
    } else if (IS_HSW_GT3(i965->intel.device_id)) {
        render_state->max_wm_threads = 408;
    } else if (IS_IVB_GT1(i965->intel.device_id) || IS_BAYTRAIL(i965->intel.device_id)) {
        render_state->max_wm_threads = 48;
    } else if (IS_IVB_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 172;
    } else if (IS_SNB_GT1(i965->intel.device_id)) {
        render_state->max_wm_threads = 40;
    } else if (IS_SNB_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 80;
    } else if (IS_IRONLAKE(i965->intel.device_id)) {
        render_state->max_wm_threads = 72; /* 12 * 6 */
    } else if (IS_G4X(i965->intel.device_id)) {
        render_state->max_wm_threads = 50; /* 12 * 5 */
    } else {
        /* should never get here !!! */
        assert(0);
    }

    return true;
}

void
i965_render_terminate(VADriverContextP ctx)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    dri_bo_unreference(render_state->curbe.bo);
    render_state->curbe.bo = NULL;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;
    dri_bo_unreference(render_state->vs.state);
    render_state->vs.state = NULL;
    dri_bo_unreference(render_state->sf.state);
    render_state->sf.state = NULL;
    dri_bo_unreference(render_state->wm.sampler);
    render_state->wm.sampler = NULL;
    dri_bo_unreference(render_state->wm.state);
    render_state->wm.state = NULL;
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL;
    dri_bo_unreference(render_state->cc.viewport);
    render_state->cc.viewport = NULL;
    dri_bo_unreference(render_state->cc.state);
    render_state->cc.state = NULL;
    dri_bo_unreference(render_state->cc.blend);
    render_state->cc.blend = NULL;
    dri_bo_unreference(render_state->cc.depth_stencil);
    render_state->cc.depth_stencil = NULL;

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }
}