/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"
#include "i965_yuv_coefs.h"

#include "i965_render.h"
#include "i965_post_processing.h"

#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] = {
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

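/* A kernel's GRF usage, encoded the way the hardware unit state expects
 * it: the number of 16-register blocks minus one. */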
#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

static const uint32_t ps_kernel_static[][4] = {
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_color_balance.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
static const uint32_t ps_subpic_kernel_static[][4] = {
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IRONLAKE */
static const uint32_t sf_kernel_static_gen5[][4] = {
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] = {
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] = {
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] = {
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_color_balance.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] = {
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_color_balance.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Programs for Haswell */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)

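/* Layout of the surface-state buffer: one padded SURFACE_STATE per render
 * surface at the front, followed by the binding table that points at them. */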
#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

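/* Bit-cast a float so it can be emitted verbatim into the command stream
 * (used for 3DSTATE_CONSTANT_COLOR below). */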
static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum {
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

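/* Static URB partitioning for the fixed-function pipeline: only the VS,
 * SF and CS (CURBE) stages are given entries; GS and CLIP are disabled. */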
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4

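/* The VS itself is disabled and acts as pass-through: vertex data flows
 * straight to the SF stage, so this unit state only describes URB usage. */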
static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_info))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_info))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_info))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store the alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;  /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend  = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dest_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

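/* Program a pre-Gen7 SURFACE_STATE. VA_TOP_FIELD/VA_BOTTOM_FIELD request
 * single-field sampling: the vertical line stride skips every other line
 * (offset by one line for the bottom field) and the height is halved. */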
static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (VA_TOP_FIELD | VA_BOTTOM_FIELD)) {
    case VA_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case VA_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}

static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* Set "Shader Channel Select" */
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (VA_TOP_FIELD | VA_BOTTOM_FIELD)) {
    case VA_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case VA_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}

static void
i965_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_info)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        if (IS_HASWELL(i965->intel.device_info))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

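/* Bind the source planes: Y in binding-table slots 1-2, then interleaved
 * UV in 3-4 for NV12, or U in 3-4 and V in 5-6 for three-plane formats.
 * Each plane is bound twice, presumably matching the sampler pairing the
 * WM kernels were built with. */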
static void
i965_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */
        return;

    if (obj_surface->fourcc == VA_FOURCC_NV12) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      struct object_surface *obj_surface)
{
    dri_bo *subpic_region;
    unsigned int index;
    struct object_subpic *obj_subpic;
    struct object_image *obj_image;

    assert(obj_surface);
    assert(obj_surface->bo);

    index = obj_surface->subpic_render_idx;
    obj_subpic = obj_surface->obj_subpic[index];
    obj_image = obj_subpic->obj_image;
    subpic_region = obj_image->bo;

    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_info)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        if (IS_HASWELL(i965->intel.device_info))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

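/* Upload three (u, v, x, y) vertices describing a RECTLIST primitive:
 * bottom-right, bottom-left, then top-left corner. The rotation attribute
 * selects which texture corners land on which screen corners. */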
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}

static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

#define PI  3.1415926

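/* CURBE layout consumed by the WM kernels: word 0 selects the sampling
 * mode (2 = grayscale, 1 = NV12, 0 = planar YUV), word 1 = 1 skips the
 * color-balance transform, floats 4-7 carry the contrast/brightness/
 * hue/saturation terms, and floats 8+ the YUV->RGB coefficients. */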
static void
i965_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    const float *yuv_coefs;
    size_t coefs_length;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC_Y800);

        constant_buffer[0] = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC_NV12)
            constant_buffer[0] = 1;
        else
            constant_buffer[0] = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    yuv_to_rgb = (float *)constant_buffer + 8;
    yuv_coefs = i915_color_standard_to_coefs(i915_filter_to_color_standard(flags & VA_SRC_COLOR_MASK),
                                             &coefs_length);
    memcpy(yuv_to_rgb, yuv_coefs, coefs_length);

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }

    dri_bo_map(render_state->curbe.bo, 1);

    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->curbe.bo);
}

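/* Build all indirect state needed to render a surface: fixed-function
 * unit states, destination and source surfaces, samplers, CC state, and
 * the vertex and constant buffers. */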
static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, obj_surface, flags);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |          /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));                /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);
}

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | ((dest_region->height - 1) << 16));
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}

static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    struct object_image *obj_image,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    assert(obj_image);

    if (!obj_image)
        return;

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* fill palette: each entry carries color in bits 0-23 and alpha in bits 24-31 */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}

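/* Point the hardware at the vertex buffer and kick off the draw: a
 * single 3-vertex RECTLIST covering the destination rectangle. */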
static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch,
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    if (IS_IRONLAKE(i965->intel.device_info))
        OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    else
        OUT_BATCH(batch, 3);

    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}

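/*
 * Clear the destination with a solid-color XY_COLOR_BLT.  BR13 carries
 * the raster operation in bits 16..23 (0xf0 is PATCOPY, i.e. just write
 * the fill color) along with the pixel format and pitch; for tiled
 * destinations the pitch field is in dwords, hence the divide by 4.
 * GEN6/GEN7 route blits through the separate BLT ring, so those paths
 * open a blit batch instead.
 */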
static void
i965_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    if (IS_GEN6(i965->intel.device_info) ||
        IS_GEN7(i965->intel.device_info)) {
        intel_batchbuffer_start_atomic_blt(batch, 24);
        BEGIN_BLT_BATCH(batch, 6);
    } else {
        intel_batchbuffer_start_atomic(batch, 24);
        BEGIN_BATCH(batch, 6);
    }

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0);
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}

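/*
 * Emit the complete classic (GEN4/GEN5) 3D pipeline in one atomic
 * batch: pipeline select, SIP, base addresses, binding table, unit
 * state pointers, URB layout, constants, drawing rectangle, vertex
 * elements and finally the primitive itself.
 */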
static void
i965_surface_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_clear_dest_region(ctx);
    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
i965_subpic_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}

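/*
 * (Re)allocate the state buffers for the classic pipeline: small unit
 * state BOs for VS, SF, WM and CC, a combined surface-state plus
 * binding-table BO, a sampler BO and a CC viewport.  GS and CLIP run
 * in pass-through, so no state is allocated for them.
 */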
static void
i965_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* VS */
    dri_bo_unreference(render_state->vs.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vs state",
                      sizeof(struct i965_vs_unit_state),
                      64);
    assert(bo);
    render_state->vs.state = bo;

    /* GS */
    /* CLIP */
    /* SF */
    dri_bo_unreference(render_state->sf.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sf state",
                      sizeof(struct i965_sf_unit_state),
                      64);
    assert(bo);
    render_state->sf.state = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct i965_sampler_state),
                      64);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    dri_bo_unreference(render_state->wm.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "wm state",
                      sizeof(struct i965_wm_unit_state),
                      64);
    assert(bo);
    render_state->wm.state = bo;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct i965_cc_unit_state),
                      64);
    assert(bo);
    render_state->cc.state = bo;

    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      64);
    assert(bo);
    render_state->cc.viewport = bo;
}

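/*
 * Classic-pipeline entry points: allocate state, fill in the per-frame
 * surface, sampler and constant state, emit the pipeline and flush.
 */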
static void
i965_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_render_initialize(ctx);
    i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_surface_render_pipeline_setup(ctx);
    intel_batchbuffer_flush(batch);
}

static void
i965_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);

    i965_render_initialize(ctx);
    i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
    i965_subpic_render_pipeline_setup(ctx);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}

/*
 * for GEN6+
 */
static void
gen6_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct i965_sampler_state),
                      4096);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct gen6_color_calc_state),
                      4096);
    assert(bo);
    render_state->cc.state = bo;

    /* CC VIEWPORT */
    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      4096);
    assert(bo);
    render_state->cc.viewport = bo;

    /* BLEND STATE */
    dri_bo_unreference(render_state->cc.blend);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "blend state",
                      sizeof(struct gen6_blend_state),
                      4096);
    assert(bo);
    render_state->cc.blend = bo;

    /* DEPTH & STENCIL STATE */
    dri_bo_unreference(render_state->cc.depth_stencil);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "depth & stencil state",
                      sizeof(struct gen6_depth_stencil_state),
                      4096);
    assert(bo);
    render_state->cc.depth_stencil = bo;
}

static void
gen6_render_color_calc_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_color_calc_state *color_calc_state;

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    color_calc_state = render_state->cc.state->virtual;
    memset(color_calc_state, 0, sizeof(*color_calc_state));
    color_calc_state->constant_r = 1.0;
    color_calc_state->constant_g = 0.0;
    color_calc_state->constant_b = 1.0;
    color_calc_state->constant_a = 1.0;
    dri_bo_unmap(render_state->cc.state);
}

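/*
 * Plain video rendering on GEN6 does not blend: the blend unit stays
 * disabled and the logic op is enabled with function 0xc (COPY), so the
 * pixel shader output simply overwrites the render target.
 */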
static void
gen6_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc; /* COPY */
    dri_bo_unmap(render_state->cc.blend);
}

static void
gen6_render_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_depth_stencil_state *depth_stencil_state;

    dri_bo_map(render_state->cc.depth_stencil, 1);
    assert(render_state->cc.depth_stencil->virtual);
    depth_stencil_state = render_state->cc.depth_stencil->virtual;
    memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
    dri_bo_unmap(render_state->cc.depth_stencil);
}

static void
gen6_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx, obj_surface, flags);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}

static void
gen6_emit_invariant_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);

    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);

    /* Set system instruction pointer */
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
}

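/*
 * STATE_BASE_ADDRESS: only the surface-state base is relocated (to the
 * combined surface-state/binding-table BO); all other bases and bounds
 * stay zero with the MODIFY bit set, so those state offsets are treated
 * as absolute graphics addresses.
 */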
static void
gen6_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}

static void
gen6_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
              GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
              (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}

static void
gen6_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
    OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
              (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
    OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
              (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}

static void
gen6_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
    OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
}

static void
gen6_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
              GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0); /* VS */
    OUT_BATCH(batch, 0); /* GS */
    OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}

static void
gen6_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Binding table pointers */
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
              GEN6_BINDING_TABLE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0);        /* vs */
    OUT_BATCH(batch, 0);        /* gs */
    /* Only the PS uses the binding table */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
}

static void
gen6_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
              (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
    OUT_BATCH(batch, 0);
}

static void
gen6_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}

static void
gen6_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}

static void
gen6_emit_gs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable GS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}

static void
gen6_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}

static void
gen6_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
              (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW14 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW19 */
}

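/*
 * The pixel shader takes its constants from the push-constant buffer
 * (curbe.bo, filled by i965_render_upload_constants).  The sampler and
 * binding-table entry counts below are prefetch hints; dispatch start
 * GRF 6 matches the layout the render kernels were compiled for.
 */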
static void
gen6_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
              GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
              (5 - 2));
    OUT_RELOC(batch,
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              (URB_CS_ENTRY_SIZE - 1));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
    OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
              (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
    OUT_BATCH(batch, ((i965->intel.device_info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
              GEN6_3DSTATE_WM_DISPATCH_ENABLE |
              GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
              GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
}

static void
gen6_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}

static void
gen6_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch,
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}

static void
gen6_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_emit_invariant_states(ctx);
    gen6_emit_state_base_address(ctx);
    gen6_emit_viewport_state_pointers(ctx);
    gen6_emit_urb(ctx);
    gen6_emit_cc_state_pointers(ctx);
    gen6_emit_sampler_state_pointers(ctx);
    gen6_emit_vs_state(ctx);
    gen6_emit_gs_state(ctx);
    gen6_emit_clip_state(ctx);
    gen6_emit_sf_state(ctx);
    gen6_emit_wm_state(ctx, kernel);
    gen6_emit_binding_table(ctx);
    gen6_emit_depth_buffer_state(ctx);
    gen6_emit_drawing_rectangle(ctx);
    gen6_emit_vertex_element_state(ctx);
    gen6_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
gen6_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen6_render_initialize(ctx);
    gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_clear_dest_region(ctx);
    gen6_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}

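/*
 * Subpictures, unlike plain video frames, are alpha-blended over the
 * destination: standard post-multiplied compositing with SRC_ALPHA /
 * INV_SRC_ALPHA factors and an ADD blend function, clamped to [0, 1].
 */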
static void
gen6_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_unmap(render_state->cc.state);
    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}

static void
gen6_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_subpicture_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
gen6_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen6_render_initialize(ctx);
    gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}

/*
 * for GEN7
 */
static void
gen7_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
                      4096);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct gen6_color_calc_state),
                      4096);
    assert(bo);
    render_state->cc.state = bo;

    /* CC VIEWPORT */
    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      4096);
    assert(bo);
    render_state->cc.viewport = bo;

    /* BLEND STATE */
    dri_bo_unreference(render_state->cc.blend);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "blend state",
                      sizeof(struct gen6_blend_state),
                      4096);
    assert(bo);
    render_state->cc.blend = bo;

    /* DEPTH & STENCIL STATE */
    dri_bo_unreference(render_state->cc.depth_stencil);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "depth & stencil state",
                      sizeof(struct gen6_depth_stencil_state),
                      4096);
    assert(bo);
    render_state->cc.depth_stencil = bo;
}

/*
 * for GEN8 (ALIGNMENT is used by the GEN8 code further below)
 */
#define ALIGNMENT       64

static void
gen7_render_color_calc_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_color_calc_state *color_calc_state;

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    color_calc_state = render_state->cc.state->virtual;
    memset(color_calc_state, 0, sizeof(*color_calc_state));
    color_calc_state->constant_r = 1.0;
    color_calc_state->constant_g = 0.0;
    color_calc_state->constant_b = 1.0;
    color_calc_state->constant_a = 1.0;
    dri_bo_unmap(render_state->cc.state);
}

static void
gen7_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc; /* COPY */
    blend_state->blend1.pre_blend_clamp_enable = 1;
    dri_bo_unmap(render_state->cc.blend);
}

static void
gen7_render_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_depth_stencil_state *depth_stencil_state;

    dri_bo_map(render_state->cc.depth_stencil, 1);
    assert(render_state->cc.depth_stencil->virtual);
    depth_stencil_state = render_state->cc.depth_stencil->virtual;
    memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
    dri_bo_unmap(render_state->cc.depth_stencil);
}

static void
gen7_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen7_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
gen7_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    gen7_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx, obj_surface, flags);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}

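/*
 * GEN7 invariant state: same content as the GEN6 version, but
 * 3DSTATE_MULTISAMPLE grew to four dwords and every packet is wrapped
 * in explicit BEGIN_BATCH/ADVANCE_BATCH bookkeeping.
 */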
static void
gen7_emit_invariant_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}

static void
gen7_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.viewport,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

/*
 * URB layout on GEN7
 * ----------------------------------------
 * | PS Push Constants (8KB) | VS entries |
 * ----------------------------------------
 */
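/*
 * The push-constant allocation below is 8KB (programmed in 1KB units)
 * and the URB_VS starting address of 1 is in 8KB blocks, so VS entries
 * begin right after the push constants, matching the sketch above.
 * HS and DS get zero-size allocations parked at block 2.
 */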
static void
gen7_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int num_urb_entries = 32;

    if (IS_HASWELL(i965->intel.device_info))
        num_urb_entries = 64;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
    OUT_BATCH(batch, 8); /* in 1KBs */
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
    OUT_BATCH(batch,
              (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
              (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
              (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.state,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.blend,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.depth_stencil,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_RELOC(batch,
              render_state->wm.sampler,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}

static void
gen7_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}

static void
gen7_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}

static void
gen7_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 14);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
    OUT_BATCH(batch,
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

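/*
 * Haswell moved the PS max-thread field and wants an explicit sample
 * mask in 3DSTATE_PS, hence the two IS_HASWELL adjustments below.  As
 * on GEN6, the sampler and binding-table counts are prefetch hints and
 * the kernels are dispatched with start GRF 6.
 */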
static void
gen7_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
    unsigned int num_samples = 0;

    if (IS_HASWELL(i965->intel.device_info)) {
        max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
        num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
    }

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
    OUT_BATCH(batch,
              GEN7_WM_DISPATCH_ENABLE |
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
    OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch,
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
    OUT_RELOC(batch,
              render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch,
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch,
              ((i965->intel.device_info->max_wm_threads - 1) << max_threads_shift) | num_samples |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_ATTRIBUTE_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    OUT_BATCH(batch,
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}

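/*
 * Same three-vertex RECTLIST as the older paths.  GEN7 additionally
 * needs ADDRESS_MODIFYENABLE set for the vertex-buffer address to take
 * effect, and 3DPRIMITIVE now carries the topology in DW1 rather than
 * in the command header.
 */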
static void
gen7_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST |
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

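/*
 * Emits the complete 3D pipeline setup for one draw.  The batch is built
 * atomically (with 0x1000 bytes reserved) behind an MI_FLUSH: invariant
 * state and state base addresses come first, then the per-stage state
 * (the fixed-function geometry stages are left disabled and the WM/PS
 * stage runs the selected kernel), and finally the vertex setup and the
 * draw itself.
 */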
static void
gen7_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_emit_invarient_states(ctx);
    gen7_emit_state_base_address(ctx);
    gen7_emit_viewport_state_pointers(ctx);
    gen7_emit_urb(ctx);
    gen7_emit_cc_state_pointers(ctx);
    gen7_emit_sampler_state_pointers(ctx);
    gen7_emit_bypass_state(ctx);
    gen7_emit_vs_state(ctx);
    gen7_emit_clip_state(ctx);
    gen7_emit_sf_state(ctx);
    gen7_emit_wm_state(ctx, kernel);
    gen7_emit_binding_table(ctx);
    gen7_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen7_emit_vertex_element_state(ctx);
    gen7_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}


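/*
 * Gen7 implementation of the put-surface hook: (re)initializes the render
 * state buffers, builds all surface/sampler/CC state for this blit, clears
 * the part of the destination region not covered by dst_rect, emits the
 * batch with the video (planar YUV -> RGB) kernel, and flushes the batch
 * to the kernel driver.
 */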
static void
gen7_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen7_render_initialize(ctx);
    gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_clear_dest_region(ctx);
    gen7_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}


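/*
 * Subpictures are composited over the video with standard "source over"
 * alpha blending, i.e. for each color channel:
 *
 *     C_out = C_src * A_src + C_dst * (1 - A_src)
 *
 * which is exactly what the SRC_ALPHA / INV_SRC_ALPHA factors combined
 * with the ADD blend function below express.  Pre- and post-blend
 * clamping keeps all values in the [0, 1] range.
 */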
static void
gen7_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_unmap(render_state->cc.state);
    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}

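/*
 * State setup for subpicture rendering.  It differs from the main video
 * path in that the source is the subpicture image (sampled as ARGB by
 * PS_SUBPIC_KERNEL rather than as planar YUV) and in that alpha blending
 * is enabled via gen7_subpicture_render_blend_state() above.
 */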
static void
gen7_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_subpicture_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

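/*
 * Renders one subpicture layer: obj_surface->subpic_render_idx selects
 * which of the surface's associated subpictures to draw.  For indexed
 * formats the palette is uploaded with an alpha of 0xff before the batch
 * is flushed.
 */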
static void
gen7_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen7_render_initialize(ctx);
    gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}


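/*
 * Generation-independent entry point for rendering a surface.  The surface
 * is first handed to the post-processing pipeline, which may scale and/or
 * color-convert it into a temporary surface; if so, rendering proceeds
 * from that temporary surface (with the calibrated source rectangle once
 * scaling has already been applied) and the temporary surface is destroyed
 * afterwards.  A minimal caller sketch, assuming hypothetical src_w/src_h
 * and dst_w/dst_h variables and an already-looked-up object_surface:
 *
 *     VARectangle src = { .x = 0, .y = 0, .width = src_w, .height = src_h };
 *     VARectangle dst = { .x = 0, .y = 0, .width = dst_w, .height = dst_h };
 *     intel_render_put_surface(ctx, obj_surface, &src, &dst, VA_FRAME_PICTURE);
 */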
void
intel_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int has_done_scaling = 0;
    VARectangle calibrated_rect;
    VASurfaceID out_surface_id = i965_post_processing(ctx,
                                                      obj_surface,
                                                      src_rect,
                                                      dst_rect,
                                                      flags,
                                                      &has_done_scaling,
                                                      &calibrated_rect);

    assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));

    if (out_surface_id != VA_INVALID_ID) {
        struct object_surface *new_obj_surface = SURFACE(out_surface_id);

        if (new_obj_surface && new_obj_surface->bo)
            obj_surface = new_obj_surface;

        if (has_done_scaling)
            src_rect = &calibrated_rect;
    }

    render_state->render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);

    if (out_surface_id != VA_INVALID_ID)
        i965_DestroySurfaces(ctx, &out_surface_id, 1);
}

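/*
 * Generation-independent subpicture entry point; it simply forwards to
 * the per-generation hook installed by genx_render_init() below.
 */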
void
intel_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    render_state->render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
}

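/*
 * Releases every buffer object owned by the render state and clears the
 * stale pointers so that a later re-initialization starts from a clean
 * slate.
 */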
static void
genx_render_terminate(VADriverContextP ctx)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    dri_bo_unreference(render_state->curbe.bo);
    render_state->curbe.bo = NULL;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;
    dri_bo_unreference(render_state->vs.state);
    render_state->vs.state = NULL;
    dri_bo_unreference(render_state->sf.state);
    render_state->sf.state = NULL;
    dri_bo_unreference(render_state->wm.sampler);
    render_state->wm.sampler = NULL;
    dri_bo_unreference(render_state->wm.state);
    render_state->wm.state = NULL;
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL; /* clear like the other BO pointers */
    dri_bo_unreference(render_state->cc.viewport);
    render_state->cc.viewport = NULL;
    dri_bo_unreference(render_state->cc.state);
    render_state->cc.state = NULL;
    dri_bo_unreference(render_state->cc.blend);
    render_state->cc.blend = NULL;
    dri_bo_unreference(render_state->cc.depth_stencil);
    render_state->cc.depth_stencil = NULL;

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }
}

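/*
 * Selects the shader kernels and the put_surface/put_subpicture hooks for
 * the detected GPU generation (Gen4 through Gen7/Haswell here; newer
 * generations provide their own init elsewhere in the driver), uploads
 * each kernel binary into its own 4 KiB-aligned BO, and allocates the
 * 4 KiB CURBE buffer used for push constants.
 */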
bool
genx_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;

    /* kernel */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
                                 sizeof(render_kernels_gen5[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
                                 sizeof(render_kernels_gen6[0])));

    if (IS_GEN7(i965->intel.device_info)) {
        memcpy(render_state->render_kernels,
               (IS_HASWELL(i965->intel.device_info) ? render_kernels_gen7_haswell : render_kernels_gen7),
               sizeof(render_state->render_kernels));
        render_state->render_put_surface = gen7_render_put_surface;
        render_state->render_put_subpicture = gen7_render_put_subpicture;
    } else if (IS_GEN6(i965->intel.device_info)) {
        memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
        render_state->render_put_surface = gen6_render_put_surface;
        render_state->render_put_subpicture = gen6_render_put_subpicture;
    } else if (IS_IRONLAKE(i965->intel.device_info)) {
        memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
        render_state->render_put_surface = i965_render_put_surface;
        render_state->render_put_subpicture = i965_render_put_subpicture;
    } else {
        memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
        render_state->render_put_surface = i965_render_put_surface;
        render_state->render_put_subpicture = i965_render_put_subpicture;
    }

    render_state->render_terminate = genx_render_terminate;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size, 0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* constant buffer */
    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
                                          "constant buffer",
                                          4096, 64);
    assert(render_state->curbe.bo);

    return true;
}

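/*
 * Public entry points: context creation installs a per-generation
 * render_init hook in codec_info, and termination goes through whichever
 * render_terminate that hook registered (genx_render_terminate above for
 * the generations handled in this file).
 */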
bool
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    return i965->codec_info->render_init(ctx);
}

void
i965_render_terminate(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    render_state->render_terminate(ctx);
}