/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * This file contains common screen and context structures and functions
 * for r600g and radeonsi.
 */

#ifndef R600_PIPE_COMMON_H
#define R600_PIPE_COMMON_H

#include <stdio.h>

#include "amd/common/ac_binary.h"

#include "radeon/radeon_winsys.h"

#include "util/disk_cache.h"
#include "util/u_blitter.h"
#include "util/list.h"
#include "util/u_range.h"
#include "util/slab.h"
#include "util/u_suballoc.h"
#include "util/u_transfer.h"
#include "util/u_threaded_context.h"

struct u_log_context;

#define ATI_VENDOR_ID 0x1002

#define R600_RESOURCE_FLAG_TRANSFER             (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
#define R600_RESOURCE_FLAG_FLUSHED_DEPTH        (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
#define R600_RESOURCE_FLAG_FORCE_TILING         (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
#define R600_RESOURCE_FLAG_DISABLE_DCC          (PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
#define R600_RESOURCE_FLAG_UNMAPPABLE           (PIPE_RESOURCE_FLAG_DRV_PRIV << 4)

#define R600_CONTEXT_STREAMOUT_FLUSH            (1u << 0)
/* Pipeline & streamout query controls. */
#define R600_CONTEXT_START_PIPELINE_STATS       (1u << 1)
#define R600_CONTEXT_STOP_PIPELINE_STATS        (1u << 2)
#define R600_CONTEXT_FLUSH_FOR_RENDER_COND      (1u << 3)
#define R600_CONTEXT_PRIVATE_FLAG               (1u << 4)

/* Special primitive types. */
#define R600_PRIM_RECTANGLE_LIST        PIPE_PRIM_MAX

#define R600_NOT_QUERY          0xffffffff

/* Debug flags. */
enum {
        /* Shader logging options: */
        DBG_VS = PIPE_SHADER_VERTEX,
        DBG_PS = PIPE_SHADER_FRAGMENT,
        DBG_GS = PIPE_SHADER_GEOMETRY,
        DBG_TCS = PIPE_SHADER_TESS_CTRL,
        DBG_TES = PIPE_SHADER_TESS_EVAL,
        DBG_CS = PIPE_SHADER_COMPUTE,
        DBG_NO_IR,
        DBG_NO_TGSI,
        DBG_NO_ASM,
        DBG_PREOPT_IR,

        /* Shader compiler options the shader cache should be aware of: */
        DBG_FS_CORRECT_DERIVS_AFTER_KILL,
        DBG_UNSAFE_MATH,
        DBG_SI_SCHED,

        /* Shader compiler options (with no effect on the shader cache): */
        DBG_CHECK_IR,
        DBG_PRECOMPILE,
        DBG_NIR,
        DBG_MONOLITHIC_SHADERS,
        DBG_NO_OPT_VARIANT,

        /* Information logging options: */
        DBG_INFO,
        DBG_TEX,
        DBG_COMPUTE,
        DBG_VM,

        /* Driver options: */
        DBG_FORCE_DMA,
        DBG_NO_ASYNC_DMA,
        DBG_NO_DISCARD_RANGE,
        DBG_NO_WC,
        DBG_CHECK_VM,
        DBG_RESERVE_VMID,

        /* 3D engine options: */
        DBG_SWITCH_ON_EOP,
        DBG_NO_OUT_OF_ORDER,
        DBG_NO_DPBB,
        DBG_NO_DFSM,
        DBG_DPBB,
        DBG_DFSM,
        DBG_NO_HYPERZ,
        DBG_NO_RB_PLUS,
        DBG_NO_2D_TILING,
        DBG_NO_TILING,
        DBG_NO_DCC,
        DBG_NO_DCC_CLEAR,
        DBG_NO_DCC_FB,

        /* Tests: */
        DBG_TEST_DMA,
        DBG_TEST_VMFAULT_CP,
        DBG_TEST_VMFAULT_SDMA,
        DBG_TEST_VMFAULT_SHADER,
};

#define DBG_ALL_SHADERS         (((1 << (DBG_CS + 1)) - 1))
#define DBG(name)               (1ull << DBG_##name)
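
/* Illustrative example (not part of the original header): testing whether a
 * debug flag from the enum above is set on the screen, using DBG():
 *
 *   if (rscreen->debug_flags & DBG(TEX))
 *           fprintf(stderr, "texture debug logging enabled\n");
 */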

#define R600_MAP_BUFFER_ALIGNMENT 64

#define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024

enum r600_coherency {
        R600_COHERENCY_NONE, /* no cache flushes needed */
        R600_COHERENCY_SHADER,
        R600_COHERENCY_CB_META,
};
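
/* Illustrative sketch (assumed variable names): the coherency mode tells
 * clear_buffer (declared below in r600_common_context) which caches must be
 * made coherent with the written data, e.g.:
 *
 *   rctx->clear_buffer(&rctx->b, dst, offset, size, 0,
 *                      R600_COHERENCY_SHADER);
 */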

#ifdef PIPE_ARCH_BIG_ENDIAN
#define R600_BIG_ENDIAN 1
#else
#define R600_BIG_ENDIAN 0
#endif

struct r600_common_context;
struct r600_perfcounters;
struct tgsi_shader_info;
struct r600_qbo_state;

void si_radeon_shader_binary_init(struct ac_shader_binary *b);
void si_radeon_shader_binary_clean(struct ac_shader_binary *b);

/* Only 32-bit buffer allocations are supported; gallium doesn't support
 * larger ones at the moment.
 */
struct r600_resource {
        struct threaded_resource        b;

        /* Winsys objects. */
        struct pb_buffer                *buf;
        uint64_t                        gpu_address;
        /* Memory usage if the buffer placement is optimal. */
        uint64_t                        vram_usage;
        uint64_t                        gart_usage;

        /* Resource properties. */
        uint64_t                        bo_size;
        unsigned                        bo_alignment;
        enum radeon_bo_domain           domains;
        enum radeon_bo_flag             flags;
        unsigned                        bind_history;

        /* The buffer range which is initialized (with a write transfer,
         * streamout, DMA, or as a random access target). The rest of
         * the buffer is considered invalid and can be mapped unsynchronized.
         *
         * This allows unsynchronized mapping of a buffer range which hasn't
         * been used yet. It's for applications which forget to use
         * the unsynchronized map flag and expect the driver to figure it out.
         */
        struct util_range               valid_buffer_range;

        /* For buffers only. This indicates that a write operation has been
         * performed by TC L2, but the cache hasn't been flushed.
         * Any hw block which doesn't use or bypasses TC L2 should check this
         * flag and flush the cache before using the buffer.
         *
         * For example, TC L2 must be flushed if a buffer which has been
         * modified by a shader store instruction is about to be used as
         * an index buffer. The reason is that VGT DMA index fetching doesn't
         * use TC L2.
         */
        bool                            TC_L2_dirty;

        /* Whether the resource has been exported via resource_get_handle. */
        unsigned                        external_usage; /* PIPE_HANDLE_USAGE_* */

        /* Whether this resource is referenced by bindless handles. */
        bool                            texture_handle_allocated;
        bool                            image_handle_allocated;
};
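
/* Illustrative sketch (assumed variable names, simplified from the buffer
 * transfer logic): valid_buffer_range lets the driver promote a map to
 * unsynchronized when the requested range was never written:
 *
 *   if (!util_ranges_intersect(&rbuffer->valid_buffer_range,
 *                              box->x, box->x + box->width))
 *           usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
 */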

struct r600_transfer {
        struct threaded_transfer        b;
        struct r600_resource            *staging;
        unsigned                        offset;
};

struct r600_fmask_info {
        uint64_t offset;
        uint64_t size;
        unsigned alignment;
        unsigned pitch_in_pixels;
        unsigned bank_height;
        unsigned slice_tile_max;
        unsigned tile_mode_index;
        unsigned tile_swizzle;
};

struct r600_cmask_info {
        uint64_t offset;
        uint64_t size;
        unsigned alignment;
        unsigned slice_tile_max;
        uint64_t base_address_reg;
};

struct r600_texture {
        struct r600_resource            resource;

        uint64_t                        size;
        unsigned                        num_level0_transfers;
        enum pipe_format                db_render_format;
        bool                            is_depth;
        bool                            db_compatible;
        bool                            can_sample_z;
        bool                            can_sample_s;
        unsigned                        dirty_level_mask; /* each bit says if that mipmap is compressed */
        unsigned                        stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
        struct r600_texture             *flushed_depth_texture;
        struct radeon_surf              surface;

        /* Colorbuffer compression and fast clear. */
        struct r600_fmask_info          fmask;
        struct r600_cmask_info          cmask;
        struct r600_resource            *cmask_buffer;
        uint64_t                        dcc_offset; /* 0 = disabled */
        unsigned                        cb_color_info; /* fast clear enable bit */
        unsigned                        color_clear_value[2];
        unsigned                        last_msaa_resolve_target_micro_mode;

        /* Depth buffer compression and fast clear. */
        uint64_t                        htile_offset;
        bool                            tc_compatible_htile;
        bool                            depth_cleared; /* if it was cleared at least once */
        float                           depth_clear_value;
        bool                            stencil_cleared; /* if it was cleared at least once */
        uint8_t                         stencil_clear_value;
        bool                            upgraded_depth; /* upgraded from unorm to Z32_FLOAT */

        bool                            non_disp_tiling; /* R600-Cayman only */

        /* Whether the texture is a displayable back buffer and needs DCC
         * decompression, which is expensive. Therefore, it's enabled only
         * if statistics suggest that it will pay off and it's allocated
         * separately. It can't be bound as a sampler by apps. Limited to
         * target == 2D and last_level == 0. If enabled, dcc_offset contains
         * the absolute GPUVM address, not the relative one.
         */
        struct r600_resource            *dcc_separate_buffer;
        /* When DCC is temporarily disabled, the separate buffer is here. */
        struct r600_resource            *last_dcc_separate_buffer;
        /* We need to track DCC dirtiness, because st/dri usually calls
         * flush_resource twice per frame (not a bug) and we don't want to
         * decompress DCC twice. Also, the dirty tracking must be done even
         * if DCC isn't used, because it's required by the DCC usage analysis
         * for a possible future enablement.
         */
        bool                            separate_dcc_dirty;
        /* Statistics gathering for the DCC enablement heuristic. */
        bool                            dcc_gather_statistics;
        /* Estimate of how much this color buffer is written to in units of
         * full-screen draws: ps_invocations / (width * height)
         * Shader kills, late Z, and blending with trivial discards make it
         * inaccurate (we need to count CB updates, not PS invocations).
         */
        unsigned                        ps_draw_ratio;
        /* The number of clears since the last DCC usage analysis. */
        unsigned                        num_slow_clears;

        /* Counter that should be non-zero if the texture is bound to a
         * framebuffer. Implemented in radeonsi only.
         */
        uint32_t                        framebuffers_bound;
};

struct r600_surface {
        struct pipe_surface             base;

        /* These can vary with block-compressed textures. */
        unsigned width0;
        unsigned height0;

        bool color_initialized;
        bool depth_initialized;

        /* Misc. color flags. */
        bool alphatest_bypass;
        bool export_16bpc;
        bool color_is_int8;
        bool color_is_int10;
        bool dcc_incompatible;

        /* Color registers. */
        unsigned cb_color_info;
        unsigned cb_color_base;
        unsigned cb_color_view;
        unsigned cb_color_size;         /* R600 only */
        unsigned cb_color_dim;          /* EG only */
        unsigned cb_color_pitch;        /* EG and later */
        unsigned cb_color_slice;        /* EG and later */
        unsigned cb_color_attrib;       /* EG and later */
        unsigned cb_color_attrib2;      /* GFX9 and later */
        unsigned cb_dcc_control;        /* VI and later */
        unsigned cb_color_fmask;        /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
        unsigned cb_color_fmask_slice;  /* EG and later */
        unsigned cb_color_cmask;        /* CB_COLORn_TILE (r600 only) */
        unsigned cb_color_mask;         /* R600 only */
        unsigned spi_shader_col_format;         /* SI+, no blending, no alpha-to-coverage. */
        unsigned spi_shader_col_format_alpha;   /* SI+, alpha-to-coverage */
        unsigned spi_shader_col_format_blend;   /* SI+, blending without alpha. */
        unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */
        struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
        struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */

        /* DB registers. */
        uint64_t db_depth_base;         /* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */
        uint64_t db_stencil_base;       /* EG and later */
        uint64_t db_htile_data_base;
        unsigned db_depth_info;         /* R600 only, then SI and later */
        unsigned db_z_info;             /* EG and later */
        unsigned db_z_info2;            /* GFX9+ */
        unsigned db_depth_view;
        unsigned db_depth_size;
        unsigned db_depth_slice;        /* EG and later */
        unsigned db_stencil_info;       /* EG and later */
        unsigned db_stencil_info2;      /* GFX9+ */
        unsigned db_prefetch_limit;     /* R600 only */
        unsigned db_htile_surface;
        unsigned db_preload_control;    /* EG and later */
};

struct r600_mmio_counter {
        unsigned busy;
        unsigned idle;
};

union r600_mmio_counters {
        struct {
                /* For global GPU load including SDMA. */
                struct r600_mmio_counter gpu;

                /* GRBM_STATUS */
                struct r600_mmio_counter spi;
                struct r600_mmio_counter gui;
                struct r600_mmio_counter ta;
                struct r600_mmio_counter gds;
                struct r600_mmio_counter vgt;
                struct r600_mmio_counter ia;
                struct r600_mmio_counter sx;
                struct r600_mmio_counter wd;
                struct r600_mmio_counter bci;
                struct r600_mmio_counter sc;
                struct r600_mmio_counter pa;
                struct r600_mmio_counter db;
                struct r600_mmio_counter cp;
                struct r600_mmio_counter cb;

                /* SRBM_STATUS2 */
                struct r600_mmio_counter sdma;

                /* CP_STAT */
                struct r600_mmio_counter pfp;
                struct r600_mmio_counter meq;
                struct r600_mmio_counter me;
                struct r600_mmio_counter surf_sync;
                struct r600_mmio_counter cp_dma;
                struct r600_mmio_counter scratch_ram;
        } named;
        unsigned array[0];
};
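
/* Illustrative sketch: the zero-sized "array" member aliases the named
 * counters so the GPU-load thread can update them generically. Each
 * r600_mmio_counter contributes two unsigneds (busy, then idle), so for
 * the counter at index i (assumed variable names):
 *
 *   p_atomic_inc(&counters->array[2 * i + (busy ? 0 : 1)]);
 */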

struct r600_memory_object {
        struct pipe_memory_object       b;
        struct pb_buffer                *buf;
        uint32_t                        stride;
        uint32_t                        offset;
};

struct r600_common_screen {
        struct pipe_screen              b;
        struct radeon_winsys            *ws;
        enum radeon_family              family;
        enum chip_class                 chip_class;
        struct radeon_info              info;
        uint64_t                        debug_flags;
        bool                            has_cp_dma;
        bool                            has_streamout;
        bool                            has_rbplus;     /* if RB+ registers exist */
        bool                            rbplus_allowed; /* if RB+ is allowed */

        struct disk_cache               *disk_shader_cache;

        struct slab_parent_pool         pool_transfers;

        /* Texture filter settings. */
        int                             force_aniso; /* -1 = disabled */

        /* Auxiliary context. Mainly used to initialize resources.
         * It must be locked prior to use and flushed before unlocking. */
        struct pipe_context             *aux_context;
        mtx_t                           aux_context_lock;

        /* This must be in the screen, because UE4 uses one context for
         * compilation and another one for rendering.
         */
        unsigned                        num_compilations;
        /* Along with ST_DEBUG=precompile, this should show if applications
         * are loading shaders on demand. This is a monotonic counter.
         */
        unsigned                        num_shaders_created;
        unsigned                        num_shader_cache_hits;

        /* GPU load thread. */
        mtx_t                           gpu_load_mutex;
        thrd_t                          gpu_load_thread;
        union r600_mmio_counters        mmio_counters;
        volatile unsigned               gpu_load_stop_thread; /* bool */

        char                            renderer_string[100];

        /* Performance counters. */
        struct r600_perfcounters        *perfcounters;

        /* If pipe_screen wants to recompute and re-emit the framebuffer,
         * sampler, and image states of all contexts, it should atomically
         * increment this.
         *
         * Each context will compare this with its own last known value of
         * the counter before drawing and re-emit the states accordingly.
         */
        unsigned                        dirty_tex_counter;

        /* Atomically increment this counter when an existing texture's
         * metadata is enabled or disabled in a way that requires changing
         * contexts' compressed texture binding masks.
         */
        unsigned                        compressed_colortex_counter;

        struct {
                /* Context flags to set so that all writes from earlier jobs
                 * in the CP are seen by L2 clients.
                 */
                unsigned cp_to_L2;

                /* Context flags to set so that all writes from earlier jobs
                 * that end in L2 are seen by CP.
                 */
                unsigned L2_to_cp;

                /* Context flags to set so that all writes from earlier
                 * compute jobs are seen by L2 clients.
                 */
                unsigned compute_to_L2;
        } barrier_flags;

        void (*query_opaque_metadata)(struct r600_common_screen *rscreen,
                                      struct r600_texture *rtex,
                                      struct radeon_bo_metadata *md);

        void (*apply_opaque_metadata)(struct r600_common_screen *rscreen,
                                      struct r600_texture *rtex,
                                      struct radeon_bo_metadata *md);
};
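
/* Illustrative sketch of the dirty_tex_counter protocol described above
 * (assumed variable names; the real check happens in the draw path):
 *
 *   unsigned counter = p_atomic_read(&rscreen->dirty_tex_counter);
 *   if (counter != rctx->last_dirty_tex_counter) {
 *           rctx->last_dirty_tex_counter = counter;
 *           ... re-emit framebuffer, sampler, and image states ...
 *   }
 */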

/* This encapsulates a state or an operation which can be emitted into the GPU
 * command stream. */
struct r600_atom {
        void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
        unsigned short          id;
};
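
/* Illustrative sketch (assumed names): drivers implement an atom's emit
 * callback and mark the atom dirty through the context vfunc declared
 * below, so it is re-emitted with the next draw:
 *
 *   rctx->set_atom_dirty(rctx, &rctx->render_cond_atom, true);
 */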

struct r600_ring {
        struct radeon_winsys_cs         *cs;
        void (*flush)(void *ctx, unsigned flags,
                      struct pipe_fence_handle **fence);
};

/* Saved CS data for debugging features. */
struct radeon_saved_cs {
        uint32_t                        *ib;
        unsigned                        num_dw;

        struct radeon_bo_list_item      *bo_list;
        unsigned                        bo_count;
};

struct r600_common_context {
        struct pipe_context b; /* base class */

        struct r600_common_screen       *screen;
        struct radeon_winsys            *ws;
        struct radeon_winsys_ctx        *ctx;
        enum radeon_family              family;
        enum chip_class                 chip_class;
        struct r600_ring                gfx;
        struct r600_ring                dma;
        struct pipe_fence_handle        *last_gfx_fence;
        struct pipe_fence_handle        *last_sdma_fence;
        struct r600_resource            *eop_bug_scratch;
        unsigned                        num_gfx_cs_flushes;
        unsigned                        initial_gfx_cs_size;
        unsigned                        gpu_reset_counter;
        unsigned                        last_dirty_tex_counter;
        unsigned                        last_compressed_colortex_counter;
        unsigned                        last_num_draw_calls;

        struct threaded_context         *tc;
        struct u_suballocator           *allocator_zeroed_memory;
        struct slab_child_pool          pool_transfers;
        struct slab_child_pool          pool_transfers_unsync; /* for threaded_context */

        /* Current unaccounted memory usage. */
        uint64_t                        vram;
        uint64_t                        gtt;

        /* Additional context states. */
        unsigned flags; /* flush flags */

        /* Queries. */
        /* Maintain the list of active queries for pausing between IBs. */
        int                             num_occlusion_queries;
        int                             num_perfect_occlusion_queries;
        struct list_head                active_queries;
        unsigned                        num_cs_dw_queries_suspend;
        /* Misc stats. */
        unsigned                        num_draw_calls;
        unsigned                        num_decompress_calls;
        unsigned                        num_mrt_draw_calls;
        unsigned                        num_prim_restart_calls;
        unsigned                        num_spill_draw_calls;
        unsigned                        num_compute_calls;
        unsigned                        num_spill_compute_calls;
        unsigned                        num_dma_calls;
        unsigned                        num_cp_dma_calls;
        unsigned                        num_vs_flushes;
        unsigned                        num_ps_flushes;
        unsigned                        num_cs_flushes;
        unsigned                        num_cb_cache_flushes;
        unsigned                        num_db_cache_flushes;
        unsigned                        num_L2_invalidates;
        unsigned                        num_L2_writebacks;
        unsigned                        num_resident_handles;
        uint64_t                        num_alloc_tex_transfer_bytes;
        unsigned                        last_tex_ps_draw_ratio; /* for query */

        /* Render condition. */
        struct r600_atom                render_cond_atom;
        struct pipe_query               *render_cond;
        unsigned                        render_cond_mode;
        bool                            render_cond_invert;
        bool                            render_cond_force_off; /* for u_blitter */

        /* Statistics gathering for the DCC enablement heuristic. It can't be
         * in r600_texture because r600_texture can be shared by multiple
         * contexts. This is for back buffers only. We shouldn't get too many
         * of those.
         *
         * X11 DRI3 rotates among a finite set of back buffers. They should
         * all fit in this array. If they don't, separate DCC might never be
         * enabled by DCC stat gathering.
         */
        struct {
                struct r600_texture             *tex;
                /* Query queue: 0 = usually active, 1 = waiting, 2 = readback. */
                struct pipe_query               *ps_stats[3];
                /* If all slots are used and another slot is needed,
                 * the least recently used slot is evicted based on this. */
                int64_t                         last_use_timestamp;
                bool                            query_active;
        } dcc_stats[5];

        struct pipe_debug_callback      debug;
        struct pipe_device_reset_callback device_reset_callback;
        struct u_log_context            *log;

        void                            *query_result_shader;

        /* Copy one resource to another using async DMA. */
        void (*dma_copy)(struct pipe_context *ctx,
                         struct pipe_resource *dst,
                         unsigned dst_level,
                         unsigned dst_x, unsigned dst_y, unsigned dst_z,
                         struct pipe_resource *src,
                         unsigned src_level,
                         const struct pipe_box *src_box);

        void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
                                 uint64_t offset, uint64_t size, unsigned value);

        void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
                             uint64_t offset, uint64_t size, unsigned value,
                             enum r600_coherency coher);

        void (*blit_decompress_depth)(struct pipe_context *ctx,
                                      struct r600_texture *texture,
                                      struct r600_texture *staging,
                                      unsigned first_level, unsigned last_level,
                                      unsigned first_layer, unsigned last_layer,
                                      unsigned first_sample, unsigned last_sample);

        void (*decompress_dcc)(struct pipe_context *ctx,
                               struct r600_texture *rtex);

        /* Reallocate the buffer and update all resource bindings where
         * the buffer is bound, including all resource descriptors. */
        void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf);

        /* Update all resource bindings where the buffer is bound, including
         * all resource descriptors. This is invalidate_buffer without
         * the invalidation. */
        void (*rebind_buffer)(struct pipe_context *ctx, struct pipe_resource *buf,
                              uint64_t old_gpu_address);

        /* Enable or disable occlusion queries. */
        void (*set_occlusion_query_state)(struct pipe_context *ctx,
                                          bool old_enable,
                                          bool old_perfect_enable);

        void (*save_qbo_state)(struct pipe_context *ctx, struct r600_qbo_state *st);

        /* This ensures there is enough space in the command stream. */
        void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw,
                                  bool include_draw_vbo);

        void (*set_atom_dirty)(struct r600_common_context *ctx,
                               struct r600_atom *atom, bool dirty);

        void (*check_vm_faults)(struct r600_common_context *ctx,
                                struct radeon_saved_cs *saved,
                                enum ring_type ring);
};

/* r600_buffer_common.c */
bool si_rings_is_buffer_referenced(struct r600_common_context *ctx,
                                   struct pb_buffer *buf,
                                   enum radeon_bo_usage usage);
void *si_buffer_map_sync_with_rings(struct r600_common_context *ctx,
                                    struct r600_resource *resource,
                                    unsigned usage);
void si_buffer_subdata(struct pipe_context *ctx,
                       struct pipe_resource *buffer,
                       unsigned usage, unsigned offset,
                       unsigned size, const void *data);
void si_init_resource_fields(struct r600_common_screen *rscreen,
                             struct r600_resource *res,
                             uint64_t size, unsigned alignment);
bool si_alloc_resource(struct r600_common_screen *rscreen,
                       struct r600_resource *res);
struct pipe_resource *si_buffer_create(struct pipe_screen *screen,
                                       const struct pipe_resource *templ,
                                       unsigned alignment);
struct pipe_resource *si_aligned_buffer_create(struct pipe_screen *screen,
                                               unsigned flags,
                                               unsigned usage,
                                               unsigned size,
                                               unsigned alignment);
struct pipe_resource *
si_buffer_from_user_memory(struct pipe_screen *screen,
                           const struct pipe_resource *templ,
                           void *user_memory);
void si_invalidate_resource(struct pipe_context *ctx,
                            struct pipe_resource *resource);
void si_replace_buffer_storage(struct pipe_context *ctx,
                               struct pipe_resource *dst,
                               struct pipe_resource *src);

/* r600_common_pipe.c */
void si_gfx_write_event_eop(struct r600_common_context *ctx,
                            unsigned event, unsigned event_flags,
                            unsigned data_sel,
                            struct r600_resource *buf, uint64_t va,
                            uint32_t new_fence, unsigned query_type);
unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen);
void si_gfx_wait_fence(struct r600_common_context *ctx,
                       uint64_t va, uint32_t ref, uint32_t mask);
bool si_common_screen_init(struct r600_common_screen *rscreen,
                           struct radeon_winsys *ws);
void si_destroy_common_screen(struct r600_common_screen *rscreen);
void si_preflush_suspend_features(struct r600_common_context *ctx);
void si_postflush_resume_features(struct r600_common_context *ctx);
bool si_common_context_init(struct r600_common_context *rctx,
                            struct r600_common_screen *rscreen,
                            unsigned context_flags);
void si_common_context_cleanup(struct r600_common_context *rctx);
bool si_can_dump_shader(struct r600_common_screen *rscreen,
                        unsigned processor);
bool si_extra_shader_checks(struct r600_common_screen *rscreen,
                            unsigned processor);
void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
                            uint64_t offset, uint64_t size, unsigned value);
struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
                                                const struct pipe_resource *templ);
const char *si_get_llvm_processor_name(enum radeon_family family);
void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
                       struct r600_resource *dst, struct r600_resource *src);
void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
                struct radeon_saved_cs *saved, bool get_buffer_list);
void si_clear_saved_cs(struct radeon_saved_cs *saved);
bool si_check_device_reset(struct r600_common_context *rctx);

/* r600_gpu_load.c */
void si_gpu_load_kill_thread(struct r600_common_screen *rscreen);
uint64_t si_begin_counter(struct r600_common_screen *rscreen, unsigned type);
unsigned si_end_counter(struct r600_common_screen *rscreen, unsigned type,
                        uint64_t begin);

/* r600_perfcounters.c */
void si_perfcounters_destroy(struct r600_common_screen *rscreen);

/* r600_query.c */
void si_init_screen_query_functions(struct r600_common_screen *rscreen);
void si_init_query_functions(struct r600_common_context *rctx);
void si_suspend_queries(struct r600_common_context *ctx);
void si_resume_queries(struct r600_common_context *ctx);

/* r600_test_dma.c */
void si_test_dma(struct r600_common_screen *rscreen);

/* r600_texture.c */
bool si_prepare_for_dma_blit(struct r600_common_context *rctx,
                             struct r600_texture *rdst,
                             unsigned dst_level, unsigned dstx,
                             unsigned dsty, unsigned dstz,
                             struct r600_texture *rsrc,
                             unsigned src_level,
                             const struct pipe_box *src_box);
void si_texture_get_fmask_info(struct r600_common_screen *rscreen,
                               struct r600_texture *rtex,
                               unsigned nr_samples,
                               struct r600_fmask_info *out);
bool si_init_flushed_depth_texture(struct pipe_context *ctx,
                                   struct pipe_resource *texture,
                                   struct r600_texture **staging);
void si_print_texture_info(struct r600_common_screen *rscreen,
                           struct r600_texture *rtex, struct u_log_context *log);
struct pipe_resource *si_texture_create(struct pipe_screen *screen,
                                        const struct pipe_resource *templ);
bool vi_dcc_formats_compatible(enum pipe_format format1,
                               enum pipe_format format2);
bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex,
                                     unsigned level,
                                     enum pipe_format view_format);
void vi_disable_dcc_if_incompatible_format(struct r600_common_context *rctx,
                                           struct pipe_resource *tex,
                                           unsigned level,
                                           enum pipe_format view_format);
struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
                                              struct pipe_resource *texture,
                                              const struct pipe_surface *templ,
                                              unsigned width0, unsigned height0,
                                              unsigned width, unsigned height);
unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap);
void vi_separate_dcc_start_query(struct pipe_context *ctx,
                                 struct r600_texture *tex);
void vi_separate_dcc_stop_query(struct pipe_context *ctx,
                                struct r600_texture *tex);
void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
                                             struct r600_texture *tex);
void vi_dcc_clear_level(struct r600_common_context *rctx,
                        struct r600_texture *rtex,
                        unsigned level, unsigned clear_value);
void si_do_fast_color_clear(struct r600_common_context *rctx,
                            struct pipe_framebuffer_state *fb,
                            struct r600_atom *fb_state,
                            unsigned *buffers, ubyte *dirty_cbufs,
                            const union pipe_color_union *color);
bool si_texture_disable_dcc(struct r600_common_context *rctx,
                            struct r600_texture *rtex);
void si_init_screen_texture_functions(struct r600_common_screen *rscreen);
void si_init_context_texture_functions(struct r600_common_context *rctx);


/* Inline helpers. */

static inline struct r600_resource *r600_resource(struct pipe_resource *r)
{
        return (struct r600_resource*)r;
}

static inline void
r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
{
        pipe_resource_reference((struct pipe_resource **)ptr,
                                (struct pipe_resource *)res);
}

static inline void
r600_texture_reference(struct r600_texture **ptr, struct r600_texture *res)
{
        pipe_resource_reference((struct pipe_resource **)ptr, &res->resource.b.b);
}

static inline void
r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
{
        struct r600_common_context *rctx = (struct r600_common_context *)ctx;
        struct r600_resource *res = (struct r600_resource *)r;

        if (res) {
                /* Add memory usage for need_gfx_cs_space */
                rctx->vram += res->vram_usage;
                rctx->gtt += res->gart_usage;
        }
}

#define     SQ_TEX_XY_FILTER_POINT                         0x00
#define     SQ_TEX_XY_FILTER_BILINEAR                      0x01
#define     SQ_TEX_XY_FILTER_ANISO_POINT                   0x02
#define     SQ_TEX_XY_FILTER_ANISO_BILINEAR                0x03

static inline unsigned eg_tex_filter(unsigned filter, unsigned max_aniso)
{
        if (filter == PIPE_TEX_FILTER_LINEAR)
                return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_BILINEAR
                                     : SQ_TEX_XY_FILTER_BILINEAR;
        else
                return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_POINT
                                     : SQ_TEX_XY_FILTER_POINT;
}

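/* Map the max_anisotropy sampler setting to the hardware's log2-encoded
 * field: 2x -> 1, 4x -> 2, 8x -> 3, 16x and up -> 4. */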
static inline unsigned r600_tex_aniso_filter(unsigned filter)
{
        if (filter < 2)
                return 0;
        if (filter < 4)
                return 1;
        if (filter < 8)
                return 2;
        if (filter < 16)
                return 3;
        return 4;
}

static inline enum radeon_bo_priority
r600_get_sampler_view_priority(struct r600_resource *res)
{
        if (res->b.b.target == PIPE_BUFFER)
                return RADEON_PRIO_SAMPLER_BUFFER;

        if (res->b.b.nr_samples > 1)
                return RADEON_PRIO_SAMPLER_TEXTURE_MSAA;

        return RADEON_PRIO_SAMPLER_TEXTURE;
}

static inline bool
r600_can_sample_zs(struct r600_texture *tex, bool stencil_sampler)
{
        return (stencil_sampler && tex->can_sample_s) ||
               (!stencil_sampler && tex->can_sample_z);
}

static inline bool
vi_dcc_enabled(struct r600_texture *tex, unsigned level)
{
        return tex->dcc_offset && level < tex->surface.num_dcc_levels;
}

static inline bool
r600_htile_enabled(struct r600_texture *tex, unsigned level)
{
        return tex->htile_offset && level == 0;
}

static inline bool
vi_tc_compat_htile_enabled(struct r600_texture *tex, unsigned level)
{
        assert(!tex->tc_compatible_htile || tex->htile_offset);
        return tex->tc_compatible_htile && level == 0;
}

/* Note: no trailing semicolon after "while (0)", so the macro expands to a
 * single statement and stays safe inside if/else chains. */
#define COMPUTE_DBG(rscreen, fmt, args...) \
        do { \
                if ((rscreen->b.debug_flags & DBG(COMPUTE))) fprintf(stderr, fmt, ##args); \
        } while (0)
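/* Example (illustrative): COMPUTE_DBG(rscreen, "grid launched: %u blocks\n", n); */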

#define R600_ERR(fmt, args...) \
        fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args)

static inline int S_FIXED(float value, unsigned frac_bits)
{
        return value * (1 << frac_bits);
}
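
/* Example: S_FIXED(1.5f, 4) == 24, i.e. 1.5 in fixed point with 4 fractional
 * bits (1.5 * 16). */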

#endif