/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "r600_pipe.h"
#include "r600d.h"
#include "util/u_memory.h"
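
/* Timer queries (timestamps, elapsed time) are tracked separately from
 * nontimer queries: the two groups are suspended and resumed independently,
 * so each keeps its own count of the CS dwords needed to suspend its
 * active queries. */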
static bool r600_is_timer_query(unsigned type)
{
	return type == PIPE_QUERY_TIME_ELAPSED ||
	       type == PIPE_QUERY_TIMESTAMP ||
	       type == PIPE_QUERY_TIMESTAMP_DISJOINT;
}
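
/* GPU_FINISHED and TIMESTAMP only produce a value at end_query, so they
 * have no begin emission and are never put on the active query lists. */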
static bool r600_query_needs_begin(unsigned type)
{
	return type != PIPE_QUERY_GPU_FINISHED &&
	       type != PIPE_QUERY_TIMESTAMP;
}
static struct r600_resource *r600_new_query_buffer(struct r600_context *ctx, unsigned type)
{
	unsigned j, i, num_results, buf_size = 4096;
	uint32_t *results;

	/* Queries are normally read by the CPU after
	 * being written by the GPU, hence staging is probably a good
	 * usage pattern.
	 */
	struct r600_resource *buf = (struct r600_resource*)
		pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM,
				   PIPE_USAGE_STAGING, buf_size);

	switch (type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		results = ctx->ws->buffer_map(buf->buf, ctx->cs, PIPE_TRANSFER_WRITE);
		memset(results, 0, buf_size);

		/* Set top bits for unused backends. */
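		/* Each enabled DB writes a (begin, end) pair of 64-bit zpass
		 * counters and sets bit 63 of each value once it has been
		 * written. Pre-setting that bit (bit 31 of the high dword)
		 * for DBs missing from backend_mask marks their always-zero
		 * slots as ready, so result collection never waits on them. */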
		num_results = buf_size / (16 * ctx->max_db);
		for (j = 0; j < num_results; j++) {
			for (i = 0; i < ctx->max_db; i++) {
				if (!(ctx->backend_mask & (1<<i))) {
					results[(i * 4)+1] = 0x80000000;
					results[(i * 4)+3] = 0x80000000;
				}
			}
			results += 4 * ctx->max_db;
		}
		ctx->ws->buffer_unmap(buf->buf);
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		results = ctx->ws->buffer_map(buf->buf, ctx->cs, PIPE_TRANSFER_WRITE);
		memset(results, 0, buf_size);
		ctx->ws->buffer_unmap(buf->buf);
		break;
	default:
		assert(0);
	}
	return buf;
}
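
/* Emit the packets that start GPU counter collection for a query, writing
 * the "begin" half of the result pair at the current results_end offset. */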
static void r600_emit_query_begin(struct r600_context *ctx, struct r600_query *query)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	uint64_t va;

	r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);

	/* Get a new query buffer if needed. */
	if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.b.width0) {
		struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
		*qbuf = query->buffer;
		query->buffer.buf = r600_new_query_buffer(ctx, query->type);
		query->buffer.results_end = 0;
		query->buffer.previous = qbuf;
	}

	/* emit begin query */
	va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer.buf);
	va += query->buffer.results_end;

	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
		cs->buf[cs->cdw++] = va;
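		/* DATA_SEL = 3 in bits [31:29] asks the CP to write the
		 * 64-bit GPU clock counter to the given address. */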
		cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = 0;
		break;
	default:
		assert(0);
	}
	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer.buf, RADEON_USAGE_WRITE);

	if (r600_is_timer_query(query->type)) {
		ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw;
	} else {
		ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
	}
}
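
/* Emit the packets that write the "end" half of the result pair, then
 * advance results_end past the finished pair. */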
static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *query)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	uint64_t va;

	/* The queries which need begin already called this in begin_query. */
	if (!r600_query_needs_begin(query->type)) {
		r600_need_cs_space(ctx, query->num_cs_dw, FALSE);
	}

	va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer.buf);

	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
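		/* The end counter occupies the second half of each 16-byte
		 * ZPASS_DONE slot, hence the +8 past the begin counter. */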
		va += query->buffer.results_end + 8;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		va += query->buffer.results_end + query->result_size/2;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		va += query->buffer.results_end + query->result_size/2;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = 0;
		break;
	default:
		assert(0);
	}
	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer.buf, RADEON_USAGE_WRITE);

	query->buffer.results_end += query->result_size;

	if (r600_query_needs_begin(query->type)) {
		if (r600_is_timer_query(query->type)) {
			ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw;
		} else {
			ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
		}
	}
}
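
/* Program the SET_PREDICATION state used by render_condition. For the
 * non-CLEAR case, one predication packet is emitted per result pair, and
 * PREDICATION_CONTINUE chains the packets so the hardware combines all
 * data blocks into a single predicate. */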
static void r600_emit_query_predication(struct r600_context *ctx, struct r600_query *query,
					int operation, bool flag_wait)
{
	struct radeon_winsys_cs *cs = ctx->cs;

	if (operation == PREDICATION_OP_CLEAR) {
		r600_need_cs_space(ctx, 3, FALSE);

		cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR);
	} else {
		struct r600_query_buffer *qbuf;
		unsigned count = 0;
		uint32_t op;

		/* Find how many results there are. */
		for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
			count += qbuf->results_end / query->result_size;
		}

		r600_need_cs_space(ctx, 5 * count, TRUE);

		op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
				(flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);

		/* emit predicate packets for all data blocks */
		for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
			unsigned results_base = 0;
			uint64_t va = r600_resource_va(&ctx->screen->screen, &qbuf->buf->b.b.b);

			while (results_base < qbuf->results_end) {
				cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
				cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL;
				cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF);
				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
				cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, qbuf->buf, RADEON_USAGE_READ);
				results_base += query->result_size;

				/* set CONTINUE bit for all packets except the first */
				op |= PREDICATION_CONTINUE;
			}
		}
	}
}
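
/* pipe_context::create_query: allocate the query object and its first
 * results buffer. result_size is the size of one begin/end result pair;
 * num_cs_dw is the CS space a single begin or end emission needs, used
 * for the suspend bookkeeping above. */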
static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *query;

	query = CALLOC_STRUCT(r600_query);
	if (query == NULL)
		return NULL;

	query->type = query_type;

	switch (query_type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		query->result_size = 16 * rctx->max_db;
		query->num_cs_dw = 6;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		query->result_size = 16;
		query->num_cs_dw = 8;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
		query->result_size = 32;
		query->num_cs_dw = 6;
		break;
	default:
		assert(0);
		FREE(query);
		return NULL;
	}

	query->buffer.buf = r600_new_query_buffer(rctx, query_type);
	if (!query->buffer.buf) {
		FREE(query);
		return NULL;
	}
	return (struct pipe_query*)query;
}

static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
{
	struct r600_query *rquery = (struct r600_query*)query;
	struct r600_query_buffer *prev = rquery->buffer.previous;

	/* Release all query buffers. */
	while (prev) {
		struct r600_query_buffer *qbuf = prev;
		prev = prev->previous;
		pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
		FREE(qbuf);
	}

	pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
	FREE(query);
}
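
/* Keep a count of active occlusion queries so the zpass counting state in
 * the DB (db_misc_state) is only enabled while at least one is running. */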
static void r600_update_occlusion_query_state(struct r600_context *rctx,
					      unsigned type, int diff)
{
	if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
	    type == PIPE_QUERY_OCCLUSION_PREDICATE) {
		bool enable;

		rctx->num_occlusion_queries += diff;
		assert(rctx->num_occlusion_queries >= 0);

		enable = rctx->num_occlusion_queries != 0;

		if (rctx->db_misc_state.occlusion_query_enabled != enable) {
			rctx->db_misc_state.occlusion_query_enabled = enable;
			r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
		}
	}
}

static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;
	struct r600_query_buffer *prev = rquery->buffer.previous;

	if (!r600_query_needs_begin(rquery->type)) {
		assert(0);
		return;
	}

	/* Discard the old query buffers. */
	while (prev) {
		struct r600_query_buffer *qbuf = prev;
		prev = prev->previous;
		pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
		FREE(qbuf);
	}

	/* Obtain a new buffer if the current one can't be mapped without a stall. */
	if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
	    rctx->ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
		pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
		rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
	}

	rquery->buffer.results_end = 0;
	rquery->buffer.previous = NULL;

	r600_update_occlusion_query_state(rctx, rquery->type, 1);

	r600_emit_query_begin(rctx, rquery);

	if (r600_is_timer_query(rquery->type)) {
		LIST_ADDTAIL(&rquery->list, &rctx->active_timer_queries);
	} else {
		LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_queries);
	}
}

static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;

	r600_emit_query_end(rctx, rquery);

	if (r600_query_needs_begin(rquery->type)) {
		LIST_DELINIT(&rquery->list);
	}

	r600_update_occlusion_query_state(rctx, rquery->type, -1);
}
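
/* Read one 64-bit (start, end) counter pair and return end - start.
 * When test_status_bit is set, bit 63 of each value is the "written" flag
 * set by the GPU; the pair only counts once both halves have landed. */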
static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
				       bool test_status_bit)
{
	uint32_t *current_result = (uint32_t*)map;
	uint64_t start, end;

	start = (uint64_t)current_result[start_index] |
		(uint64_t)current_result[start_index+1] << 32;
	end = (uint64_t)current_result[end_index] |
	      (uint64_t)current_result[end_index+1] << 32;

	if (!test_status_bit ||
	    ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) {
		return end - start;
	}
	return 0;
}

static boolean r600_get_query_buffer_result(struct r600_context *ctx,
					    struct r600_query *query,
					    struct r600_query_buffer *qbuf,
					    boolean wait,
					    union r600_query_result *result)
{
	unsigned results_base = 0;
	char *map;

	map = ctx->ws->buffer_map(qbuf->buf->buf, ctx->cs,
				  PIPE_TRANSFER_READ |
				  (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
	if (!map)
		return FALSE;

	/* count all results across all data blocks */
	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 2, true);
			results_base += 16;
		}
		break;
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		while (results_base != qbuf->results_end) {
			result->b = result->b ||
				r600_query_read_result(map + results_base, 0, 2, true) != 0;
			results_base += 16;
		}
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 2, false);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
		/* SAMPLE_STREAMOUTSTATS stores this structure:
		 * {
		 *    u64 NumPrimitivesWritten;
		 *    u64 PrimitiveStorageNeeded;
		 * }
		 * We only need NumPrimitivesWritten here. */
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 2, 6, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_PRIMITIVES_GENERATED:
		/* Here we read PrimitiveStorageNeeded. */
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_SO_STATISTICS:
		while (results_base != qbuf->results_end) {
			result->so.num_primitives_written +=
				r600_query_read_result(map + results_base, 2, 6, true);
			result->so.primitives_storage_needed +=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		while (results_base != qbuf->results_end) {
			result->b = result->b ||
				r600_query_read_result(map + results_base, 2, 6, true) !=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	default:
		assert(0);
	}

	ctx->ws->buffer_unmap(qbuf->buf->buf);
	return TRUE;
}
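
/* pipe_context::get_query_result: accumulate the results from every buffer
 * in the chain, then convert to the type the state tracker expects. */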
static boolean r600_get_query_result(struct pipe_context *ctx,
				     struct pipe_query *query,
				     boolean wait, void *vresult)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;
	boolean *result_b = (boolean*)vresult;
	uint64_t *result_u64 = (uint64_t*)vresult;
	union r600_query_result result;
	struct pipe_query_data_so_statistics *result_so =
		(struct pipe_query_data_so_statistics*)vresult;
	struct r600_query_buffer *qbuf;

	memset(&result, 0, sizeof(result));

	for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) {
		if (!r600_get_query_buffer_result(rctx, rquery, qbuf, wait, &result)) {
			return FALSE;
		}
	}

	switch (rquery->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
		*result_u64 = result.u64;
		break;
	case PIPE_QUERY_OCCLUSION_PREDICATE:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		*result_b = result.b;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
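		/* Convert GPU ticks to nanoseconds; the crystal clock
		 * frequency reported by the kernel is in kHz. */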
		*result_u64 = (1000000 * result.u64) / rctx->screen->info.r600_clock_crystal_freq;
		break;
	case PIPE_QUERY_SO_STATISTICS:
		*result_so = result.so;
		break;
	default:
		assert(0);
	}
	return TRUE;
}
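
/* pipe_context::render_condition: make subsequent draws conditional on a
 * query result. A NULL query disables predication. */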
static void r600_render_condition(struct pipe_context *ctx,
				  struct pipe_query *query,
				  uint mode)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;
	bool wait_flag = false;

	rctx->current_render_cond = query;
	rctx->current_render_cond_mode = mode;

	if (query == NULL) {
		if (rctx->predicate_drawing) {
			rctx->predicate_drawing = false;
			r600_emit_query_predication(rctx, NULL, PREDICATION_OP_CLEAR, false);
		}
		return;
	}

	if (mode == PIPE_RENDER_COND_WAIT ||
	    mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
		wait_flag = true;
	}

	rctx->predicate_drawing = true;

	switch (rquery->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		r600_emit_query_predication(rctx, rquery, PREDICATION_OP_ZPASS, wait_flag);
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		r600_emit_query_predication(rctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag);
		break;
	default:
		assert(0);
	}
}
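
/* Suspend every active query before a command stream flush (or other
 * interruption) and resume it afterwards; emitting end now and begin later
 * lets the results keep accumulating across the break. */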
void r600_suspend_nontimer_queries(struct r600_context *ctx)
{
	struct r600_query *query;

	LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
		r600_emit_query_end(ctx, query);
	}
	assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
}

void r600_resume_nontimer_queries(struct r600_context *ctx)
{
	struct r600_query *query;

	assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);

	LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
		r600_emit_query_begin(ctx, query);
	}
}

void r600_suspend_timer_queries(struct r600_context *ctx)
{
	struct r600_query *query;

	LIST_FOR_EACH_ENTRY(query, &ctx->active_timer_queries, list) {
		r600_emit_query_end(ctx, query);
	}
	assert(ctx->num_cs_dw_timer_queries_suspend == 0);
}

void r600_resume_timer_queries(struct r600_context *ctx)
{
	struct r600_query *query;

	assert(ctx->num_cs_dw_timer_queries_suspend == 0);

	LIST_FOR_EACH_ENTRY(query, &ctx->active_timer_queries, list) {
		r600_emit_query_begin(ctx, query);
	}
}

void r600_init_query_functions(struct r600_context *rctx)
{
	rctx->context.create_query = r600_create_query;
	rctx->context.destroy_query = r600_destroy_query;
	rctx->context.begin_query = r600_begin_query;
	rctx->context.end_query = r600_end_query;
	rctx->context.get_query_result = r600_get_query_result;
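
	/* r600_num_backends is zero when the kernel is too old to report
	 * it; without a reliable backend mask the occlusion results (and
	 * thus predication) can't be trusted, so leave render_condition
	 * unset in that case. */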
	if (rctx->screen->info.r600_num_backends > 0)
		rctx->context.render_condition = r600_render_condition;
}