/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
35 VkResult genX(CreateQueryPool)(
37 const VkQueryPoolCreateInfo* pCreateInfo,
38 const VkAllocationCallbacks* pAllocator,
39 VkQueryPool* pQueryPool)
41 ANV_FROM_HANDLE(anv_device, device, _device);
42 struct anv_query_pool *pool;
47 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);
49 switch (pCreateInfo->queryType) {
50 case VK_QUERY_TYPE_OCCLUSION:
51 case VK_QUERY_TYPE_TIMESTAMP:
53 case VK_QUERY_TYPE_PIPELINE_STATISTICS:
54 return VK_ERROR_INCOMPATIBLE_DRIVER;
56 assert(!"Invalid query type");
59 slot_size = sizeof(struct anv_query_pool_slot);
60 pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
61 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
63 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
65 pool->type = pCreateInfo->queryType;
66 pool->slots = pCreateInfo->queryCount;
68 size = pCreateInfo->queryCount * slot_size;
69 result = anv_bo_init_new(&pool->bo, device, size);
70 if (result != VK_SUCCESS)
73 pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size, 0);
75 *pQueryPool = anv_query_pool_to_handle(pool);
80 vk_free2(&device->alloc, pAllocator, pool);
85 void genX(DestroyQueryPool)(
88 const VkAllocationCallbacks* pAllocator)
90 ANV_FROM_HANDLE(anv_device, device, _device);
91 ANV_FROM_HANDLE(anv_query_pool, pool, _pool);
96 anv_gem_munmap(pool->bo.map, pool->bo.size);
97 anv_gem_close(device, pool->bo.gem_handle);
98 vk_free2(&device->alloc, pAllocator, pool);
101 VkResult genX(GetQueryPoolResults)(
103 VkQueryPool queryPool,
109 VkQueryResultFlags flags)
111 ANV_FROM_HANDLE(anv_device, device, _device);
112 ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
113 int64_t timeout = INT64_MAX;
117 assert(pool->type == VK_QUERY_TYPE_OCCLUSION ||
118 pool->type == VK_QUERY_TYPE_TIMESTAMP);
123 if (flags & VK_QUERY_RESULT_WAIT_BIT) {
124 ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout);
126 /* We don't know the real error. */
127 return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
128 "gem_wait failed %m");
132 void *data_end = pData + dataSize;
133 struct anv_query_pool_slot *slot = pool->bo.map;
135 if (!device->info.has_llc) {
136 uint64_t offset = firstQuery * sizeof(*slot);
137 uint64_t size = queryCount * sizeof(*slot);
138 anv_invalidate_range(pool->bo.map + offset,
139 MIN2(size, pool->bo.size - offset));
142 VkResult status = VK_SUCCESS;
143 for (uint32_t i = 0; i < queryCount; i++) {
144 bool available = slot[firstQuery + i].available;
146 /* From the Vulkan 1.0.42 spec:
148 * "If VK_QUERY_RESULT_WAIT_BIT and VK_QUERY_RESULT_PARTIAL_BIT are
149 * both not set then no result values are written to pData for
150 * queries that are in the unavailable state at the time of the call,
151 * and vkGetQueryPoolResults returns VK_NOT_READY. However,
152 * availability state is still written to pData for those queries if
153 * VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set."
155 bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT);
158 switch (pool->type) {
159 case VK_QUERY_TYPE_OCCLUSION: {
160 result = slot[firstQuery + i].end - slot[firstQuery + i].begin;
163 case VK_QUERY_TYPE_PIPELINE_STATISTICS:
164 unreachable("pipeline stats not supported");
165 case VK_QUERY_TYPE_TIMESTAMP: {
166 result = slot[firstQuery + i].begin;
170 unreachable("invalid pool type");
173 status = VK_NOT_READY;
176 if (flags & VK_QUERY_RESULT_64_BIT) {
177 uint64_t *dst = pData;
180 if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
181 dst[1] = slot[firstQuery + i].available;
183 uint32_t *dst = pData;
184 if (result > UINT32_MAX)
188 if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
189 dst[1] = slot[firstQuery + i].available;
193 if (pData >= data_end)
201 emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
202 struct anv_bo *bo, uint32_t offset)
204 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
205 pc.DestinationAddressType = DAT_PPGTT;
206 pc.PostSyncOperation = WritePSDepthCount;
207 pc.DepthStallEnable = true;
208 pc.Address = (struct anv_address) { bo, offset };
210 if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4)
211 pc.CommandStreamerStallEnable = true;
216 emit_query_availability(struct anv_cmd_buffer *cmd_buffer,
217 struct anv_bo *bo, uint32_t offset)
219 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
220 pc.DestinationAddressType = DAT_PPGTT;
221 pc.PostSyncOperation = WriteImmediateData;
222 pc.Address = (struct anv_address) { bo, offset };
223 pc.ImmediateData = 1;
227 void genX(CmdResetQueryPool)(
228 VkCommandBuffer commandBuffer,
229 VkQueryPool queryPool,
233 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
234 ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
236 for (uint32_t i = 0; i < queryCount; i++) {
237 switch (pool->type) {
238 case VK_QUERY_TYPE_OCCLUSION:
239 case VK_QUERY_TYPE_TIMESTAMP: {
240 anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdm) {
241 sdm.Address = (struct anv_address) {
243 .offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot) +
244 offsetof(struct anv_query_pool_slot, available),
252 assert(!"Invalid query type");
257 void genX(CmdBeginQuery)(
258 VkCommandBuffer commandBuffer,
259 VkQueryPool queryPool,
261 VkQueryControlFlags flags)
263 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
264 ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
266 /* Workaround: When meta uses the pipeline with the VS disabled, it seems
267 * that the pipelining of the depth write breaks. What we see is that
268 * samples from the render pass clear leaks into the first query
269 * immediately after the clear. Doing a pipecontrol with a post-sync
270 * operation and DepthStallEnable seems to work around the issue.
272 if (cmd_buffer->state.need_query_wa) {
273 cmd_buffer->state.need_query_wa = false;
274 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
275 pc.DepthCacheFlushEnable = true;
276 pc.DepthStallEnable = true;
280 switch (pool->type) {
281 case VK_QUERY_TYPE_OCCLUSION:
282 emit_ps_depth_count(cmd_buffer, &pool->bo,
283 query * sizeof(struct anv_query_pool_slot));
286 case VK_QUERY_TYPE_PIPELINE_STATISTICS:
292 void genX(CmdEndQuery)(
293 VkCommandBuffer commandBuffer,
294 VkQueryPool queryPool,
297 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
298 ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
300 switch (pool->type) {
301 case VK_QUERY_TYPE_OCCLUSION:
302 emit_ps_depth_count(cmd_buffer, &pool->bo,
303 query * sizeof(struct anv_query_pool_slot) + 8);
305 emit_query_availability(cmd_buffer, &pool->bo,
306 query * sizeof(struct anv_query_pool_slot) + 16);
309 case VK_QUERY_TYPE_PIPELINE_STATISTICS:
315 #define TIMESTAMP 0x2358
317 void genX(CmdWriteTimestamp)(
318 VkCommandBuffer commandBuffer,
319 VkPipelineStageFlagBits pipelineStage,
320 VkQueryPool queryPool,
323 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
324 ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
325 uint32_t offset = query * sizeof(struct anv_query_pool_slot);
327 assert(pool->type == VK_QUERY_TYPE_TIMESTAMP);
329 switch (pipelineStage) {
330 case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
331 anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) {
332 srm.RegisterAddress = TIMESTAMP;
333 srm.MemoryAddress = (struct anv_address) { &pool->bo, offset };
335 anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) {
336 srm.RegisterAddress = TIMESTAMP + 4;
337 srm.MemoryAddress = (struct anv_address) { &pool->bo, offset + 4 };
342 /* Everything else is bottom-of-pipe */
343 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
344 pc.DestinationAddressType = DAT_PPGTT;
345 pc.PostSyncOperation = WriteTimestamp;
346 pc.Address = (struct anv_address) { &pool->bo, offset };
348 if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4)
349 pc.CommandStreamerStallEnable = true;
354 emit_query_availability(cmd_buffer, &pool->bo, offset + 16);
357 #if GEN_GEN > 7 || GEN_IS_HASWELL
359 #define alu_opcode(v) __gen_uint((v), 20, 31)
360 #define alu_operand1(v) __gen_uint((v), 10, 19)
361 #define alu_operand2(v) __gen_uint((v), 0, 9)
362 #define alu(opcode, operand1, operand2) \
363 alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2)
365 #define OPCODE_NOOP 0x000
366 #define OPCODE_LOAD 0x080
367 #define OPCODE_LOADINV 0x480
368 #define OPCODE_LOAD0 0x081
369 #define OPCODE_LOAD1 0x481
370 #define OPCODE_ADD 0x100
371 #define OPCODE_SUB 0x101
372 #define OPCODE_AND 0x102
373 #define OPCODE_OR 0x103
374 #define OPCODE_XOR 0x104
375 #define OPCODE_STORE 0x180
376 #define OPCODE_STOREINV 0x580
378 #define OPERAND_R0 0x00
379 #define OPERAND_R1 0x01
380 #define OPERAND_R2 0x02
381 #define OPERAND_R3 0x03
382 #define OPERAND_R4 0x04
383 #define OPERAND_SRCA 0x20
384 #define OPERAND_SRCB 0x21
385 #define OPERAND_ACCU 0x31
386 #define OPERAND_ZF 0x32
387 #define OPERAND_CF 0x33
389 #define CS_GPR(n) (0x2600 + (n) * 8)
392 emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg,
393 struct anv_bo *bo, uint32_t offset)
395 anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
396 lrm.RegisterAddress = reg,
397 lrm.MemoryAddress = (struct anv_address) { bo, offset };
399 anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
400 lrm.RegisterAddress = reg + 4;
401 lrm.MemoryAddress = (struct anv_address) { bo, offset + 4 };
406 store_query_result(struct anv_batch *batch, uint32_t reg,
407 struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags)
409 anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
410 srm.RegisterAddress = reg;
411 srm.MemoryAddress = (struct anv_address) { bo, offset };
414 if (flags & VK_QUERY_RESULT_64_BIT) {
415 anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
416 srm.RegisterAddress = reg + 4;
417 srm.MemoryAddress = (struct anv_address) { bo, offset + 4 };
422 void genX(CmdCopyQueryPoolResults)(
423 VkCommandBuffer commandBuffer,
424 VkQueryPool queryPool,
428 VkDeviceSize destOffset,
429 VkDeviceSize destStride,
430 VkQueryResultFlags flags)
432 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
433 ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
434 ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
435 uint32_t slot_offset, dst_offset;
437 if (flags & VK_QUERY_RESULT_WAIT_BIT) {
438 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
439 pc.CommandStreamerStallEnable = true;
440 pc.StallAtPixelScoreboard = true;
444 dst_offset = buffer->offset + destOffset;
445 for (uint32_t i = 0; i < queryCount; i++) {
447 slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot);
448 switch (pool->type) {
449 case VK_QUERY_TYPE_OCCLUSION:
450 emit_load_alu_reg_u64(&cmd_buffer->batch,
451 CS_GPR(0), &pool->bo, slot_offset);
452 emit_load_alu_reg_u64(&cmd_buffer->batch,
453 CS_GPR(1), &pool->bo, slot_offset + 8);
455 /* FIXME: We need to clamp the result for 32 bit. */
457 uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH));
458 dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1);
459 dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0);
460 dw[3] = alu(OPCODE_SUB, 0, 0);
461 dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU);
464 case VK_QUERY_TYPE_TIMESTAMP:
465 emit_load_alu_reg_u64(&cmd_buffer->batch,
466 CS_GPR(2), &pool->bo, slot_offset);
470 unreachable("unhandled query type");
473 store_query_result(&cmd_buffer->batch,
474 CS_GPR(2), buffer->bo, dst_offset, flags);
476 if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
477 emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0),
478 &pool->bo, slot_offset + 16);
479 if (flags & VK_QUERY_RESULT_64_BIT)
480 store_query_result(&cmd_buffer->batch,
481 CS_GPR(0), buffer->bo, dst_offset + 8, flags);
483 store_query_result(&cmd_buffer->batch,
484 CS_GPR(0), buffer->bo, dst_offset + 4, flags);
487 dst_offset += destStride;
492 void genX(CmdCopyQueryPoolResults)(
493 VkCommandBuffer commandBuffer,
494 VkQueryPool queryPool,
498 VkDeviceSize destOffset,
499 VkDeviceSize destStride,
500 VkQueryResultFlags flags)
502 anv_finishme("Queries not yet supported on Ivy Bridge");