OSDN Git Service

b5955d341bfcee5476f6cc6e111e5dd9aad4ee8b
[android-x86/external-mesa.git] / src / intel / vulkan / genX_query.c
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31
32 #include "genxml/gen_macros.h"
33 #include "genxml/genX_pack.h"
34
35 VkResult genX(CreateQueryPool)(
36     VkDevice                                    _device,
37     const VkQueryPoolCreateInfo*                pCreateInfo,
38     const VkAllocationCallbacks*                pAllocator,
39     VkQueryPool*                                pQueryPool)
40 {
41    ANV_FROM_HANDLE(anv_device, device, _device);
42    struct anv_query_pool *pool;
43    VkResult result;
44    uint32_t slot_size;
45    uint64_t size;
46
47    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);
48
49    switch (pCreateInfo->queryType) {
50    case VK_QUERY_TYPE_OCCLUSION:
51    case VK_QUERY_TYPE_TIMESTAMP:
52       break;
53    case VK_QUERY_TYPE_PIPELINE_STATISTICS:
54       return VK_ERROR_INCOMPATIBLE_DRIVER;
55    default:
56       assert(!"Invalid query type");
57    }
58
59    slot_size = sizeof(struct anv_query_pool_slot);
60    pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
61                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
62    if (pool == NULL)
63       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
64
65    pool->type = pCreateInfo->queryType;
66    pool->slots = pCreateInfo->queryCount;
67
68    size = pCreateInfo->queryCount * slot_size;
69    result = anv_bo_init_new(&pool->bo, device, size);
70    if (result != VK_SUCCESS)
71       goto fail;
72
73    pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size, 0);
74
75    *pQueryPool = anv_query_pool_to_handle(pool);
76
77    return VK_SUCCESS;
78
79  fail:
80    vk_free2(&device->alloc, pAllocator, pool);
81
82    return result;
83 }
84
85 void genX(DestroyQueryPool)(
86     VkDevice                                    _device,
87     VkQueryPool                                 _pool,
88     const VkAllocationCallbacks*                pAllocator)
89 {
90    ANV_FROM_HANDLE(anv_device, device, _device);
91    ANV_FROM_HANDLE(anv_query_pool, pool, _pool);
92
93    if (!pool)
94       return;
95
96    anv_gem_munmap(pool->bo.map, pool->bo.size);
97    anv_gem_close(device, pool->bo.gem_handle);
98    vk_free2(&device->alloc, pAllocator, pool);
99 }
100
101 VkResult genX(GetQueryPoolResults)(
102     VkDevice                                    _device,
103     VkQueryPool                                 queryPool,
104     uint32_t                                    firstQuery,
105     uint32_t                                    queryCount,
106     size_t                                      dataSize,
107     void*                                       pData,
108     VkDeviceSize                                stride,
109     VkQueryResultFlags                          flags)
110 {
111    ANV_FROM_HANDLE(anv_device, device, _device);
112    ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
113    int64_t timeout = INT64_MAX;
114    uint64_t result;
115    int ret;
116
117    assert(pool->type == VK_QUERY_TYPE_OCCLUSION ||
118           pool->type == VK_QUERY_TYPE_TIMESTAMP);
119
120    if (pData == NULL)
121       return VK_SUCCESS;
122
123    if (flags & VK_QUERY_RESULT_WAIT_BIT) {
124       ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout);
125       if (ret == -1) {
126          /* We don't know the real error. */
127          return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
128                           "gem_wait failed %m");
129       }
130    }
131
132    void *data_end = pData + dataSize;
133    struct anv_query_pool_slot *slot = pool->bo.map;
134
135    if (!device->info.has_llc) {
136       uint64_t offset = firstQuery * sizeof(*slot);
137       uint64_t size = queryCount * sizeof(*slot);
138       anv_invalidate_range(pool->bo.map + offset,
139                            MIN2(size, pool->bo.size - offset));
140    }
141
142    VkResult status = VK_SUCCESS;
143    for (uint32_t i = 0; i < queryCount; i++) {
144       bool available = slot[firstQuery + i].available;
145
146       /* From the Vulkan 1.0.42 spec:
147        *
148        *    "If VK_QUERY_RESULT_WAIT_BIT and VK_QUERY_RESULT_PARTIAL_BIT are
149        *    both not set then no result values are written to pData for
150        *    queries that are in the unavailable state at the time of the call,
151        *    and vkGetQueryPoolResults returns VK_NOT_READY. However,
152        *    availability state is still written to pData for those queries if
153        *    VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set."
154        */
155       bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT);
156
157       if (write_results) {
158          switch (pool->type) {
159          case VK_QUERY_TYPE_OCCLUSION: {
160             result = slot[firstQuery + i].end - slot[firstQuery + i].begin;
161             break;
162          }
163          case VK_QUERY_TYPE_PIPELINE_STATISTICS:
164             unreachable("pipeline stats not supported");
165          case VK_QUERY_TYPE_TIMESTAMP: {
166             result = slot[firstQuery + i].begin;
167             break;
168          }
169          default:
170             unreachable("invalid pool type");
171          }
172       } else {
173          status = VK_NOT_READY;
174       }
175
176       if (flags & VK_QUERY_RESULT_64_BIT) {
177          uint64_t *dst = pData;
178          if (write_results)
179             dst[0] = result;
180          if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
181             dst[1] = slot[firstQuery + i].available;
182       } else {
183          uint32_t *dst = pData;
184          if (result > UINT32_MAX)
185             result = UINT32_MAX;
186          if (write_results)
187             dst[0] = result;
188          if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
189             dst[1] = slot[firstQuery + i].available;
190       }
191
192       pData += stride;
193       if (pData >= data_end)
194          break;
195    }
196
197    return status;
198 }
199
/* Emit a PIPE_CONTROL whose post-sync operation writes the current
 * PS_DEPTH_COUNT value to bo+offset.  DepthStallEnable holds the write
 * until outstanding depth work has completed, so Begin/End snapshots
 * bracket the rasterization correctly.
 */
static void
emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
                    struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      pc.DestinationAddressType  = DAT_PPGTT;
      pc.PostSyncOperation       = WritePSDepthCount;
      pc.DepthStallEnable        = true;
      pc.Address                 = (struct anv_address) { bo, offset };

      /* NOTE(review): presumably a Skylake GT4 workaround requiring a CS
       * stall with post-sync ops — the same special case appears in
       * CmdWriteTimestamp; confirm against the workaround list.
       */
      if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4)
         pc.CommandStreamerStallEnable = true;
   }
}
214
/* Emit a PIPE_CONTROL that writes the immediate value 1 to bo+offset,
 * marking a query slot's `available` field once all prior work (including
 * the result write) has landed.
 */
static void
emit_query_availability(struct anv_cmd_buffer *cmd_buffer,
                        struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      pc.DestinationAddressType  = DAT_PPGTT;
      pc.PostSyncOperation       = WriteImmediateData;
      pc.Address                 = (struct anv_address) { bo, offset };
      pc.ImmediateData           = 1;
   }
}
226
227 void genX(CmdResetQueryPool)(
228     VkCommandBuffer                             commandBuffer,
229     VkQueryPool                                 queryPool,
230     uint32_t                                    firstQuery,
231     uint32_t                                    queryCount)
232 {
233    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
234    ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
235
236    for (uint32_t i = 0; i < queryCount; i++) {
237       switch (pool->type) {
238       case VK_QUERY_TYPE_OCCLUSION:
239       case VK_QUERY_TYPE_TIMESTAMP: {
240          anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdm) {
241             sdm.Address = (struct anv_address) {
242                .bo = &pool->bo,
243                .offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot) +
244                          offsetof(struct anv_query_pool_slot, available),
245             };
246             sdm.DataDWord0 = 0;
247             sdm.DataDWord1 = 0;
248          }
249          break;
250       }
251       default:
252          assert(!"Invalid query type");
253       }
254    }
255 }
256
/* Begin an occlusion query: snapshot PS_DEPTH_COUNT into the query slot's
 * first field; CmdEndQuery takes the matching end snapshot.
 *
 * NOTE(review): `flags` (e.g. VK_QUERY_CONTROL_PRECISE_BIT) is ignored —
 * presumably the PS_DEPTH_COUNT path is always precise; confirm.
 */
void genX(CmdBeginQuery)(
    VkCommandBuffer                             commandBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    query,
    VkQueryControlFlags                         flags)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);

   /* Workaround: When meta uses the pipeline with the VS disabled, it seems
    * that the pipelining of the depth write breaks. What we see is that
    * samples from the render pass clear leaks into the first query
    * immediately after the clear. Doing a pipecontrol with a post-sync
    * operation and DepthStallEnable seems to work around the issue.
    */
   if (cmd_buffer->state.need_query_wa) {
      cmd_buffer->state.need_query_wa = false;
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
         pc.DepthCacheFlushEnable   = true;
         pc.DepthStallEnable        = true;
      }
   }

   switch (pool->type) {
   case VK_QUERY_TYPE_OCCLUSION:
      /* Write the begin snapshot at the start of this query's slot. */
      emit_ps_depth_count(cmd_buffer, &pool->bo,
                          query * sizeof(struct anv_query_pool_slot));
      break;

   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
   default:
      unreachable("");
   }
}
291
/* End an occlusion query: snapshot PS_DEPTH_COUNT into the slot at +8
 * (presumably the `end` field of anv_query_pool_slot — the result is
 * end - begin in GetQueryPoolResults), then set the availability flag
 * at +16 once the snapshot has landed.
 */
void genX(CmdEndQuery)(
    VkCommandBuffer                             commandBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    query)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);

   switch (pool->type) {
   case VK_QUERY_TYPE_OCCLUSION:
      emit_ps_depth_count(cmd_buffer, &pool->bo,
                          query * sizeof(struct anv_query_pool_slot) + 8);

      /* Availability is ordered after the end snapshot by the PIPE_CONTROL
       * post-sync write.
       */
      emit_query_availability(cmd_buffer, &pool->bo,
                              query * sizeof(struct anv_query_pool_slot) + 16);
      break;

   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
   default:
      unreachable("");
   }
}
314
/* MMIO address of the command streamer timestamp register.
 * NOTE(review): presumably the render CS TIMESTAMP register — confirm
 * against the register spec for all supported gens.
 */
#define TIMESTAMP 0x2358

/* Write a GPU timestamp into the query slot's first field and then mark the
 * slot available.  Top-of-pipe requests read TIMESTAMP directly with two
 * 32-bit MI_STORE_REGISTER_MEMs; every other stage is treated as
 * bottom-of-pipe via a PIPE_CONTROL WriteTimestamp post-sync op.
 */
void genX(CmdWriteTimestamp)(
    VkCommandBuffer                             commandBuffer,
    VkPipelineStageFlagBits                     pipelineStage,
    VkQueryPool                                 queryPool,
    uint32_t                                    query)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
   uint32_t offset = query * sizeof(struct anv_query_pool_slot);

   assert(pool->type == VK_QUERY_TYPE_TIMESTAMP);

   switch (pipelineStage) {
   case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
      /* Low then high dword of the 64-bit timestamp. */
      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress  = TIMESTAMP;
         srm.MemoryAddress    = (struct anv_address) { &pool->bo, offset };
      }
      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress  = TIMESTAMP + 4;
         srm.MemoryAddress    = (struct anv_address) { &pool->bo, offset + 4 };
      }
      break;

   default:
      /* Everything else is bottom-of-pipe */
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
         pc.DestinationAddressType  = DAT_PPGTT;
         pc.PostSyncOperation       = WriteTimestamp;
         pc.Address = (struct anv_address) { &pool->bo, offset };

         /* NOTE(review): same GEN9/GT4 CS-stall special case as in
          * emit_ps_depth_count — presumably a Skylake GT4 workaround.
          */
         if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4)
            pc.CommandStreamerStallEnable = true;
      }
      break;
   }

   /* Availability flag lives at slot offset +16. */
   emit_query_availability(cmd_buffer, &pool->bo, offset + 16);
}
356
357 #if GEN_GEN > 7 || GEN_IS_HASWELL
358
/* Helpers for encoding MI_MATH ALU instruction dwords: a 12-bit opcode in
 * bits 31:20 and two 10-bit operands in bits 19:10 and 9:0.
 */
#define alu_opcode(v)   __gen_uint((v),  20, 31)
#define alu_operand1(v) __gen_uint((v),  10, 19)
#define alu_operand2(v) __gen_uint((v),   0,  9)
#define alu(opcode, operand1, operand2) \
   alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2)

/* MI_MATH ALU opcodes. */
#define OPCODE_NOOP      0x000
#define OPCODE_LOAD      0x080
#define OPCODE_LOADINV   0x480
#define OPCODE_LOAD0     0x081
#define OPCODE_LOAD1     0x481
#define OPCODE_ADD       0x100
#define OPCODE_SUB       0x101
#define OPCODE_AND       0x102
#define OPCODE_OR        0x103
#define OPCODE_XOR       0x104
#define OPCODE_STORE     0x180
#define OPCODE_STOREINV  0x580

/* MI_MATH ALU operand encodings: general-purpose registers, the two
 * source inputs, the accumulator, and the zero/carry flags.
 */
#define OPERAND_R0   0x00
#define OPERAND_R1   0x01
#define OPERAND_R2   0x02
#define OPERAND_R3   0x03
#define OPERAND_R4   0x04
#define OPERAND_SRCA 0x20
#define OPERAND_SRCB 0x21
#define OPERAND_ACCU 0x31
#define OPERAND_ZF   0x32
#define OPERAND_CF   0x33

/* MMIO address of command-streamer GPR n (each register is 8 bytes). */
#define CS_GPR(n) (0x2600 + (n) * 8)
390
391 static void
392 emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg,
393                       struct anv_bo *bo, uint32_t offset)
394 {
395    anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
396       lrm.RegisterAddress  = reg,
397       lrm.MemoryAddress    = (struct anv_address) { bo, offset };
398    }
399    anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
400       lrm.RegisterAddress  = reg + 4;
401       lrm.MemoryAddress    = (struct anv_address) { bo, offset + 4 };
402    }
403 }
404
405 static void
406 store_query_result(struct anv_batch *batch, uint32_t reg,
407                    struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags)
408 {
409    anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
410       srm.RegisterAddress  = reg;
411       srm.MemoryAddress    = (struct anv_address) { bo, offset };
412    }
413
414    if (flags & VK_QUERY_RESULT_64_BIT) {
415       anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
416          srm.RegisterAddress  = reg + 4;
417          srm.MemoryAddress    = (struct anv_address) { bo, offset + 4 };
418       }
419    }
420 }
421
/* GPU-side copy of query results into a buffer, using CS GPRs and MI_MATH
 * to compute occlusion deltas (end - begin) on the command streamer.
 *
 * NOTE(review): `dst_offset` is uint32_t, so `buffer->offset + destOffset`
 * (both potentially 64-bit) is truncated for destinations past 4 GiB —
 * verify whether anv_address.offset can even carry more than 32 bits here.
 */
void genX(CmdCopyQueryPoolResults)(
    VkCommandBuffer                             commandBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    firstQuery,
    uint32_t                                    queryCount,
    VkBuffer                                    destBuffer,
    VkDeviceSize                                destOffset,
    VkDeviceSize                                destStride,
    VkQueryResultFlags                          flags)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
   ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
   uint32_t slot_offset, dst_offset;

   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
      /* Stall the command streamer so earlier query writes are visible
       * before the copies below read the pool BO.
       */
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
         pc.CommandStreamerStallEnable = true;
         pc.StallAtPixelScoreboard     = true;
      }
   }

   dst_offset = buffer->offset + destOffset;
   for (uint32_t i = 0; i < queryCount; i++) {

      slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot);
      switch (pool->type) {
      case VK_QUERY_TYPE_OCCLUSION:
         /* R0 = begin snapshot, R1 = end snapshot. */
         emit_load_alu_reg_u64(&cmd_buffer->batch,
                               CS_GPR(0), &pool->bo, slot_offset);
         emit_load_alu_reg_u64(&cmd_buffer->batch,
                               CS_GPR(1), &pool->bo, slot_offset + 8);

         /* FIXME: We need to clamp the result for 32 bit. */

         /* MI_MATH: R2 = R1 - R0 (SRCA - SRCB via the accumulator). */
         uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH));
         dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1);
         dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0);
         dw[3] = alu(OPCODE_SUB, 0, 0);
         dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU);
         break;

      case VK_QUERY_TYPE_TIMESTAMP:
         /* Timestamp lives in the slot's first field; load it straight
          * into R2, the register store_query_result reads below.
          */
         emit_load_alu_reg_u64(&cmd_buffer->batch,
                               CS_GPR(2), &pool->bo, slot_offset);
         break;

      default:
         unreachable("unhandled query type");
      }

      store_query_result(&cmd_buffer->batch,
                         CS_GPR(2), buffer->bo, dst_offset, flags);

      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
         /* Availability qword at slot offset +16, appended right after the
          * result value (at +8 or +4 depending on result width).
          */
         emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0),
                               &pool->bo, slot_offset + 16);
         if (flags & VK_QUERY_RESULT_64_BIT)
            store_query_result(&cmd_buffer->batch,
                               CS_GPR(0), buffer->bo, dst_offset + 8, flags);
         else
            store_query_result(&cmd_buffer->batch,
                               CS_GPR(0), buffer->bo, dst_offset + 4, flags);
      }

      dst_offset += destStride;
   }
}
490
491 #else
/* Gen7 (non-Haswell) fallback: MI_MATH is unavailable, so the GPU-side
 * result copy is not implemented yet.
 */
void genX(CmdCopyQueryPoolResults)(
    VkCommandBuffer                             commandBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    firstQuery,
    uint32_t                                    queryCount,
    VkBuffer                                    destBuffer,
    VkDeviceSize                                destOffset,
    VkDeviceSize                                destStride,
    VkQueryResultFlags                          flags)
{
   anv_finishme("Queries not yet supported on Ivy Bridge");
}
504 #endif