[android-x86/external-mesa.git] / src/vulkan/gen8_cmd_buffer.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

#include "gen8_pack.h"
#include "gen9_pack.h"

static uint32_t
cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer)
{
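   /* These are the 3DCommandSubOpcode values for 3DSTATE_CONSTANT_{VS,GS,PS,
    * HS,DS}; emitting the GENX(3DSTATE_CONSTANT_VS) template with a patched
    * sub-opcode lets one pack function serve every graphics stage.
    */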
   static const uint32_t push_constant_opcodes[] = {
      [MESA_SHADER_VERTEX]                      = 21,
      [MESA_SHADER_TESS_CTRL]                   = 25, /* HS */
      [MESA_SHADER_TESS_EVAL]                   = 26, /* DS */
      [MESA_SHADER_GEOMETRY]                    = 22,
      [MESA_SHADER_FRAGMENT]                    = 23,
      [MESA_SHADER_COMPUTE]                     = 0,
   };

   VkShaderStageFlags flushed = 0;

   anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) {
      if (stage == MESA_SHADER_COMPUTE)
         continue;

      struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage);

      if (state.offset == 0)
         continue;

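      /* The constant buffer read length below is expressed in 256-bit
       * (32-byte) units, hence the DIV_ROUND_UP by 32.
       */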
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS),
                     ._3DCommandSubOpcode = push_constant_opcodes[stage],
                     .ConstantBody = {
                        .PointerToConstantBuffer0 = { .offset = state.offset },
                        .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32),
                     });

      flushed |= mesa_to_vk_shader_stage(stage);
   }

   cmd_buffer->state.push_constants_dirty &= ~flushed;

   return flushed;
}

#if ANV_GEN == 8
static void
emit_viewport_state(struct anv_cmd_buffer *cmd_buffer,
                    uint32_t count, const VkViewport *viewports)
{
   struct anv_state sf_clip_state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64);
   struct anv_state cc_state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);

   for (uint32_t i = 0; i < count; i++) {
      const VkViewport *vp = &viewports[i];

      /* The gen7 state struct has just the matrix and guardband fields, the
       * gen8 struct adds the min/max viewport fields. */
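      /* The matrix elements implement the viewport scale/offset, e.g.
       * x_win = m00 * x_ndc + m30 and y_win = m11 * y_ndc + m31; note that
       * the depth terms as written assume a [-1, 1] NDC depth range.
       */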
      struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = {
         .ViewportMatrixElementm00 = vp->width / 2,
         .ViewportMatrixElementm11 = vp->height / 2,
         .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2,
         .ViewportMatrixElementm30 = vp->x + vp->width / 2,
         .ViewportMatrixElementm31 = vp->y + vp->height / 2,
         .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2,
         .XMinClipGuardband = -1.0f,
         .XMaxClipGuardband = 1.0f,
         .YMinClipGuardband = -1.0f,
         .YMaxClipGuardband = 1.0f,
         .XMinViewPort = vp->x,
         .XMaxViewPort = vp->x + vp->width - 1,
         .YMinViewPort = vp->y,
         .YMaxViewPort = vp->y + vp->height - 1,
      };

      struct GENX(CC_VIEWPORT) cc_viewport = {
         .MinimumDepth = vp->minDepth,
         .MaximumDepth = vp->maxDepth
      };

      GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64,
                                 &sf_clip_viewport);
      GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
   }

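   /* Without LLC the GPU does not snoop the CPU caches, so the viewport
    * state has to be flushed out of the CPU caches before the GPU reads it.
    */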
   if (!cmd_buffer->device->info.has_llc) {
      anv_state_clflush(sf_clip_state);
      anv_state_clflush(cc_state);
   }

   anv_batch_emit(&cmd_buffer->batch,
                  GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC),
                  .CCViewportPointer = cc_state.offset);
   anv_batch_emit(&cmd_buffer->batch,
                  GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP),
                  .SFClipViewportPointer = sf_clip_state.offset);
}

void
gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
{
   if (cmd_buffer->state.dynamic.viewport.count > 0) {
      emit_viewport_state(cmd_buffer, cmd_buffer->state.dynamic.viewport.count,
                          cmd_buffer->state.dynamic.viewport.viewports);
   } else {
      /* If viewport count is 0, this is taken to mean "use the default" */
      emit_viewport_state(cmd_buffer, 1,
                          &(VkViewport) {
                             .x = 0.0f,
                             .y = 0.0f,
                             .width = cmd_buffer->state.framebuffer->width,
                             .height = cmd_buffer->state.framebuffer->height,
                             .minDepth = 0.0f,
                             .maxDepth = 1.0f,
                          });
   }
}
#endif

static void
flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer)
{
   if (cmd_buffer->state.current_pipeline != _3D) {
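      /* On gen9+ PIPELINE_SELECT grew a write-enable mask; setting
       * MaskBits = 3 unlocks the two PipelineSelection bits so that the
       * selection below actually sticks (a sketch of the PRM's wording).
       */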
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT),
#if ANV_GEN >= 9
                     .MaskBits = 3,
#endif
                     .PipelineSelection = _3D);
      cmd_buffer->state.current_pipeline = _3D;
   }
}

static void
cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
   uint32_t *p;

   uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used;

   assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);

   flush_pipeline_select_3d(cmd_buffer);

   if (vb_emit) {
      const uint32_t num_buffers = __builtin_popcount(vb_emit);
      const uint32_t num_dwords = 1 + num_buffers * 4;

      p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
                          GENX(3DSTATE_VERTEX_BUFFERS));
      uint32_t vb, i = 0;
      for_each_bit(vb, vb_emit) {
         struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer;
         uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset;

         struct GENX(VERTEX_BUFFER_STATE) state = {
            .VertexBufferIndex = vb,
            .MemoryObjectControlState = GENX(MOCS),
            .AddressModifyEnable = true,
            .BufferPitch = pipeline->binding_stride[vb],
            .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
            .BufferSize = buffer->size - offset
         };

         GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state);
         i++;
      }
   }

   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) {
      /* If somebody compiled a pipeline after starting a command buffer the
       * scratch bo may have grown since we started this cmd buffer (and
       * emitted STATE_BASE_ADDRESS).  If we're binding that pipeline now,
       * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
      if (cmd_buffer->state.scratch_size < pipeline->total_scratch)
         anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
   }

   /* We emit the binding tables and sampler tables first, then emit push
    * constants and then finally emit binding table and sampler table
    * pointers.  It has to happen in this order, since emitting the binding
    * tables may change the push constants (in case of storage images). After
    * emitting push constants, on SKL+ we have to emit the corresponding
    * 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect.
    */
   uint32_t dirty = 0;
   if (cmd_buffer->state.descriptors_dirty)
      dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer);

   if (cmd_buffer->state.push_constants_dirty)
      dirty |= cmd_buffer_flush_push_constants(cmd_buffer);

   if (dirty)
      gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty);

   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)
      gen8_cmd_buffer_emit_viewport(cmd_buffer);

   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR)
      gen7_cmd_buffer_emit_scissor(cmd_buffer);

   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                  ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) {
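      /* Pack only the dynamic fields here; anv_batch_emit_merge() below ORs
       * these dwords with the non-dynamic half that the pipeline pre-packed,
       * so the two sources combine into one complete 3DSTATE_SF.
       */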
      uint32_t sf_dw[GENX(3DSTATE_SF_length)];
      struct GENX(3DSTATE_SF) sf = {
         GENX(3DSTATE_SF_header),
         .LineWidth = cmd_buffer->state.dynamic.line_width,
      };
      GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
      /* FIXME: gen9.fs */
      anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen8.sf);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                  ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) {
      bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f ||
         cmd_buffer->state.dynamic.depth_bias.slope != 0.0f;

      uint32_t raster_dw[GENX(3DSTATE_RASTER_length)];
      struct GENX(3DSTATE_RASTER) raster = {
         GENX(3DSTATE_RASTER_header),
         .GlobalDepthOffsetEnableSolid = enable_bias,
         .GlobalDepthOffsetEnableWireframe = enable_bias,
         .GlobalDepthOffsetEnablePoint = enable_bias,
         .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias,
         .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope,
         .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp
      };
      GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster);
      anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
                           pipeline->gen8.raster);
   }
   /* Stencil reference values moved from COLOR_CALC_STATE in gen8 to
    * 3DSTATE_WM_DEPTH_STENCIL in gen9. That means the dirty bits get split
    * across different state packets for gen8 and gen9. We handle that by
    * using a big old #if switch here.
    */
#if ANV_GEN == 8
   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |
                                  ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
      struct anv_state cc_state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            GEN8_COLOR_CALC_STATE_length * 4,
                                            64);
      struct GEN8_COLOR_CALC_STATE cc = {
         .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
         .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
         .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
         .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
         .StencilReferenceValue =
            cmd_buffer->state.dynamic.stencil_reference.front,
         .BackFaceStencilReferenceValue =
            cmd_buffer->state.dynamic.stencil_reference.back,
      };
      GEN8_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc);

      if (!cmd_buffer->device->info.has_llc)
         anv_state_clflush(cc_state);

      anv_batch_emit(&cmd_buffer->batch,
                     GEN8_3DSTATE_CC_STATE_POINTERS,
                     .ColorCalcStatePointer = cc_state.offset,
                     .ColorCalcStatePointerValid = true);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                  ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
                                  ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) {
      uint32_t wm_depth_stencil_dw[GEN8_3DSTATE_WM_DEPTH_STENCIL_length];

      struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
         GEN8_3DSTATE_WM_DEPTH_STENCIL_header,

         /* FIXME: It is not obvious that gating stencil writes on a non-zero
          * dynamic write mask is the right interpretation of the API state.
          */
         .StencilBufferWriteEnable =
            cmd_buffer->state.dynamic.stencil_write_mask.front != 0,

         .StencilTestMask =
            cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff,
         .StencilWriteMask =
            cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff,

         .BackfaceStencilTestMask =
            cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff,
         .BackfaceStencilWriteMask =
            cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff,
      };
      GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, wm_depth_stencil_dw,
                                         &wm_depth_stencil);

      anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw,
                           pipeline->gen8.wm_depth_stencil);
   }
#else
   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) {
      struct anv_state cc_state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            GEN9_COLOR_CALC_STATE_length * 4,
                                            64);
      struct GEN9_COLOR_CALC_STATE cc = {
         .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
         .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
         .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
         .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
      };
      GEN9_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc);

      if (!cmd_buffer->device->info.has_llc)
         anv_state_clflush(cc_state);

      anv_batch_emit(&cmd_buffer->batch,
                     GEN9_3DSTATE_CC_STATE_POINTERS,
                     .ColorCalcStatePointer = cc_state.offset,
                     .ColorCalcStatePointerValid = true);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                  ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
                                  ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
                                  ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
      uint32_t dwords[GEN9_3DSTATE_WM_DEPTH_STENCIL_length];
      struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
      struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
         GEN9_3DSTATE_WM_DEPTH_STENCIL_header,

         .StencilBufferWriteEnable = d->stencil_write_mask.front != 0,

         .StencilTestMask = d->stencil_compare_mask.front & 0xff,
         .StencilWriteMask = d->stencil_write_mask.front & 0xff,

         .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
         .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,

         .StencilReferenceValue = d->stencil_reference.front,
         .BackfaceStencilReferenceValue = d->stencil_reference.back
      };
      GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, dwords, &wm_depth_stencil);

      anv_batch_emit_merge(&cmd_buffer->batch, dwords,
                           pipeline->gen9.wm_depth_stencil);
   }
#endif

   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                  ANV_CMD_DIRTY_INDEX_BUFFER)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF),
         .IndexedDrawCutIndexEnable = pipeline->primitive_restart,
         .CutIndex = cmd_buffer->state.restart_index,
      );
   }

   cmd_buffer->state.vb_dirty &= ~vb_emit;
   cmd_buffer->state.dirty = 0;
}

void genX(CmdDraw)(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    vertexCount,
    uint32_t                                    instanceCount,
    uint32_t                                    firstVertex,
    uint32_t                                    firstInstance)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   cmd_buffer_flush_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE),
                  .VertexAccessType = SEQUENTIAL,
                  .VertexCountPerInstance = vertexCount,
                  .StartVertexLocation = firstVertex,
                  .InstanceCount = instanceCount,
                  .StartInstanceLocation = firstInstance,
                  .BaseVertexLocation = 0);
}

void genX(CmdDrawIndexed)(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    indexCount,
    uint32_t                                    instanceCount,
    uint32_t                                    firstIndex,
    int32_t                                     vertexOffset,
    uint32_t                                    firstInstance)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   cmd_buffer_flush_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE),
                  .VertexAccessType = RANDOM,
                  .VertexCountPerInstance = indexCount,
                  .StartVertexLocation = firstIndex,
                  .InstanceCount = instanceCount,
                  .StartInstanceLocation = firstInstance,
                  .BaseVertexLocation = vertexOffset);
}

static void
emit_lrm(struct anv_batch *batch,
         uint32_t reg, struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM),
                  .RegisterAddress = reg,
                  .MemoryAddress = { bo, offset });
}

static void
emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
{
   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM),
                  .RegisterOffset = reg,
                  .DataDWord = imm);
}

/* Auto-Draw / Indirect Registers */
#define GEN7_3DPRIM_END_OFFSET          0x2420
#define GEN7_3DPRIM_START_VERTEX        0x2430
#define GEN7_3DPRIM_VERTEX_COUNT        0x2434
#define GEN7_3DPRIM_INSTANCE_COUNT      0x2438
#define GEN7_3DPRIM_START_INSTANCE      0x243C
#define GEN7_3DPRIM_BASE_VERTEX         0x2440
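
/* The indirect draws below load these registers straight from the
 * application's buffer.  For reference, the Vulkan spec defines the buffer
 * layouts as:
 *
 *    VkDrawIndirectCommand        = { vertexCount, instanceCount,
 *                                     firstVertex, firstInstance }
 *    VkDrawIndexedIndirectCommand = { indexCount, instanceCount, firstIndex,
 *                                     vertexOffset, firstInstance }
 *
 * All fields are 32-bit, which is why the emit_lrm() offsets advance in
 * steps of 4 bytes.
 */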

void genX(CmdDrawIndirect)(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    uint32_t                                    drawCount,
    uint32_t                                    stride)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   cmd_buffer_flush_state(cmd_buffer);

   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
   emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);

   anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE),
                  .IndirectParameterEnable = true,
                  .VertexAccessType = SEQUENTIAL);
}

void genX(CmdBindIndexBuffer)(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    VkIndexType                                 indexType)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

   static const uint32_t vk_to_gen_index_type[] = {
      [VK_INDEX_TYPE_UINT16]                    = INDEX_WORD,
      [VK_INDEX_TYPE_UINT32]                    = INDEX_DWORD,
   };

   static const uint32_t restart_index_for_type[] = {
      [VK_INDEX_TYPE_UINT16]                    = UINT16_MAX,
      [VK_INDEX_TYPE_UINT32]                    = UINT32_MAX,
   };
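
   /* On gen8+ the cut (restart) index lives in 3DSTATE_VF rather than in the
    * index-buffer packet, so we only stash it here; it is emitted in
    * cmd_buffer_flush_state() at draw time.
    */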
   cmd_buffer->state.restart_index = restart_index_for_type[indexType];

   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER),
                  .IndexFormat = vk_to_gen_index_type[indexType],
                  .MemoryObjectControlState = GENX(MOCS),
                  .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
                  .BufferSize = buffer->size - offset);

   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
}

static VkResult
flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct anv_state surfaces = { 0, }, samplers = { 0, };
   VkResult result;

   result = anv_cmd_buffer_emit_samplers(cmd_buffer,
                                         MESA_SHADER_COMPUTE, &samplers);
   if (result != VK_SUCCESS)
      return result;
   result = anv_cmd_buffer_emit_binding_table(cmd_buffer,
                                              MESA_SHADER_COMPUTE, &surfaces);
   if (result != VK_SUCCESS)
      return result;

   struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer);

   const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data;
   const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;

   unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
   unsigned push_constant_data_size =
      (prog_data->nr_params + local_id_dwords) * 4;
   unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
   unsigned push_constant_regs = reg_aligned_constant_size / 32;
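   /* A GRF register is 32 bytes (8 dwords), so the math above converts the
    * uniform params plus the appended local-invocation-ID payload into a
    * register count for INTERFACE_DESCRIPTOR_DATA's CURBE read length.
    */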

   if (push_state.alloc_size) {
      anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD),
                     .CURBETotalDataLength = push_state.alloc_size,
                     .CURBEDataStartAddress = push_state.offset);
   }

   struct anv_state state =
      anv_state_pool_emit(&device->dynamic_state_pool,
                          GENX(INTERFACE_DESCRIPTOR_DATA), 64,
                          .KernelStartPointer = pipeline->cs_simd,
                          .KernelStartPointerHigh = 0,
                          .BindingTablePointer = surfaces.offset,
                          .BindingTableEntryCount = 0,
                          .SamplerStatePointer = samplers.offset,
                          .SamplerCount = 0,
                          .ConstantIndirectURBEntryReadLength = push_constant_regs,
                          .ConstantURBEntryReadOffset = 0,
                          .BarrierEnable = cs_prog_data->uses_barrier,
                          .NumberofThreadsinGPGPUThreadGroup =
                             pipeline->cs_thread_width_max);

   uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
   anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
                  .InterfaceDescriptorTotalLength = size,
                  .InterfaceDescriptorDataStartAddress = state.offset);

   return VK_SUCCESS;
}

static void
cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   VkResult result;

   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);

   if (cmd_buffer->state.current_pipeline != GPGPU) {
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT),
#if ANV_GEN >= 9
                     .MaskBits = 3,
#endif
                     .PipelineSelection = GPGPU);
      cmd_buffer->state.current_pipeline = GPGPU;
   }

   if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)
      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);

   if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
       (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) {
      result = flush_compute_descriptor_set(cmd_buffer);
      assert(result == VK_SUCCESS);
      cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
   }

   cmd_buffer->state.compute_dirty = 0;
}

void genX(CmdDrawIndexedIndirect)(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    uint32_t                                    drawCount,
    uint32_t                                    stride)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   cmd_buffer_flush_state(cmd_buffer);

   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);

   anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE),
                  .IndirectParameterEnable = true,
                  .VertexAccessType = RANDOM);
}

void genX(CmdDispatch)(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    x,
    uint32_t                                    y,
    uint32_t                                    z)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;

   if (prog_data->uses_num_work_groups) {
      struct anv_state state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4);
      uint32_t *sizes = state.map;
      sizes[0] = x;
      sizes[1] = y;
      sizes[2] = z;
      if (!cmd_buffer->device->info.has_llc)
         anv_state_clflush(state);
      cmd_buffer->state.num_workgroups_offset = state.offset;
      cmd_buffer->state.num_workgroups_bo =
         &cmd_buffer->device->dynamic_state_block_pool.bo;
   }

   cmd_buffer_flush_compute_state(cmd_buffer);

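   /* GPGPU_WALKER's SIMDSize is an enum (SIMD8 = 0, SIMD16 = 1, SIMD32 = 2),
    * so dividing the dispatch width by 16 happens to produce the right
    * encoding.
    */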
   anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER),
                  .SIMDSize = prog_data->simd_size / 16,
                  .ThreadDepthCounterMaximum = 0,
                  .ThreadHeightCounterMaximum = 0,
                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1,
                  .ThreadGroupIDXDimension = x,
                  .ThreadGroupIDYDimension = y,
                  .ThreadGroupIDZDimension = z,
                  .RightExecutionMask = pipeline->cs_right_mask,
                  .BottomExecutionMask = 0xffffffff);

   anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH));
}

#define GPGPU_DISPATCHDIMX 0x2500
#define GPGPU_DISPATCHDIMY 0x2504
#define GPGPU_DISPATCHDIMZ 0x2508
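
/* VkDispatchIndirectCommand is just { uint32_t x, y, z }, matching these
 * three registers one-to-one, so CmdDispatchIndirect can load them directly
 * from the application's buffer.
 */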

void genX(CmdDispatchIndirect)(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   if (prog_data->uses_num_work_groups) {
      cmd_buffer->state.num_workgroups_offset = bo_offset;
      cmd_buffer->state.num_workgroups_bo = bo;
   }

   cmd_buffer_flush_compute_state(cmd_buffer);

   emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
   emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
   emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);

   anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER),
                  .IndirectParameterEnable = true,
                  .SIMDSize = prog_data->simd_size / 16,
                  .ThreadDepthCounterMaximum = 0,
                  .ThreadHeightCounterMaximum = 0,
                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1,
                  .RightExecutionMask = pipeline->cs_right_mask,
                  .BottomExecutionMask = 0xffffffff);

   anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH));
}

static void
cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
{
   const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct anv_image_view *iview =
      anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
   const struct anv_image *image = iview ? iview->image : NULL;

   /* XXX: isl needs to grow depth format support */
   const struct anv_format *anv_format =
      iview ? anv_format_for_vk_format(iview->vk_format) : NULL;

   const bool has_depth = iview && anv_format->depth_format;
   const bool has_stencil = iview && anv_format->has_stencil;

   /* FIXME: Implement the PMA stall W/A */
   /* FIXME: Width and Height are wrong */

   /* Emit 3DSTATE_DEPTH_BUFFER */
   if (has_depth) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER),
         .SurfaceType = SURFTYPE_2D,
         .DepthWriteEnable = anv_format->depth_format,
         .StencilWriteEnable = has_stencil,
         .HierarchicalDepthBufferEnable = false,
         .SurfaceFormat = anv_format->depth_format,
         .SurfacePitch = image->depth_surface.isl.row_pitch - 1,
         .SurfaceBaseAddress = {
            .bo = image->bo,
            .offset = image->depth_surface.offset,
         },
         .Height = fb->height - 1,
         .Width = fb->width - 1,
         .LOD = 0,
         .Depth = 1 - 1,
         .MinimumArrayElement = 0,
         .DepthBufferObjectControlState = GENX(MOCS),
         .RenderTargetViewExtent = 1 - 1,
         .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->depth_surface.isl) >> 2);
   } else {
      /* Even when no depth buffer is present, the hardware requires that
       * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says:
       *
       *    If a null depth buffer is bound, the driver must instead bind depth as:
       *       3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D
       *       3DSTATE_DEPTH.Width = 1
       *       3DSTATE_DEPTH.Height = 1
       *       3DSTATE_DEPTH.SurfaceFormat = D16_UNORM
       *       3DSTATE_DEPTH.SurfaceBaseAddress = 0
       *       3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0
       *       3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0
       *       3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0
       *
       * The PRM is wrong, though. The width and height must be programmed to
       * the actual framebuffer's width and height, even when neither a depth
       * buffer nor a stencil buffer is present.
       */
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER),
         .SurfaceType = SURFTYPE_2D,
         .SurfaceFormat = D16_UNORM,
         .Width = fb->width - 1,
         .Height = fb->height - 1,
         .StencilWriteEnable = has_stencil);
   }

   /* Emit 3DSTATE_STENCIL_BUFFER */
   if (has_stencil) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER),
         .StencilBufferEnable = true,
         .StencilBufferObjectControlState = GENX(MOCS),

         /* Stencil buffers have strange pitch. The PRM says:
          *
          *    The pitch must be set to 2x the value computed based on width,
          *    as the stencil buffer is stored with two rows interleaved.
          */
         .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1,

         .SurfaceBaseAddress = {
            .bo = image->bo,
            .offset = image->offset + image->stencil_surface.offset,
         },
         .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->stencil_surface.isl) >> 2);
   } else {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER));
   }

   /* Disable hierarchical depth buffers. */
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER));

   /* Clear the clear params. */
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS));
}

void
genX(cmd_buffer_begin_subpass)(struct anv_cmd_buffer *cmd_buffer,
                               struct anv_subpass *subpass)
{
   cmd_buffer->state.subpass = subpass;

   cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;

   cmd_buffer_emit_depth_stencil(cmd_buffer);
}

void genX(CmdBeginRenderPass)(
    VkCommandBuffer                             commandBuffer,
    const VkRenderPassBeginInfo*                pRenderPassBegin,
    VkSubpassContents                           contents)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
   ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);

   cmd_buffer->state.framebuffer = framebuffer;
   cmd_buffer->state.pass = pass;
   anv_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin);

   flush_pipeline_select_3d(cmd_buffer);

   const VkRect2D *render_area = &pRenderPassBegin->renderArea;

   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DRAWING_RECTANGLE),
                  .ClippedDrawingRectangleYMin = render_area->offset.y,
                  .ClippedDrawingRectangleXMin = render_area->offset.x,
                  .ClippedDrawingRectangleYMax =
                     render_area->offset.y + render_area->extent.height - 1,
                  .ClippedDrawingRectangleXMax =
                     render_area->offset.x + render_area->extent.width - 1,
                  .DrawingRectangleOriginY = 0,
                  .DrawingRectangleOriginX = 0);

   anv_cmd_buffer_clear_attachments(cmd_buffer, pass,
                                    pRenderPassBegin->pClearValues);

   genX(cmd_buffer_begin_subpass)(cmd_buffer, pass->subpasses);
}

void genX(CmdNextSubpass)(
    VkCommandBuffer                             commandBuffer,
    VkSubpassContents                           contents)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);

   genX(cmd_buffer_begin_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1);
}

void genX(CmdEndRenderPass)(
    VkCommandBuffer                             commandBuffer)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   /* Emit a flushing pipe control at the end of a pass.  This is kind of a
    * hack but it ensures that render targets always actually get written.
    * Eventually, we should do flushing based on image format transitions
    * or something of that nature.
    */
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                  .PostSyncOperation = NoWrite,
                  .RenderTargetCacheFlushEnable = true,
                  .InstructionCacheInvalidateEnable = true,
                  .DepthCacheFlushEnable = true,
                  .VFCacheInvalidationEnable = true,
                  .TextureCacheInvalidationEnable = true,
                  .CommandStreamerStallEnable = true);
}

static void
emit_ps_depth_count(struct anv_batch *batch,
                    struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GENX(PIPE_CONTROL),
                  .DestinationAddressType = DAT_PPGTT,
                  .PostSyncOperation = WritePSDepthCount,
                  .DepthStallEnable = true,
                  .Address = { bo, offset });
}

static void
emit_query_availability(struct anv_batch *batch,
                        struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GENX(PIPE_CONTROL),
                  .DestinationAddressType = DAT_PPGTT,
                  .PostSyncOperation = WriteImmediateData,
                  .Address = { bo, offset },
                  .ImmediateData = 1);
}

void genX(CmdBeginQuery)(
    VkCommandBuffer                             commandBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    query,
    VkQueryControlFlags                         flags)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);

   /* Workaround: When meta uses the pipeline with the VS disabled, it seems
    * that the pipelining of the depth write breaks. What we see is that
    * samples from the render pass clear leak into the first query
    * immediately after the clear. Doing a pipecontrol with a post-sync
    * operation and DepthStallEnable seems to work around the issue.
    */
   if (cmd_buffer->state.need_query_wa) {
      cmd_buffer->state.need_query_wa = false;
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                     .DepthCacheFlushEnable = true,
                     .DepthStallEnable = true);
   }

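   /* Each anv_query_pool_slot holds three uint64s: the begin count at +0,
    * the end count at +8 (written by CmdEndQuery below), and the
    * availability word at +16.
    */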
   switch (pool->type) {
   case VK_QUERY_TYPE_OCCLUSION:
      emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
                          query * sizeof(struct anv_query_pool_slot));
      break;

   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
   default:
      unreachable("unsupported query type");
   }
}

void genX(CmdEndQuery)(
    VkCommandBuffer                             commandBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    query)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);

   switch (pool->type) {
   case VK_QUERY_TYPE_OCCLUSION:
      emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
                          query * sizeof(struct anv_query_pool_slot) + 8);

      emit_query_availability(&cmd_buffer->batch, &pool->bo,
                              query * sizeof(struct anv_query_pool_slot) + 16);
      break;

   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
   default:
      unreachable("unsupported query type");
   }
}

#define TIMESTAMP 0x2358

void genX(CmdWriteTimestamp)(
    VkCommandBuffer                             commandBuffer,
    VkPipelineStageFlagBits                     pipelineStage,
    VkQueryPool                                 queryPool,
    uint32_t                                    query)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
   uint32_t offset = query * sizeof(struct anv_query_pool_slot);

   assert(pool->type == VK_QUERY_TYPE_TIMESTAMP);

   switch (pipelineStage) {
   case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM),
                     .RegisterAddress = TIMESTAMP,
                     .MemoryAddress = { &pool->bo, offset });
      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM),
                     .RegisterAddress = TIMESTAMP + 4,
                     .MemoryAddress = { &pool->bo, offset + 4 });
      break;

   default:
      /* Everything else is bottom-of-pipe */
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                     .DestinationAddressType = DAT_PPGTT,
                     .PostSyncOperation = WriteTimestamp,
                     .Address = { &pool->bo, offset });
      break;
   }

   /* The availability word lives at +16 within the slot, matching
    * CmdEndQuery and CmdCopyQueryPoolResults.
    */
   emit_query_availability(&cmd_buffer->batch, &pool->bo,
                           query * sizeof(struct anv_query_pool_slot) + 16);
}

#define alu_opcode(v)   __gen_field((v),  20, 31)
#define alu_operand1(v) __gen_field((v),  10, 19)
#define alu_operand2(v) __gen_field((v),   0,  9)
#define alu(opcode, operand1, operand2) \
   alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2)

#define OPCODE_NOOP      0x000
#define OPCODE_LOAD      0x080
#define OPCODE_LOADINV   0x480
#define OPCODE_LOAD0     0x081
#define OPCODE_LOAD1     0x481
#define OPCODE_ADD       0x100
#define OPCODE_SUB       0x101
#define OPCODE_AND       0x102
#define OPCODE_OR        0x103
#define OPCODE_XOR       0x104
#define OPCODE_STORE     0x180
#define OPCODE_STOREINV  0x580

#define OPERAND_R0   0x00
#define OPERAND_R1   0x01
#define OPERAND_R2   0x02
#define OPERAND_R3   0x03
#define OPERAND_R4   0x04
#define OPERAND_SRCA 0x20
#define OPERAND_SRCB 0x21
#define OPERAND_ACCU 0x31
#define OPERAND_ZF   0x32
#define OPERAND_CF   0x33

#define CS_GPR(n) (0x2600 + (n) * 8)
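
/* The command streamer exposes sixteen 64-bit general-purpose registers
 * starting at MMIO 0x2600; CS_GPR(n) is the address of the low dword of
 * register n, which MI_MATH manipulates via the opcodes above.
 */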

static void
emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg,
                      struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM),
                  .RegisterAddress = reg,
                  .MemoryAddress = { bo, offset });
   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM),
                  .RegisterAddress = reg + 4,
                  .MemoryAddress = { bo, offset + 4 });
}

static void
store_query_result(struct anv_batch *batch, uint32_t reg,
                   struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags)
{
   anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM),
                  .RegisterAddress = reg,
                  .MemoryAddress = { bo, offset });

   if (flags & VK_QUERY_RESULT_64_BIT)
      anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM),
                     .RegisterAddress = reg + 4,
                     .MemoryAddress = { bo, offset + 4 });
}

void genX(CmdCopyQueryPoolResults)(
    VkCommandBuffer                             commandBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    firstQuery,
    uint32_t                                    queryCount,
    VkBuffer                                    destBuffer,
    VkDeviceSize                                destOffset,
    VkDeviceSize                                destStride,
    VkQueryResultFlags                          flags)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
   ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
   uint32_t slot_offset, dst_offset;

   if (flags & VK_QUERY_RESULT_WAIT_BIT)
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                     .CommandStreamerStallEnable = true,
                     .StallAtPixelScoreboard = true);

   dst_offset = buffer->offset + destOffset;
   for (uint32_t i = 0; i < queryCount; i++) {

      slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot);
      switch (pool->type) {
      case VK_QUERY_TYPE_OCCLUSION:
         emit_load_alu_reg_u64(&cmd_buffer->batch,
                               CS_GPR(0), &pool->bo, slot_offset);
         emit_load_alu_reg_u64(&cmd_buffer->batch,
                               CS_GPR(1), &pool->bo, slot_offset + 8);

         /* FIXME: We need to clamp the result for 32 bit. */

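         /* MI_MATH program: R2 = R1 - R0.  R0/R1 hold the begin/end PS depth
          * counts loaded above, so GPR2 ends up with the occlusion result
          * for store_query_result() below.
          */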
         uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH));
         dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1);
         dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0);
         dw[3] = alu(OPCODE_SUB, 0, 0);
         dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU);
         break;

      case VK_QUERY_TYPE_TIMESTAMP:
         emit_load_alu_reg_u64(&cmd_buffer->batch,
                               CS_GPR(2), &pool->bo, slot_offset);
         break;

      default:
         unreachable("unhandled query type");
      }

      store_query_result(&cmd_buffer->batch,
                         CS_GPR(2), buffer->bo, dst_offset, flags);

      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
         emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0),
                               &pool->bo, slot_offset + 16);
         if (flags & VK_QUERY_RESULT_64_BIT)
            store_query_result(&cmd_buffer->batch,
                               CS_GPR(0), buffer->bo, dst_offset + 8, flags);
         else
            store_query_result(&cmd_buffer->batch,
                               CS_GPR(0), buffer->bo, dst_offset + 4, flags);
      }

      dst_offset += destStride;
   }
}

void genX(CmdSetEvent)(
    VkCommandBuffer                             commandBuffer,
    VkEvent                                     _event,
    VkPipelineStageFlags                        stageMask)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_event, event, _event);

   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                  .DestinationAddressType = DAT_PPGTT,
                  .PostSyncOperation = WriteImmediateData,
                  .Address = {
                     &cmd_buffer->device->dynamic_state_block_pool.bo,
                     event->state.offset
                  },
                  .ImmediateData = VK_EVENT_SET);
}

void genX(CmdResetEvent)(
    VkCommandBuffer                             commandBuffer,
    VkEvent                                     _event,
    VkPipelineStageFlags                        stageMask)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_event, event, _event);

   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                  .DestinationAddressType = DAT_PPGTT,
                  .PostSyncOperation = WriteImmediateData,
                  .Address = {
                     &cmd_buffer->device->dynamic_state_block_pool.bo,
                     event->state.offset
                  },
                  .ImmediateData = VK_EVENT_RESET);
}

void genX(CmdWaitEvents)(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    eventCount,
    const VkEvent*                              pEvents,
    VkPipelineStageFlags                        srcStageMask,
    VkPipelineStageFlags                        destStageMask,
    uint32_t                                    memoryBarrierCount,
    const VkMemoryBarrier*                      pMemoryBarriers,
    uint32_t                                    bufferMemoryBarrierCount,
    const VkBufferMemoryBarrier*                pBufferMemoryBarriers,
    uint32_t                                    imageMemoryBarrierCount,
    const VkImageMemoryBarrier*                 pImageMemoryBarriers)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   for (uint32_t i = 0; i < eventCount; i++) {
      ANV_FROM_HANDLE(anv_event, event, pEvents[i]);

      anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT),
                     .WaitMode = PollingMode,
                     .CompareOperation = SAD_EQUAL_SDD,
                     .SemaphoreDataDword = VK_EVENT_SET,
                     .SemaphoreAddress = {
                        &cmd_buffer->device->dynamic_state_block_pool.bo,
                        event->state.offset
                     });
   }

   genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask,
                            false, /* byRegion */
                            memoryBarrierCount, pMemoryBarriers,
                            bufferMemoryBarrierCount, pBufferMemoryBarriers,
                            imageMemoryBarrierCount, pImageMemoryBarriers);
}