OSDN Git Service

637954555a053361bd0ea1582810663c4f22b7e6
[android-x86/external-mesa.git] / src / intel / vulkan / anv_cmd_buffer.c
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31
32 #include "vk_format_info.h"
33
34 /** \file anv_cmd_buffer.c
35  *
36  * This file contains all of the stuff for emitting commands into a command
37  * buffer.  This includes implementations of most of the vkCmd*
38  * entrypoints.  This file is concerned entirely with state emission and
39  * not with the command buffer data structure itself.  As far as this file
40  * is concerned, most of anv_cmd_buffer is magic.
41  */
42
43 /* TODO: These are taken from GLES.  We should check the Vulkan spec */
44 const struct anv_dynamic_state default_dynamic_state = {
45    .viewport = {
46       .count = 0,
47    },
48    .scissor = {
49       .count = 0,
50    },
51    .line_width = 1.0f,
52    .depth_bias = {
53       .bias = 0.0f,
54       .clamp = 0.0f,
55       .slope = 0.0f,
56    },
57    .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f },
58    .depth_bounds = {
59       .min = 0.0f,
60       .max = 1.0f,
61    },
62    .stencil_compare_mask = {
63       .front = ~0u,
64       .back = ~0u,
65    },
66    .stencil_write_mask = {
67       .front = ~0u,
68       .back = ~0u,
69    },
70    .stencil_reference = {
71       .front = 0u,
72       .back = 0u,
73    },
74 };
75
76 void
77 anv_dynamic_state_copy(struct anv_dynamic_state *dest,
78                        const struct anv_dynamic_state *src,
79                        uint32_t copy_mask)
80 {
81    if (copy_mask & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
82       dest->viewport.count = src->viewport.count;
83       typed_memcpy(dest->viewport.viewports, src->viewport.viewports,
84                    src->viewport.count);
85    }
86
87    if (copy_mask & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
88       dest->scissor.count = src->scissor.count;
89       typed_memcpy(dest->scissor.scissors, src->scissor.scissors,
90                    src->scissor.count);
91    }
92
93    if (copy_mask & (1 << VK_DYNAMIC_STATE_LINE_WIDTH))
94       dest->line_width = src->line_width;
95
96    if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS))
97       dest->depth_bias = src->depth_bias;
98
99    if (copy_mask & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS))
100       typed_memcpy(dest->blend_constants, src->blend_constants, 4);
101
102    if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS))
103       dest->depth_bounds = src->depth_bounds;
104
105    if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK))
106       dest->stencil_compare_mask = src->stencil_compare_mask;
107
108    if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK))
109       dest->stencil_write_mask = src->stencil_write_mask;
110
111    if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))
112       dest->stencil_reference = src->stencil_reference;
113 }
114
115 static void
116 anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer)
117 {
118    struct anv_cmd_state *state = &cmd_buffer->state;
119
120    memset(&state->descriptors, 0, sizeof(state->descriptors));
121    memset(&state->push_constants, 0, sizeof(state->push_constants));
122    memset(state->binding_tables, 0, sizeof(state->binding_tables));
123    memset(state->samplers, 0, sizeof(state->samplers));
124
125    /* 0 isn't a valid config.  This ensures that we always configure L3$. */
126    cmd_buffer->state.current_l3_config = 0;
127
128    state->dirty = 0;
129    state->vb_dirty = 0;
130    state->pending_pipe_bits = 0;
131    state->descriptors_dirty = 0;
132    state->push_constants_dirty = 0;
133    state->pipeline = NULL;
134    state->push_constant_stages = 0;
135    state->restart_index = UINT32_MAX;
136    state->dynamic = default_dynamic_state;
137    state->need_query_wa = true;
138
139    if (state->attachments != NULL) {
140       anv_free(&cmd_buffer->pool->alloc, state->attachments);
141       state->attachments = NULL;
142    }
143
144    state->gen7.index_buffer = NULL;
145 }
146
147 /**
148  * Setup anv_cmd_state::attachments for vkCmdBeginRenderPass.
149  */
150 void
151 anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer,
152                                 const VkRenderPassBeginInfo *info)
153 {
154    struct anv_cmd_state *state = &cmd_buffer->state;
155    ANV_FROM_HANDLE(anv_render_pass, pass, info->renderPass);
156
157    anv_free(&cmd_buffer->pool->alloc, state->attachments);
158
159    if (pass->attachment_count == 0) {
160       state->attachments = NULL;
161       return;
162    }
163
164    state->attachments = anv_alloc(&cmd_buffer->pool->alloc,
165                                   pass->attachment_count *
166                                        sizeof(state->attachments[0]),
167                                   8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
168    if (state->attachments == NULL) {
169       /* FIXME: Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer */
170       abort();
171    }
172
173    for (uint32_t i = 0; i < pass->attachment_count; ++i) {
174       struct anv_render_pass_attachment *att = &pass->attachments[i];
175       VkImageAspectFlags att_aspects = vk_format_aspects(att->format);
176       VkImageAspectFlags clear_aspects = 0;
177
178       if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
179          /* color attachment */
180          if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
181             clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
182          }
183       } else {
184          /* depthstencil attachment */
185          if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
186              att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
187             clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
188          }
189          if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
190              att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
191             clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
192          }
193       }
194
195       state->attachments[i].pending_clear_aspects = clear_aspects;
196       if (clear_aspects) {
197          assert(info->clearValueCount > i);
198          state->attachments[i].clear_value = info->pClearValues[i];
199       }
200    }
201 }
202
203 static VkResult
204 anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer,
205                                           gl_shader_stage stage, uint32_t size)
206 {
207    struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage];
208
209    if (*ptr == NULL) {
210       *ptr = anv_alloc(&cmd_buffer->pool->alloc, size, 8,
211                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
212       if (*ptr == NULL)
213          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
214    } else if ((*ptr)->size < size) {
215       *ptr = anv_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8,
216                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
217       if (*ptr == NULL)
218          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
219    }
220    (*ptr)->size = size;
221
222    return VK_SUCCESS;
223 }
224
/* Ensure the push constant block for `stage` is large enough to contain
 * `field` of struct anv_push_constants (offset of the field plus its size).
 * Evaluates to the VkResult of anv_cmd_buffer_ensure_push_constants_size().
 * Arguments are parenthesized so arbitrary expressions can be passed.
 */
#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \
   anv_cmd_buffer_ensure_push_constants_size((cmd_buffer), (stage), \
      (offsetof(struct anv_push_constants, field) + \
       sizeof((cmd_buffer)->state.push_constants[0]->field)))
229
230 static VkResult anv_create_cmd_buffer(
231     struct anv_device *                         device,
232     struct anv_cmd_pool *                       pool,
233     VkCommandBufferLevel                        level,
234     VkCommandBuffer*                            pCommandBuffer)
235 {
236    struct anv_cmd_buffer *cmd_buffer;
237    VkResult result;
238
239    cmd_buffer = anv_alloc(&pool->alloc, sizeof(*cmd_buffer), 8,
240                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
241    if (cmd_buffer == NULL)
242       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
243
244    cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
245    cmd_buffer->device = device;
246    cmd_buffer->pool = pool;
247    cmd_buffer->level = level;
248    cmd_buffer->state.attachments = NULL;
249
250    result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer);
251    if (result != VK_SUCCESS)
252       goto fail;
253
254    anv_state_stream_init(&cmd_buffer->surface_state_stream,
255                          &device->surface_state_block_pool);
256    anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
257                          &device->dynamic_state_block_pool);
258
259    if (pool) {
260       list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
261    } else {
262       /* Init the pool_link so we can safefly call list_del when we destroy
263        * the command buffer
264        */
265       list_inithead(&cmd_buffer->pool_link);
266    }
267
268    *pCommandBuffer = anv_cmd_buffer_to_handle(cmd_buffer);
269
270    return VK_SUCCESS;
271
272  fail:
273    anv_free(&cmd_buffer->pool->alloc, cmd_buffer);
274
275    return result;
276 }
277
278 VkResult anv_AllocateCommandBuffers(
279     VkDevice                                    _device,
280     const VkCommandBufferAllocateInfo*          pAllocateInfo,
281     VkCommandBuffer*                            pCommandBuffers)
282 {
283    ANV_FROM_HANDLE(anv_device, device, _device);
284    ANV_FROM_HANDLE(anv_cmd_pool, pool, pAllocateInfo->commandPool);
285
286    VkResult result = VK_SUCCESS;
287    uint32_t i;
288
289    for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
290       result = anv_create_cmd_buffer(device, pool, pAllocateInfo->level,
291                                      &pCommandBuffers[i]);
292       if (result != VK_SUCCESS)
293          break;
294    }
295
296    if (result != VK_SUCCESS)
297       anv_FreeCommandBuffers(_device, pAllocateInfo->commandPool,
298                              i, pCommandBuffers);
299
300    return result;
301 }
302
303 static void
304 anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer)
305 {
306    list_del(&cmd_buffer->pool_link);
307
308    anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);
309
310    anv_state_stream_finish(&cmd_buffer->surface_state_stream);
311    anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
312
313    anv_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
314    anv_free(&cmd_buffer->pool->alloc, cmd_buffer);
315 }
316
317 void anv_FreeCommandBuffers(
318     VkDevice                                    device,
319     VkCommandPool                               commandPool,
320     uint32_t                                    commandBufferCount,
321     const VkCommandBuffer*                      pCommandBuffers)
322 {
323    for (uint32_t i = 0; i < commandBufferCount; i++) {
324       ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]);
325
326       anv_cmd_buffer_destroy(cmd_buffer);
327    }
328 }
329
330 static VkResult
331 anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer)
332 {
333    cmd_buffer->usage_flags = 0;
334    cmd_buffer->state.current_pipeline = UINT32_MAX;
335    anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer);
336    anv_cmd_state_reset(cmd_buffer);
337
338    anv_state_stream_finish(&cmd_buffer->surface_state_stream);
339    anv_state_stream_init(&cmd_buffer->surface_state_stream,
340                          &cmd_buffer->device->surface_state_block_pool);
341
342    anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
343    anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
344                          &cmd_buffer->device->dynamic_state_block_pool);
345    return VK_SUCCESS;
346 }
347
348 VkResult anv_ResetCommandBuffer(
349     VkCommandBuffer                             commandBuffer,
350     VkCommandBufferResetFlags                   flags)
351 {
352    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
353    return anv_cmd_buffer_reset(cmd_buffer);
354 }
355
356 void
357 anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
358 {
359    switch (cmd_buffer->device->info.gen) {
360    case 7:
361       if (cmd_buffer->device->info.is_haswell)
362          return gen75_cmd_buffer_emit_state_base_address(cmd_buffer);
363       else
364          return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
365    case 8:
366       return gen8_cmd_buffer_emit_state_base_address(cmd_buffer);
367    case 9:
368       return gen9_cmd_buffer_emit_state_base_address(cmd_buffer);
369    default:
370       unreachable("unsupported gen\n");
371    }
372 }
373
374 VkResult anv_BeginCommandBuffer(
375     VkCommandBuffer                             commandBuffer,
376     const VkCommandBufferBeginInfo*             pBeginInfo)
377 {
378    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
379
380    /* If this is the first vkBeginCommandBuffer, we must *initialize* the
381     * command buffer's state. Otherwise, we must *reset* its state. In both
382     * cases we reset it.
383     *
384     * From the Vulkan 1.0 spec:
385     *
386     *    If a command buffer is in the executable state and the command buffer
387     *    was allocated from a command pool with the
388     *    VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then
389     *    vkBeginCommandBuffer implicitly resets the command buffer, behaving
390     *    as if vkResetCommandBuffer had been called with
391     *    VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts
392     *    the command buffer in the recording state.
393     */
394    anv_cmd_buffer_reset(cmd_buffer);
395
396    cmd_buffer->usage_flags = pBeginInfo->flags;
397
398    assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY ||
399           !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT));
400
401    anv_cmd_buffer_emit_state_base_address(cmd_buffer);
402
403    if (cmd_buffer->usage_flags &
404        VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
405       cmd_buffer->state.framebuffer =
406          anv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer);
407       cmd_buffer->state.pass =
408          anv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
409
410       struct anv_subpass *subpass =
411          &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
412
413       anv_cmd_buffer_set_subpass(cmd_buffer, subpass);
414    }
415
416    return VK_SUCCESS;
417 }
418
419 VkResult anv_EndCommandBuffer(
420     VkCommandBuffer                             commandBuffer)
421 {
422    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
423    struct anv_device *device = cmd_buffer->device;
424
425    anv_cmd_buffer_end_batch_buffer(cmd_buffer);
426
427    if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
428       /* The algorithm used to compute the validate list is not threadsafe as
429        * it uses the bo->index field.  We have to lock the device around it.
430        * Fortunately, the chances for contention here are probably very low.
431        */
432       pthread_mutex_lock(&device->mutex);
433       anv_cmd_buffer_prepare_execbuf(cmd_buffer);
434       pthread_mutex_unlock(&device->mutex);
435    }
436
437    return VK_SUCCESS;
438 }
439
440 void anv_CmdBindPipeline(
441     VkCommandBuffer                             commandBuffer,
442     VkPipelineBindPoint                         pipelineBindPoint,
443     VkPipeline                                  _pipeline)
444 {
445    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
446    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
447
448    switch (pipelineBindPoint) {
449    case VK_PIPELINE_BIND_POINT_COMPUTE:
450       cmd_buffer->state.compute_pipeline = pipeline;
451       cmd_buffer->state.compute_dirty |= ANV_CMD_DIRTY_PIPELINE;
452       cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
453       cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
454       break;
455
456    case VK_PIPELINE_BIND_POINT_GRAPHICS:
457       cmd_buffer->state.pipeline = pipeline;
458       cmd_buffer->state.vb_dirty |= pipeline->vb_used;
459       cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE;
460       cmd_buffer->state.push_constants_dirty |= pipeline->active_stages;
461       cmd_buffer->state.descriptors_dirty |= pipeline->active_stages;
462
463       /* Apply the dynamic state from the pipeline */
464       cmd_buffer->state.dirty |= pipeline->dynamic_state_mask;
465       anv_dynamic_state_copy(&cmd_buffer->state.dynamic,
466                              &pipeline->dynamic_state,
467                              pipeline->dynamic_state_mask);
468       break;
469
470    default:
471       assert(!"invalid bind point");
472       break;
473    }
474 }
475
476 void anv_CmdSetViewport(
477     VkCommandBuffer                             commandBuffer,
478     uint32_t                                    firstViewport,
479     uint32_t                                    viewportCount,
480     const VkViewport*                           pViewports)
481 {
482    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
483
484    const uint32_t total_count = firstViewport + viewportCount;
485    if (cmd_buffer->state.dynamic.viewport.count < total_count)
486       cmd_buffer->state.dynamic.viewport.count = total_count;
487
488    memcpy(cmd_buffer->state.dynamic.viewport.viewports + firstViewport,
489           pViewports, viewportCount * sizeof(*pViewports));
490
491    cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_VIEWPORT;
492 }
493
494 void anv_CmdSetScissor(
495     VkCommandBuffer                             commandBuffer,
496     uint32_t                                    firstScissor,
497     uint32_t                                    scissorCount,
498     const VkRect2D*                             pScissors)
499 {
500    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
501
502    const uint32_t total_count = firstScissor + scissorCount;
503    if (cmd_buffer->state.dynamic.scissor.count < total_count)
504       cmd_buffer->state.dynamic.scissor.count = total_count;
505
506    memcpy(cmd_buffer->state.dynamic.scissor.scissors + firstScissor,
507           pScissors, scissorCount * sizeof(*pScissors));
508
509    cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_SCISSOR;
510 }
511
512 void anv_CmdSetLineWidth(
513     VkCommandBuffer                             commandBuffer,
514     float                                       lineWidth)
515 {
516    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
517
518    cmd_buffer->state.dynamic.line_width = lineWidth;
519    cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
520 }
521
522 void anv_CmdSetDepthBias(
523     VkCommandBuffer                             commandBuffer,
524     float                                       depthBiasConstantFactor,
525     float                                       depthBiasClamp,
526     float                                       depthBiasSlopeFactor)
527 {
528    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
529
530    cmd_buffer->state.dynamic.depth_bias.bias = depthBiasConstantFactor;
531    cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp;
532    cmd_buffer->state.dynamic.depth_bias.slope = depthBiasSlopeFactor;
533
534    cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
535 }
536
537 void anv_CmdSetBlendConstants(
538     VkCommandBuffer                             commandBuffer,
539     const float                                 blendConstants[4])
540 {
541    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
542
543    memcpy(cmd_buffer->state.dynamic.blend_constants,
544           blendConstants, sizeof(float) * 4);
545
546    cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
547 }
548
549 void anv_CmdSetDepthBounds(
550     VkCommandBuffer                             commandBuffer,
551     float                                       minDepthBounds,
552     float                                       maxDepthBounds)
553 {
554    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
555
556    cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds;
557    cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds;
558
559    cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
560 }
561
562 void anv_CmdSetStencilCompareMask(
563     VkCommandBuffer                             commandBuffer,
564     VkStencilFaceFlags                          faceMask,
565     uint32_t                                    compareMask)
566 {
567    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
568
569    if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
570       cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask;
571    if (faceMask & VK_STENCIL_FACE_BACK_BIT)
572       cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask;
573
574    cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
575 }
576
577 void anv_CmdSetStencilWriteMask(
578     VkCommandBuffer                             commandBuffer,
579     VkStencilFaceFlags                          faceMask,
580     uint32_t                                    writeMask)
581 {
582    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
583
584    if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
585       cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask;
586    if (faceMask & VK_STENCIL_FACE_BACK_BIT)
587       cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask;
588
589    cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
590 }
591
592 void anv_CmdSetStencilReference(
593     VkCommandBuffer                             commandBuffer,
594     VkStencilFaceFlags                          faceMask,
595     uint32_t                                    reference)
596 {
597    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
598
599    if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
600       cmd_buffer->state.dynamic.stencil_reference.front = reference;
601    if (faceMask & VK_STENCIL_FACE_BACK_BIT)
602       cmd_buffer->state.dynamic.stencil_reference.back = reference;
603
604    cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
605 }
606
607 void anv_CmdBindDescriptorSets(
608     VkCommandBuffer                             commandBuffer,
609     VkPipelineBindPoint                         pipelineBindPoint,
610     VkPipelineLayout                            _layout,
611     uint32_t                                    firstSet,
612     uint32_t                                    descriptorSetCount,
613     const VkDescriptorSet*                      pDescriptorSets,
614     uint32_t                                    dynamicOffsetCount,
615     const uint32_t*                             pDynamicOffsets)
616 {
617    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
618    ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout);
619    struct anv_descriptor_set_layout *set_layout;
620
621    assert(firstSet + descriptorSetCount < MAX_SETS);
622
623    uint32_t dynamic_slot = 0;
624    for (uint32_t i = 0; i < descriptorSetCount; i++) {
625       ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]);
626       set_layout = layout->set[firstSet + i].layout;
627
628       if (cmd_buffer->state.descriptors[firstSet + i] != set) {
629          cmd_buffer->state.descriptors[firstSet + i] = set;
630          cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages;
631       }
632
633       if (set_layout->dynamic_offset_count > 0) {
634          anv_foreach_stage(s, set_layout->shader_stages) {
635             anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, dynamic);
636
637             struct anv_push_constants *push =
638                cmd_buffer->state.push_constants[s];
639
640             unsigned d = layout->set[firstSet + i].dynamic_offset_start;
641             const uint32_t *offsets = pDynamicOffsets + dynamic_slot;
642             struct anv_descriptor *desc = set->descriptors;
643
644             for (unsigned b = 0; b < set_layout->binding_count; b++) {
645                if (set_layout->binding[b].dynamic_offset_index < 0)
646                   continue;
647
648                unsigned array_size = set_layout->binding[b].array_size;
649                for (unsigned j = 0; j < array_size; j++) {
650                   uint32_t range = 0;
651                   if (desc->buffer_view)
652                      range = desc->buffer_view->range;
653                   push->dynamic[d].offset = *(offsets++);
654                   push->dynamic[d].range = range;
655                   desc++;
656                   d++;
657                }
658             }
659          }
660          cmd_buffer->state.push_constants_dirty |= set_layout->shader_stages;
661       }
662    }
663 }
664
665 void anv_CmdBindVertexBuffers(
666     VkCommandBuffer                             commandBuffer,
667     uint32_t                                    firstBinding,
668     uint32_t                                    bindingCount,
669     const VkBuffer*                             pBuffers,
670     const VkDeviceSize*                         pOffsets)
671 {
672    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
673    struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings;
674
675    /* We have to defer setting up vertex buffer since we need the buffer
676     * stride from the pipeline. */
677
678    assert(firstBinding + bindingCount < MAX_VBS);
679    for (uint32_t i = 0; i < bindingCount; i++) {
680       vb[firstBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]);
681       vb[firstBinding + i].offset = pOffsets[i];
682       cmd_buffer->state.vb_dirty |= 1 << (firstBinding + i);
683    }
684 }
685
686 static void
687 add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer,
688                         struct anv_state state, struct anv_bo *bo, uint32_t offset)
689 {
690    /* The address goes in SURFACE_STATE dword 1 for gens < 8 and dwords 8 and
691     * 9 for gen8+.  We only write the first dword for gen8+ here and rely on
692     * the initial state to set the high bits to 0. */
693
694    const uint32_t dword = cmd_buffer->device->info.gen < 8 ? 1 : 8;
695
696    anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc,
697                       state.offset + dword * 4, bo, offset);
698 }
699
700 enum isl_format
701 anv_isl_format_for_descriptor_type(VkDescriptorType type)
702 {
703    switch (type) {
704    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
705    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
706       return ISL_FORMAT_R32G32B32A32_FLOAT;
707
708    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
709    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
710       return ISL_FORMAT_RAW;
711
712    default:
713       unreachable("Invalid descriptor type");
714    }
715 }
716
717 static struct anv_state
718 anv_cmd_buffer_alloc_null_surface_state(struct anv_cmd_buffer *cmd_buffer,
719                                         struct anv_framebuffer *fb)
720 {
721    switch (cmd_buffer->device->info.gen) {
722    case 7:
723       if (cmd_buffer->device->info.is_haswell) {
724          return gen75_cmd_buffer_alloc_null_surface_state(cmd_buffer, fb);
725       } else {
726          return gen7_cmd_buffer_alloc_null_surface_state(cmd_buffer, fb);
727       }
728    case 8:
729       return gen8_cmd_buffer_alloc_null_surface_state(cmd_buffer, fb);
730    case 9:
731       return gen9_cmd_buffer_alloc_null_surface_state(cmd_buffer, fb);
732    default:
733       unreachable("Invalid hardware generation");
734    }
735 }
736
737 VkResult
738 anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
739                                   gl_shader_stage stage,
740                                   struct anv_state *bt_state)
741 {
742    struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
743    struct anv_subpass *subpass = cmd_buffer->state.subpass;
744    struct anv_pipeline_bind_map *map;
745    uint32_t bias, state_offset;
746
747    switch (stage) {
748    case  MESA_SHADER_COMPUTE:
749       map = &cmd_buffer->state.compute_pipeline->bindings[stage];
750       bias = 1;
751       break;
752    default:
753       map = &cmd_buffer->state.pipeline->bindings[stage];
754       bias = 0;
755       break;
756    }
757
758    if (bias + map->surface_count == 0) {
759       *bt_state = (struct anv_state) { 0, };
760       return VK_SUCCESS;
761    }
762
763    *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer,
764                                                   bias + map->surface_count,
765                                                   &state_offset);
766    uint32_t *bt_map = bt_state->map;
767
768    if (bt_state->map == NULL)
769       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
770
771    if (stage == MESA_SHADER_COMPUTE &&
772        get_cs_prog_data(cmd_buffer->state.compute_pipeline)->uses_num_work_groups) {
773       struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo;
774       uint32_t bo_offset = cmd_buffer->state.num_workgroups_offset;
775
776       struct anv_state surface_state;
777       surface_state =
778          anv_cmd_buffer_alloc_surface_state(cmd_buffer);
779
780       const enum isl_format format =
781          anv_isl_format_for_descriptor_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
782       anv_fill_buffer_surface_state(cmd_buffer->device, surface_state,
783                                     format, bo_offset, 12, 1);
784
785       bt_map[0] = surface_state.offset + state_offset;
786       add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset);
787    }
788
789    if (map->surface_count == 0)
790       goto out;
791
792    if (map->image_count > 0) {
793       VkResult result =
794          anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images);
795       if (result != VK_SUCCESS)
796          return result;
797
798       cmd_buffer->state.push_constants_dirty |= 1 << stage;
799    }
800
801    uint32_t image = 0;
802    for (uint32_t s = 0; s < map->surface_count; s++) {
803       struct anv_pipeline_binding *binding = &map->surface_to_descriptor[s];
804
805       struct anv_state surface_state;
806       struct anv_bo *bo;
807       uint32_t bo_offset;
808
809       if (binding->set == ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) {
810          /* Color attachment binding */
811          assert(stage == MESA_SHADER_FRAGMENT);
812          assert(binding->binding == 0);
813          if (binding->index < subpass->color_count) {
814             const struct anv_image_view *iview =
815                fb->attachments[subpass->color_attachments[binding->index]];
816
817             assert(iview->color_rt_surface_state.alloc_size);
818             surface_state = iview->color_rt_surface_state;
819             add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state,
820                                     iview->bo, iview->offset);
821          } else {
822             /* Null render target */
823             struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
824             surface_state =
825                anv_cmd_buffer_alloc_null_surface_state(cmd_buffer, fb);
826          }
827
828          bt_map[bias + s] = surface_state.offset + state_offset;
829          continue;
830       }
831
832       struct anv_descriptor_set *set =
833          cmd_buffer->state.descriptors[binding->set];
834       uint32_t offset = set->layout->binding[binding->binding].descriptor_index;
835       struct anv_descriptor *desc = &set->descriptors[offset + binding->index];
836
837       switch (desc->type) {
838       case VK_DESCRIPTOR_TYPE_SAMPLER:
839          /* Nothing for us to do here */
840          continue;
841
842       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
843       case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
844       case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
845          surface_state = desc->image_view->sampler_surface_state;
846          assert(surface_state.alloc_size);
847          bo = desc->image_view->bo;
848          bo_offset = desc->image_view->offset;
849          break;
850
851       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
852          surface_state = desc->image_view->storage_surface_state;
853          assert(surface_state.alloc_size);
854          bo = desc->image_view->bo;
855          bo_offset = desc->image_view->offset;
856
857          struct brw_image_param *image_param =
858             &cmd_buffer->state.push_constants[stage]->images[image++];
859
860          *image_param = desc->image_view->storage_image_param;
861          image_param->surface_idx = bias + s;
862          break;
863       }
864
865       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
866       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
867       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
868       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
869       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
870          surface_state = desc->buffer_view->surface_state;
871          assert(surface_state.alloc_size);
872          bo = desc->buffer_view->bo;
873          bo_offset = desc->buffer_view->offset;
874          break;
875
876       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
877          surface_state = desc->buffer_view->storage_surface_state;
878          assert(surface_state.alloc_size);
879          bo = desc->buffer_view->bo;
880          bo_offset = desc->buffer_view->offset;
881
882          struct brw_image_param *image_param =
883             &cmd_buffer->state.push_constants[stage]->images[image++];
884
885          *image_param = desc->buffer_view->storage_image_param;
886          image_param->surface_idx = bias + s;
887          break;
888
889       default:
890          assert(!"Invalid descriptor type");
891          continue;
892       }
893
894       bt_map[bias + s] = surface_state.offset + state_offset;
895       add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset);
896    }
897    assert(image == map->image_count);
898
899  out:
900    if (!cmd_buffer->device->info.has_llc)
901       anv_state_clflush(*bt_state);
902
903    return VK_SUCCESS;
904 }
905
906 VkResult
907 anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
908                              gl_shader_stage stage, struct anv_state *state)
909 {
910    struct anv_pipeline_bind_map *map;
911
912    if (stage == MESA_SHADER_COMPUTE)
913       map = &cmd_buffer->state.compute_pipeline->bindings[stage];
914    else
915       map = &cmd_buffer->state.pipeline->bindings[stage];
916
917    if (map->sampler_count == 0) {
918       *state = (struct anv_state) { 0, };
919       return VK_SUCCESS;
920    }
921
922    uint32_t size = map->sampler_count * 16;
923    *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32);
924
925    if (state->map == NULL)
926       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
927
928    for (uint32_t s = 0; s < map->sampler_count; s++) {
929       struct anv_pipeline_binding *binding = &map->sampler_to_descriptor[s];
930       struct anv_descriptor_set *set =
931          cmd_buffer->state.descriptors[binding->set];
932       uint32_t offset = set->layout->binding[binding->binding].descriptor_index;
933       struct anv_descriptor *desc = &set->descriptors[offset + binding->index];
934
935       if (desc->type != VK_DESCRIPTOR_TYPE_SAMPLER &&
936           desc->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
937          continue;
938
939       struct anv_sampler *sampler = desc->sampler;
940
941       /* This can happen if we have an unfilled slot since TYPE_SAMPLER
942        * happens to be zero.
943        */
944       if (sampler == NULL)
945          continue;
946
947       memcpy(state->map + (s * 16),
948              sampler->state, sizeof(sampler->state));
949    }
950
951    if (!cmd_buffer->device->info.has_llc)
952       anv_state_clflush(*state);
953
954    return VK_SUCCESS;
955 }
956
957 uint32_t
958 anv_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
959 {
960    VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty &
961                               cmd_buffer->state.pipeline->active_stages;
962
963    VkResult result = VK_SUCCESS;
964    anv_foreach_stage(s, dirty) {
965       result = anv_cmd_buffer_emit_samplers(cmd_buffer, s,
966                                             &cmd_buffer->state.samplers[s]);
967       if (result != VK_SUCCESS)
968          break;
969       result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s,
970                                                  &cmd_buffer->state.binding_tables[s]);
971       if (result != VK_SUCCESS)
972          break;
973    }
974
975    if (result != VK_SUCCESS) {
976       assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);
977
978       result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
979       assert(result == VK_SUCCESS);
980
981       /* Re-emit state base addresses so we get the new surface state base
982        * address before we start emitting binding tables etc.
983        */
984       anv_cmd_buffer_emit_state_base_address(cmd_buffer);
985
986       /* Re-emit all active binding tables */
987       dirty |= cmd_buffer->state.pipeline->active_stages;
988       anv_foreach_stage(s, dirty) {
989          result = anv_cmd_buffer_emit_samplers(cmd_buffer, s,
990                                                &cmd_buffer->state.samplers[s]);
991          if (result != VK_SUCCESS)
992             return result;
993          result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s,
994                                                     &cmd_buffer->state.binding_tables[s]);
995          if (result != VK_SUCCESS)
996             return result;
997       }
998    }
999
1000    cmd_buffer->state.descriptors_dirty &= ~dirty;
1001
1002    return dirty;
1003 }
1004
1005 struct anv_state
1006 anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
1007                             const void *data, uint32_t size, uint32_t alignment)
1008 {
1009    struct anv_state state;
1010
1011    state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);
1012    memcpy(state.map, data, size);
1013
1014    if (!cmd_buffer->device->info.has_llc)
1015       anv_state_clflush(state);
1016
1017    VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size));
1018
1019    return state;
1020 }
1021
1022 struct anv_state
1023 anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
1024                              uint32_t *a, uint32_t *b,
1025                              uint32_t dwords, uint32_t alignment)
1026 {
1027    struct anv_state state;
1028    uint32_t *p;
1029
1030    state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
1031                                               dwords * 4, alignment);
1032    p = state.map;
1033    for (uint32_t i = 0; i < dwords; i++)
1034       p[i] = a[i] | b[i];
1035
1036    if (!cmd_buffer->device->info.has_llc)
1037       anv_state_clflush(state);
1038
1039    VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
1040
1041    return state;
1042 }
1043
1044 /**
1045  * @brief Setup the command buffer for recording commands inside the given
1046  * subpass.
1047  *
1048  * This does not record all commands needed for starting the subpass.
1049  * Starting the subpass may require additional commands.
1050  *
1051  * Note that vkCmdBeginRenderPass, vkCmdNextSubpass, and vkBeginCommandBuffer
1052  * with VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT, all setup the
1053  * command buffer for recording commands for some subpass.  But only the first
1054  * two, vkCmdBeginRenderPass and vkCmdNextSubpass, can start a subpass.
1055  */
1056 void
1057 anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer,
1058                            struct anv_subpass *subpass)
1059 {
1060    switch (cmd_buffer->device->info.gen) {
1061    case 7:
1062       if (cmd_buffer->device->info.is_haswell) {
1063          gen75_cmd_buffer_set_subpass(cmd_buffer, subpass);
1064       } else {
1065          gen7_cmd_buffer_set_subpass(cmd_buffer, subpass);
1066       }
1067       break;
1068    case 8:
1069       gen8_cmd_buffer_set_subpass(cmd_buffer, subpass);
1070       break;
1071    case 9:
1072       gen9_cmd_buffer_set_subpass(cmd_buffer, subpass);
1073       break;
1074    default:
1075       unreachable("unsupported gen\n");
1076    }
1077 }
1078
1079 struct anv_state
1080 anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
1081                               gl_shader_stage stage)
1082 {
1083    struct anv_push_constants *data =
1084       cmd_buffer->state.push_constants[stage];
1085    const struct brw_stage_prog_data *prog_data =
1086       cmd_buffer->state.pipeline->prog_data[stage];
1087
1088    /* If we don't actually have any push constants, bail. */
1089    if (data == NULL || prog_data == NULL || prog_data->nr_params == 0)
1090       return (struct anv_state) { .offset = 0 };
1091
1092    struct anv_state state =
1093       anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
1094                                          prog_data->nr_params * sizeof(float),
1095                                          32 /* bottom 5 bits MBZ */);
1096
1097    /* Walk through the param array and fill the buffer with data */
1098    uint32_t *u32_map = state.map;
1099    for (unsigned i = 0; i < prog_data->nr_params; i++) {
1100       uint32_t offset = (uintptr_t)prog_data->param[i];
1101       u32_map[i] = *(uint32_t *)((uint8_t *)data + offset);
1102    }
1103
1104    if (!cmd_buffer->device->info.has_llc)
1105       anv_state_clflush(state);
1106
1107    return state;
1108 }
1109
1110 struct anv_state
1111 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
1112 {
1113    struct anv_push_constants *data =
1114       cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE];
1115    struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
1116    const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
1117    const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
1118
1119    /* If we don't actually have any push constants, bail. */
1120    if (cs_prog_data->push.total.size == 0)
1121       return (struct anv_state) { .offset = 0 };
1122
1123    const unsigned push_constant_alignment =
1124       cmd_buffer->device->info.gen < 8 ? 32 : 64;
1125    const unsigned aligned_total_push_constants_size =
1126       ALIGN(cs_prog_data->push.total.size, push_constant_alignment);
1127    struct anv_state state =
1128       anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
1129                                          aligned_total_push_constants_size,
1130                                          push_constant_alignment);
1131
1132    /* Walk through the param array and fill the buffer with data */
1133    uint32_t *u32_map = state.map;
1134
1135    if (cs_prog_data->push.cross_thread.size > 0) {
1136       assert(cs_prog_data->thread_local_id_index < 0 ||
1137              cs_prog_data->thread_local_id_index >=
1138                 cs_prog_data->push.cross_thread.dwords);
1139       for (unsigned i = 0;
1140            i < cs_prog_data->push.cross_thread.dwords;
1141            i++) {
1142          uint32_t offset = (uintptr_t)prog_data->param[i];
1143          u32_map[i] = *(uint32_t *)((uint8_t *)data + offset);
1144       }
1145    }
1146
1147    if (cs_prog_data->push.per_thread.size > 0) {
1148       for (unsigned t = 0; t < cs_prog_data->threads; t++) {
1149          unsigned dst =
1150             8 * (cs_prog_data->push.per_thread.regs * t +
1151                  cs_prog_data->push.cross_thread.regs);
1152          unsigned src = cs_prog_data->push.cross_thread.dwords;
1153          for ( ; src < prog_data->nr_params; src++, dst++) {
1154             if (src != cs_prog_data->thread_local_id_index) {
1155                uint32_t offset = (uintptr_t)prog_data->param[src];
1156                u32_map[dst] = *(uint32_t *)((uint8_t *)data + offset);
1157             } else {
1158                u32_map[dst] = t * cs_prog_data->simd_size;
1159             }
1160          }
1161       }
1162    }
1163
1164    if (!cmd_buffer->device->info.has_llc)
1165       anv_state_clflush(state);
1166
1167    return state;
1168 }
1169
1170 void anv_CmdPushConstants(
1171     VkCommandBuffer                             commandBuffer,
1172     VkPipelineLayout                            layout,
1173     VkShaderStageFlags                          stageFlags,
1174     uint32_t                                    offset,
1175     uint32_t                                    size,
1176     const void*                                 pValues)
1177 {
1178    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1179
1180    anv_foreach_stage(stage, stageFlags) {
1181       anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, client_data);
1182
1183       memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset,
1184              pValues, size);
1185    }
1186
1187    cmd_buffer->state.push_constants_dirty |= stageFlags;
1188 }
1189
1190 void anv_CmdExecuteCommands(
1191     VkCommandBuffer                             commandBuffer,
1192     uint32_t                                    commandBufferCount,
1193     const VkCommandBuffer*                      pCmdBuffers)
1194 {
1195    ANV_FROM_HANDLE(anv_cmd_buffer, primary, commandBuffer);
1196
1197    assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1198
1199    for (uint32_t i = 0; i < commandBufferCount; i++) {
1200       ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);
1201
1202       assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
1203
1204       anv_cmd_buffer_add_secondary(primary, secondary);
1205    }
1206 }
1207
1208 VkResult anv_CreateCommandPool(
1209     VkDevice                                    _device,
1210     const VkCommandPoolCreateInfo*              pCreateInfo,
1211     const VkAllocationCallbacks*                pAllocator,
1212     VkCommandPool*                              pCmdPool)
1213 {
1214    ANV_FROM_HANDLE(anv_device, device, _device);
1215    struct anv_cmd_pool *pool;
1216
1217    pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
1218                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1219    if (pool == NULL)
1220       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1221
1222    if (pAllocator)
1223       pool->alloc = *pAllocator;
1224    else
1225       pool->alloc = device->alloc;
1226
1227    list_inithead(&pool->cmd_buffers);
1228
1229    *pCmdPool = anv_cmd_pool_to_handle(pool);
1230
1231    return VK_SUCCESS;
1232 }
1233
1234 void anv_DestroyCommandPool(
1235     VkDevice                                    _device,
1236     VkCommandPool                               commandPool,
1237     const VkAllocationCallbacks*                pAllocator)
1238 {
1239    ANV_FROM_HANDLE(anv_device, device, _device);
1240    ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool);
1241
1242    list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer,
1243                             &pool->cmd_buffers, pool_link) {
1244       anv_cmd_buffer_destroy(cmd_buffer);
1245    }
1246
1247    anv_free2(&device->alloc, pAllocator, pool);
1248 }
1249
1250 VkResult anv_ResetCommandPool(
1251     VkDevice                                    device,
1252     VkCommandPool                               commandPool,
1253     VkCommandPoolResetFlags                     flags)
1254 {
1255    ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool);
1256
1257    list_for_each_entry(struct anv_cmd_buffer, cmd_buffer,
1258                        &pool->cmd_buffers, pool_link) {
1259       anv_cmd_buffer_reset(cmd_buffer);
1260    }
1261
1262    return VK_SUCCESS;
1263 }
1264
1265 /**
1266  * Return NULL if the current subpass has no depthstencil attachment.
1267  */
1268 const struct anv_image_view *
1269 anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer)
1270 {
1271    const struct anv_subpass *subpass = cmd_buffer->state.subpass;
1272    const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
1273
1274    if (subpass->depth_stencil_attachment == VK_ATTACHMENT_UNUSED)
1275       return NULL;
1276
1277    const struct anv_image_view *iview =
1278       fb->attachments[subpass->depth_stencil_attachment];
1279
1280    assert(iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT |
1281                                 VK_IMAGE_ASPECT_STENCIL_BIT));
1282
1283    return iview;
1284 }