OSDN Git Service

anv/pipeline: Add support for caching the push constant map
[android-x86/external-mesa.git] / src / intel / vulkan / gen8_pipeline.c
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31
32 #include "genxml/gen_macros.h"
33 #include "genxml/genX_pack.h"
34
35 #include "genX_pipeline_util.h"
36
37 static void
38 emit_ia_state(struct anv_pipeline *pipeline,
39               const VkPipelineInputAssemblyStateCreateInfo *info,
40               const struct anv_graphics_pipeline_create_info *extra)
41 {
42    anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {
43       vft.PrimitiveTopologyType = pipeline->topology;
44    }
45 }
46
47 static void
48 emit_rs_state(struct anv_pipeline *pipeline,
49               const VkPipelineRasterizationStateCreateInfo *info,
50               const VkPipelineMultisampleStateCreateInfo *ms_info,
51               const struct anv_graphics_pipeline_create_info *extra)
52 {
53    uint32_t samples = 1;
54
55    if (ms_info)
56       samples = ms_info->rasterizationSamples;
57
58    struct GENX(3DSTATE_SF) sf = {
59       GENX(3DSTATE_SF_header),
60       .ViewportTransformEnable = !(extra && extra->use_rectlist),
61       .TriangleStripListProvokingVertexSelect = 0,
62       .LineStripListProvokingVertexSelect = 0,
63       .TriangleFanProvokingVertexSelect = 1,
64       .PointWidthSource = Vertex,
65       .PointWidth = 1.0,
66    };
67
68    /* FINISHME: VkBool32 rasterizerDiscardEnable; */
69
70    GENX(3DSTATE_SF_pack)(NULL, pipeline->gen8.sf, &sf);
71
72    struct GENX(3DSTATE_RASTER) raster = {
73       GENX(3DSTATE_RASTER_header),
74
75       /* For details on 3DSTATE_RASTER multisample state, see the BSpec table
76        * "Multisample Modes State".
77        */
78       .DXMultisampleRasterizationEnable = samples > 1,
79       .ForcedSampleCount = FSC_NUMRASTSAMPLES_0,
80       .ForceMultisampling = false,
81
82       .FrontWinding = vk_to_gen_front_face[info->frontFace],
83       .CullMode = vk_to_gen_cullmode[info->cullMode],
84       .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
85       .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
86       .ScissorRectangleEnable = !(extra && extra->use_rectlist),
87 #if GEN_GEN == 8
88       .ViewportZClipTestEnable = !pipeline->depth_clamp_enable,
89 #else
90       /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
91       .ViewportZFarClipTestEnable = !pipeline->depth_clamp_enable,
92       .ViewportZNearClipTestEnable = !pipeline->depth_clamp_enable,
93 #endif
94       .GlobalDepthOffsetEnableSolid = info->depthBiasEnable,
95       .GlobalDepthOffsetEnableWireframe = info->depthBiasEnable,
96       .GlobalDepthOffsetEnablePoint = info->depthBiasEnable,
97    };
98
99    GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster);
100 }
101
102 static void
103 emit_ms_state(struct anv_pipeline *pipeline,
104               const VkPipelineMultisampleStateCreateInfo *info)
105 {
106    uint32_t samples = 1;
107    uint32_t log2_samples = 0;
108
109    /* From the Vulkan 1.0 spec:
110     *    If pSampleMask is NULL, it is treated as if the mask has all bits
111     *    enabled, i.e. no coverage is removed from fragments.
112     *
113     * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
114     */
115    uint32_t sample_mask = 0xffff;
116
117    if (info) {
118       samples = info->rasterizationSamples;
119       log2_samples = __builtin_ffs(samples) - 1;
120    }
121
122    if (info && info->pSampleMask)
123       sample_mask &= info->pSampleMask[0];
124
125    if (info && info->sampleShadingEnable)
126       anv_finishme("VkPipelineMultisampleStateCreateInfo::sampleShadingEnable");
127
128    anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), ms) {
129       /* The PRM says that this bit is valid only for DX9:
130        *
131        *    SW can choose to set this bit only for DX9 API. DX10/OGL API's
132        *    should not have any effect by setting or not setting this bit.
133        */
134       ms.PixelPositionOffsetEnable = false;
135
136       ms.PixelLocation = CENTER;
137       ms.NumberofMultisamples = log2_samples;
138    }
139
140    anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
141       sm.SampleMask = sample_mask;
142    }
143 }
144
145 VkResult
146 genX(graphics_pipeline_create)(
147     VkDevice                                    _device,
148     struct anv_pipeline_cache *                 cache,
149     const VkGraphicsPipelineCreateInfo*         pCreateInfo,
150     const struct anv_graphics_pipeline_create_info *extra,
151     const VkAllocationCallbacks*                pAllocator,
152     VkPipeline*                                 pPipeline)
153 {
154    ANV_FROM_HANDLE(anv_device, device, _device);
155    ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass);
156    struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
157    struct anv_pipeline *pipeline;
158    VkResult result;
159    uint32_t offset, length;
160
161    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
162
163    pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
164                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
165    if (pipeline == NULL)
166       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
167
168    result = anv_pipeline_init(pipeline, device, cache,
169                               pCreateInfo, extra, pAllocator);
170    if (result != VK_SUCCESS) {
171       anv_free2(&device->alloc, pAllocator, pipeline);
172       return result;
173    }
174
175    assert(pCreateInfo->pVertexInputState);
176    emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra);
177    assert(pCreateInfo->pInputAssemblyState);
178    emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra);
179    assert(pCreateInfo->pRasterizationState);
180    emit_rs_state(pipeline, pCreateInfo->pRasterizationState,
181                  pCreateInfo->pMultisampleState, extra);
182    emit_ms_state(pipeline, pCreateInfo->pMultisampleState);
183    emit_ds_state(pipeline, pCreateInfo->pDepthStencilState, pass, subpass);
184    emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
185                            pCreateInfo->pMultisampleState);
186
187    emit_urb_setup(pipeline);
188
189    emit_3dstate_clip(pipeline, pCreateInfo->pViewportState,
190                      pCreateInfo->pRasterizationState, extra);
191
192    const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
193    anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) {
194       wm.StatisticsEnable                    = true;
195       wm.LineEndCapAntialiasingRegionWidth   = _05pixels;
196       wm.LineAntialiasingRegionWidth         = _10pixels;
197       wm.ForceThreadDispatchEnable           = NORMAL;
198       wm.PointRasterizationRule              = RASTRULE_UPPER_RIGHT;
199
200       if (wm_prog_data && wm_prog_data->early_fragment_tests) {
201          wm.EarlyDepthStencilControl         = PREPS;
202       } else if (wm_prog_data && wm_prog_data->has_side_effects) {
203          wm.EarlyDepthStencilControl         = PSEXEC;
204       } else {
205          wm.EarlyDepthStencilControl         = NORMAL;
206       }
207
208       wm.BarycentricInterpolationMode = pipeline->ps_ksp0 == NO_KERNEL ?
209          0 : wm_prog_data->barycentric_interp_modes;
210    }
211
212    if (pipeline->gs_kernel == NO_KERNEL) {
213       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs);
214    } else {
215       const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);
216       offset = 1;
217       length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset;
218
219       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs) {
220          gs.SingleProgramFlow       = false;
221          gs.KernelStartPointer      = pipeline->gs_kernel;
222          gs.VectorMaskEnable        = false;
223          gs.SamplerCount            = 0;
224          gs.BindingTableEntryCount  = 0;
225          gs.ExpectedVertexCount     = gs_prog_data->vertices_in;
226
227          gs.ScratchSpaceBasePointer = (struct anv_address) {
228             .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
229                                          MESA_SHADER_GEOMETRY,
230                                          gs_prog_data->base.base.total_scratch),
231             .offset = 0,
232          };
233          gs.PerThreadScratchSpace   = scratch_space(&gs_prog_data->base.base);
234          gs.OutputVertexSize        = gs_prog_data->output_vertex_size_hwords * 2 - 1;
235          gs.OutputTopology          = gs_prog_data->output_topology;
236          gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
237          gs.IncludeVertexHandles    = gs_prog_data->base.include_vue_handles;
238
239          gs.DispatchGRFStartRegisterForURBData =
240             gs_prog_data->base.base.dispatch_grf_start_reg;
241
242          gs.MaximumNumberofThreads  = device->info.max_gs_threads / 2 - 1;
243          gs.ControlDataHeaderSize   = gs_prog_data->control_data_header_size_hwords;
244          gs.DispatchMode            = gs_prog_data->base.dispatch_mode;
245          gs.StatisticsEnable        = true;
246          gs.IncludePrimitiveID      = gs_prog_data->include_primitive_id;
247          gs.ReorderMode             = TRAILING;
248          gs.Enable                  = true;
249
250          gs.ControlDataFormat       = gs_prog_data->control_data_format;
251
252          gs.StaticOutput            = gs_prog_data->static_vertex_count >= 0;
253          gs.StaticOutputVertexCount =
254             gs_prog_data->static_vertex_count >= 0 ?
255             gs_prog_data->static_vertex_count : 0;
256
257          /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled:
258           * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v)
259           * UserClipDistanceCullTestEnableBitmask(v)
260           */
261
262          gs.VertexURBEntryOutputReadOffset = offset;
263          gs.VertexURBEntryOutputLength = length;
264       }
265    }
266
267    const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
268    /* Skip the VUE header and position slots */
269    offset = 1;
270    length = (vs_prog_data->base.vue_map.num_slots + 1) / 2 - offset;
271
272    uint32_t vs_start = pipeline->vs_simd8 != NO_KERNEL ? pipeline->vs_simd8 :
273                                                          pipeline->vs_vec4;
274
275    if (vs_start == NO_KERNEL || (extra && extra->disable_vs)) {
276       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), vs) {
277          vs.FunctionEnable = false;
278          /* Even if VS is disabled, SBE still gets the amount of
279           * vertex data to read from this field. */
280          vs.VertexURBEntryOutputReadOffset = offset;
281          vs.VertexURBEntryOutputLength = length;
282       }
283    } else {
284       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), vs) {
285          vs.KernelStartPointer            = vs_start;
286          vs.SingleVertexDispatch          = false;
287          vs.VectorMaskEnable              = false;
288          vs.SamplerCount                  = 0;
289
290          vs.BindingTableEntryCount =
291             vs_prog_data->base.base.binding_table.size_bytes / 4,
292
293          vs.ThreadDispatchPriority        = false;
294          vs.FloatingPointMode             = IEEE754;
295          vs.IllegalOpcodeExceptionEnable  = false;
296          vs.AccessesUAV                   = false;
297          vs.SoftwareExceptionEnable       = false;
298
299          vs.ScratchSpaceBasePointer = (struct anv_address) {
300             .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
301                                          MESA_SHADER_VERTEX,
302                                          vs_prog_data->base.base.total_scratch),
303             .offset = 0,
304          };
305          vs.PerThreadScratchSpace   = scratch_space(&vs_prog_data->base.base);
306
307          vs.DispatchGRFStartRegisterForURBData =
308             vs_prog_data->base.base.dispatch_grf_start_reg;
309
310          vs.VertexURBEntryReadLength      = vs_prog_data->base.urb_read_length;
311          vs.VertexURBEntryReadOffset      = 0;
312
313          vs.MaximumNumberofThreads        = device->info.max_vs_threads - 1;
314          vs.StatisticsEnable              = false;
315          vs.SIMD8DispatchEnable           = pipeline->vs_simd8 != NO_KERNEL;
316          vs.VertexCacheDisable            = false;
317          vs.FunctionEnable                = true;
318
319          vs.VertexURBEntryOutputReadOffset = offset;
320          vs.VertexURBEntryOutputLength    = length;
321
322          /* TODO */
323          vs.UserClipDistanceClipTestEnableBitmask = 0;
324          vs.UserClipDistanceCullTestEnableBitmask = 0;
325       }
326    }
327
328    const int num_thread_bias = GEN_GEN == 8 ? 2 : 1;
329    if (pipeline->ps_ksp0 == NO_KERNEL) {
330       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps);
331       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), extra) {
332          extra.PixelShaderValid = false;
333       }
334    } else {
335       emit_3dstate_sbe(pipeline);
336
337       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
338          ps.KernelStartPointer0     = pipeline->ps_ksp0;
339          ps.KernelStartPointer1     = 0;
340          ps.KernelStartPointer2     = pipeline->ps_ksp0 + wm_prog_data->prog_offset_2;
341          ps._8PixelDispatchEnable   = wm_prog_data->dispatch_8;
342          ps._16PixelDispatchEnable  = wm_prog_data->dispatch_16;
343          ps._32PixelDispatchEnable  = false;
344          ps.SingleProgramFlow       = false;
345          ps.VectorMaskEnable        = true;
346          ps.SamplerCount            = 1;
347          ps.PushConstantEnable      = wm_prog_data->base.nr_params > 0;
348          ps.PositionXYOffsetSelect  = wm_prog_data->uses_pos_offset ?
349             POSOFFSET_SAMPLE: POSOFFSET_NONE;
350
351          ps.MaximumNumberofThreadsPerPSD = 64 - num_thread_bias;
352
353          ps.ScratchSpaceBasePointer = (struct anv_address) {
354             .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
355                                          MESA_SHADER_FRAGMENT,
356                                          wm_prog_data->base.total_scratch),
357             .offset = 0,
358          };
359          ps.PerThreadScratchSpace   = scratch_space(&wm_prog_data->base);
360
361          ps.DispatchGRFStartRegisterForConstantSetupData0 =
362             wm_prog_data->base.dispatch_grf_start_reg;
363          ps.DispatchGRFStartRegisterForConstantSetupData1 = 0;
364          ps.DispatchGRFStartRegisterForConstantSetupData2 =
365             wm_prog_data->dispatch_grf_start_reg_2;
366       }
367
368       bool per_sample_ps = pCreateInfo->pMultisampleState &&
369                            pCreateInfo->pMultisampleState->sampleShadingEnable;
370
371       anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), ps) {
372          ps.PixelShaderValid              = true;
373          ps.PixelShaderKillsPixel         = wm_prog_data->uses_kill;
374          ps.PixelShaderComputedDepthMode  = wm_prog_data->computed_depth_mode;
375          ps.AttributeEnable               = wm_prog_data->num_varying_inputs > 0;
376          ps.oMaskPresenttoRenderTarget    = wm_prog_data->uses_omask;
377          ps.PixelShaderIsPerSample        = per_sample_ps;
378          ps.PixelShaderUsesSourceDepth    = wm_prog_data->uses_src_depth;
379          ps.PixelShaderUsesSourceW        = wm_prog_data->uses_src_w;
380 #if GEN_GEN >= 9
381          ps.PixelShaderPullsBary    = wm_prog_data->pulls_bary;
382          ps.InputCoverageMaskState  = wm_prog_data->uses_sample_mask ?
383             ICMS_INNER_CONSERVATIVE : ICMS_NONE;
384 #else
385          ps.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
386 #endif
387       }
388    }
389
390    *pPipeline = anv_pipeline_to_handle(pipeline);
391
392    return VK_SUCCESS;
393 }