2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
30 #include "anv_private.h"
32 #include "genxml/gen_macros.h"
33 #include "genxml/genX_pack.h"
35 #include "genX_pipeline_util.h"
/* Emit the input-assembly state for a graphics pipeline.
 *
 * Writes a 3DSTATE_VF_TOPOLOGY packet into pipeline->batch whose
 * PrimitiveTopologyType is copied from pipeline->topology.
 *
 * In the lines visible here neither `info` nor `extra` is read.
 *
 * NOTE(review): the return-type line and the braces of this definition are
 * not visible in this view of the file.
 */
38 emit_ia_state(struct anv_pipeline *pipeline,
39 const VkPipelineInputAssemblyStateCreateInfo *info,
40 const struct anv_graphics_pipeline_create_info *extra)
/* `vft` is the packet local named by the third anv_batch_emit argument. */
42 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {
43 vft.PrimitiveTopologyType = pipeline->topology;
/* Build the rasterizer-related state for a graphics pipeline.
 *
 * Fills a 3DSTATE_SF and a 3DSTATE_RASTER structure and packs each one into
 * storage on the pipeline (pipeline->gen8.sf and pipeline->gen8.raster) via
 * the *_pack helpers; nothing is written to pipeline->batch in the visible
 * lines.
 *
 * info    - rasterization create info; supplies front face, cull mode,
 *           polygon mode and the depth-bias enable.
 * ms_info - multisample create info; supplies rasterizationSamples.
 * extra   - optional driver-internal create info; when extra->use_rectlist
 *           is set, the viewport transform and the scissor rectangle are
 *           both disabled.
 */
48 emit_rs_state(struct anv_pipeline *pipeline,
49 const VkPipelineRasterizationStateCreateInfo *info,
50 const VkPipelineMultisampleStateCreateInfo *ms_info,
51 const struct anv_graphics_pipeline_create_info *extra)
/* NOTE(review): the declaration of `samples` and any NULL guard around this
 * dereference of ms_info are not visible in this view - confirm a guard
 * exists, since the caller passes pCreateInfo->pMultisampleState directly. */
56 samples = ms_info->rasterizationSamples;
/* SF state: initialised with the packet header, then overridden per field. */
58 struct GENX(3DSTATE_SF) sf = {
59 GENX(3DSTATE_SF_header),
/* `extra` may be NULL, hence the `extra &&` test before reading the flag. */
60 .ViewportTransformEnable = !(extra && extra->use_rectlist),
61 .TriangleStripListProvokingVertexSelect = 0,
62 .LineStripListProvokingVertexSelect = 0,
63 .TriangleFanProvokingVertexSelect = 1,
64 .PointWidthSource = Vertex,
68 /* FINISHME: VkBool32 rasterizerDiscardEnable; */
/* Pack into the pipeline's own SF storage rather than into the batch. */
70 GENX(3DSTATE_SF_pack)(NULL, pipeline->gen8.sf, &sf);
72 struct GENX(3DSTATE_RASTER) raster = {
73 GENX(3DSTATE_RASTER_header),
75 /* For details on 3DSTATE_RASTER multisample state, see the BSpec table
76 * "Multisample Modes State".
78 .DXMultisampleRasterizationEnable = samples > 1,
79 .ForcedSampleCount = FSC_NUMRASTSAMPLES_0,
80 .ForceMultisampling = false,
82 .FrontWinding = vk_to_gen_front_face[info->frontFace],
83 .CullMode = vk_to_gen_cullmode[info->cullMode],
84 .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
85 .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
86 .ScissorRectangleEnable = !(extra && extra->use_rectlist),
88 .ViewportZClipTestEnable = !pipeline->depth_clamp_enable,
90 /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
/* NOTE(review): both the single ViewportZClipTestEnable field and the split
 * near/far fields appear in this view; presumably a hardware-generation
 * conditional that is not visible here selects one form - confirm. */
91 .ViewportZFarClipTestEnable = !pipeline->depth_clamp_enable,
92 .ViewportZNearClipTestEnable = !pipeline->depth_clamp_enable,
/* The single depthBiasEnable flag drives all three depth-offset enables. */
94 .GlobalDepthOffsetEnableSolid = info->depthBiasEnable,
95 .GlobalDepthOffsetEnableWireframe = info->depthBiasEnable,
96 .GlobalDepthOffsetEnablePoint = info->depthBiasEnable,
/* Pack into the pipeline's own RASTER storage rather than into the batch. */
99 GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster);
/* Emit the multisample state for a graphics pipeline.
 *
 * Writes two packets into pipeline->batch:
 *   - 3DSTATE_MULTISAMPLE, with PixelPositionOffsetEnable cleared,
 *     PixelLocation set to CENTER and NumberofMultisamples set to
 *     log2_samples;
 *   - 3DSTATE_SAMPLE_MASK, with SampleMask set to sample_mask.
 *
 * Defaults are samples = 1, log2_samples = 0 and sample_mask = 0xffff.
 * samples is then read from info->rasterizationSamples, and log2_samples is
 * derived as __builtin_ffs(samples) - 1, i.e. the index of the lowest set
 * bit, which equals log2(samples) when samples is a power of two.
 * NOTE(review): the condition guarding those two assignments is not visible
 * in this view; presumably `if (info)` - confirm.
 *
 * When info and info->pSampleMask are both non-NULL, sample_mask is ANDed
 * with the first mask word, pSampleMask[0]. When info->sampleShadingEnable
 * is set, anv_finishme() is called to report it.
 *
 * info - multisample create info; the visible `info &&` tests show that a
 *        NULL pointer is expected and tolerated.
 */
103 emit_ms_state(struct anv_pipeline *pipeline,
104 const VkPipelineMultisampleStateCreateInfo *info)
/* Single-sampled defaults, used when no multisample info is supplied. */
106 uint32_t samples = 1;
107 uint32_t log2_samples = 0;
109 /* From the Vulkan 1.0 spec:
110 * If pSampleMask is NULL, it is treated as if the mask has all bits
111 * enabled, i.e. no coverage is removed from fragments.
113 * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
115 uint32_t sample_mask = 0xffff;
118 samples = info->rasterizationSamples;
119 log2_samples = __builtin_ffs(samples) - 1;
122 if (info && info->pSampleMask)
123 sample_mask &= info->pSampleMask[0];
125 if (info && info->sampleShadingEnable)
126 anv_finishme("VkPipelineMultisampleStateCreateInfo::sampleShadingEnable");
128 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), ms) {
129 /* The PRM says that this bit is valid only for DX9:
131 * SW can choose to set this bit only for DX9 API. DX10/OGL API's
132 * should not have any effect by setting or not setting this bit.
134 ms.PixelPositionOffsetEnable = false;
136 ms.PixelLocation = CENTER;
137 ms.NumberofMultisamples = log2_samples;
140 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
141 sm.SampleMask = sample_mask;
/* Create a graphics pipeline and record its hardware state.
 *
 * Steps visible in this view:
 *   1. Allocate the anv_pipeline with anv_alloc2(); on failure return
 *      vk_error(VK_ERROR_OUT_OF_HOST_MEMORY).
 *   2. Initialise it with anv_pipeline_init(); on failure the allocation is
 *      released with anv_free2().
 *   3. Emit state into pipeline->batch in this order: vertex input, input
 *      assembly, rasterizer, multisample, depth/stencil, colour blend, URB
 *      setup, clip, then 3DSTATE_WM, 3DSTATE_GS, 3DSTATE_VS, 3DSTATE_PS and
 *      3DSTATE_PS_EXTRA.
 *   4. Store the resulting handle in *pPipeline.
 *
 * cache       - pipeline cache forwarded to anv_pipeline_init().
 * pCreateInfo - Vulkan create info; pVertexInputState, pInputAssemblyState
 *               and pRasterizationState are asserted to be non-NULL.
 * extra       - optional driver-internal create info; may be NULL (every
 *               visible use tests `extra &&` first).
 * pAllocator  - allocation callbacks passed to anv_alloc2()/anv_free2().
 * pPipeline   - receives the new pipeline handle.
 *
 * NOTE(review): this view is missing lines (return type, the `_device`
 * parameter, the declaration of `result`, several braces and `else` lines),
 * so branch structure described below is limited to what is visible.
 */
146 genX(graphics_pipeline_create)(
148 struct anv_pipeline_cache * cache,
149 const VkGraphicsPipelineCreateInfo* pCreateInfo,
150 const struct anv_graphics_pipeline_create_info *extra,
151 const VkAllocationCallbacks* pAllocator,
152 VkPipeline* pPipeline)
154 ANV_FROM_HANDLE(anv_device, device, _device);
155 ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass);
/* The subpass is looked up by index in the render pass named by the create
 * info. */
156 struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
157 struct anv_pipeline *pipeline;
/* Shared by the GS and VS sections for the URB output read offset/length. */
159 uint32_t offset, length;
161 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
/* Allocate the pipeline object with 8-byte alignment and object scope. */
163 pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
164 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
165 if (pipeline == NULL)
166 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
168 result = anv_pipeline_init(pipeline, device, cache,
169 pCreateInfo, extra, pAllocator);
/* Initialisation failed: release the allocation made above. */
170 if (result != VK_SUCCESS) {
171 anv_free2(&device->alloc, pAllocator, pipeline);
/* Fixed-function state. Only the three asserted sub-structures are checked
 * for NULL here; the multisample, depth/stencil, colour-blend and viewport
 * pointers are passed through unchecked. */
175 assert(pCreateInfo->pVertexInputState);
176 emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra);
177 assert(pCreateInfo->pInputAssemblyState);
178 emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra);
179 assert(pCreateInfo->pRasterizationState);
180 emit_rs_state(pipeline, pCreateInfo->pRasterizationState,
181 pCreateInfo->pMultisampleState, extra);
182 emit_ms_state(pipeline, pCreateInfo->pMultisampleState);
183 emit_ds_state(pipeline, pCreateInfo->pDepthStencilState, pass, subpass);
184 emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
185 pCreateInfo->pMultisampleState);
187 emit_urb_setup(pipeline);
189 emit_3dstate_clip(pipeline, pCreateInfo->pViewportState,
190 pCreateInfo->pRasterizationState, extra);
/* wm_prog_data is tested for NULL before some uses below, so it may be NULL
 * (presumably when there is no fragment shader - confirm). */
192 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
193 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) {
194 wm.StatisticsEnable = true;
195 wm.LineEndCapAntialiasingRegionWidth = _05pixels;
196 wm.LineAntialiasingRegionWidth = _10pixels;
197 wm.ForceThreadDispatchEnable = NORMAL;
198 wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
/* Early depth/stencil control: PREPS when the shader requests early fragment
 * tests, PSEXEC when it has side effects; the last assignment is NORMAL (its
 * `else` line is not visible in this view). */
200 if (wm_prog_data && wm_prog_data->early_fragment_tests) {
201 wm.EarlyDepthStencilControl = PREPS;
202 } else if (wm_prog_data && wm_prog_data->has_side_effects) {
203 wm.EarlyDepthStencilControl = PSEXEC;
205 wm.EarlyDepthStencilControl = NORMAL;
/* With no fragment kernel the interpolation mode is 0, and wm_prog_data is
 * not dereferenced. */
208 wm.BarycentricInterpolationMode = pipeline->ps_ksp0 == NO_KERNEL ?
209 0 : wm_prog_data->barycentric_interp_modes;
/* Geometry shader: with no GS kernel an all-default 3DSTATE_GS is emitted. */
212 if (pipeline->gs_kernel == NO_KERNEL) {
213 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs);
215 const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);
/* NOTE(review): `offset` is read here but its assignment is not visible in
 * this view - confirm it is set before this line. */
217 length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset;
219 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs) {
220 gs.SingleProgramFlow = false;
221 gs.KernelStartPointer = pipeline->gs_kernel;
222 gs.VectorMaskEnable = false;
224 gs.BindingTableEntryCount = 0;
225 gs.ExpectedVertexCount = gs_prog_data->vertices_in;
/* Scratch buffer sized from the program's total_scratch requirement. */
227 gs.ScratchSpaceBasePointer = (struct anv_address) {
228 .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
229 MESA_SHADER_GEOMETRY,
230 gs_prog_data->base.base.total_scratch),
233 gs.PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base);
234 gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
235 gs.OutputTopology = gs_prog_data->output_topology;
236 gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
237 gs.IncludeVertexHandles = gs_prog_data->base.include_vue_handles;
239 gs.DispatchGRFStartRegisterForURBData =
240 gs_prog_data->base.base.dispatch_grf_start_reg;
242 gs.MaximumNumberofThreads = device->info.max_gs_threads / 2 - 1;
243 gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords;
244 gs.DispatchMode = gs_prog_data->base.dispatch_mode;
245 gs.StatisticsEnable = true;
246 gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
247 gs.ReorderMode = TRAILING;
250 gs.ControlDataFormat = gs_prog_data->control_data_format;
/* A negative static_vertex_count is treated as "not static": StaticOutput is
 * false and the programmed count is clamped to 0. */
252 gs.StaticOutput = gs_prog_data->static_vertex_count >= 0;
253 gs.StaticOutputVertexCount =
254 gs_prog_data->static_vertex_count >= 0 ?
255 gs_prog_data->static_vertex_count : 0;
257 /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled:
258 * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v)
259 * UserClipDistanceCullTestEnableBitmask(v)
262 gs.VertexURBEntryOutputReadOffset = offset;
263 gs.VertexURBEntryOutputLength = length;
267 const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
268 /* Skip the VUE header and position slots */
270 length = (vs_prog_data->base.vue_map.num_slots + 1) / 2 - offset;
/* Prefer the SIMD8 vertex kernel when one exists.
 * NOTE(review): the fallback operand of this conditional is not visible in
 * this view. */
272 uint32_t vs_start = pipeline->vs_simd8 != NO_KERNEL ? pipeline->vs_simd8 :
/* VS is disabled when there is no kernel or when extra->disable_vs is set. */
275 if (vs_start == NO_KERNEL || (extra && extra->disable_vs)) {
276 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), vs) {
277 vs.FunctionEnable = false;
278 /* Even if VS is disabled, SBE still gets the amount of
279 * vertex data to read from this field. */
280 vs.VertexURBEntryOutputReadOffset = offset;
281 vs.VertexURBEntryOutputLength = length;
/* VS enabled path. */
284 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), vs) {
285 vs.KernelStartPointer = vs_start;
286 vs.SingleVertexDispatch = false;
287 vs.VectorMaskEnable = false;
/* The entry count is the binding table size in bytes divided by 4.
 * NOTE(review): this statement ends with ',' instead of ';'. The comma
 * operator makes it evaluate the same as two statements, but it reads like a
 * typo - consider changing it to ';'. */
290 vs.BindingTableEntryCount =
291 vs_prog_data->base.base.binding_table.size_bytes / 4,
293 vs.ThreadDispatchPriority = false;
294 vs.FloatingPointMode = IEEE754;
295 vs.IllegalOpcodeExceptionEnable = false;
296 vs.AccessesUAV = false;
297 vs.SoftwareExceptionEnable = false;
299 vs.ScratchSpaceBasePointer = (struct anv_address) {
300 .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
302 vs_prog_data->base.base.total_scratch),
305 vs.PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base);
307 vs.DispatchGRFStartRegisterForURBData =
308 vs_prog_data->base.base.dispatch_grf_start_reg;
310 vs.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length;
311 vs.VertexURBEntryReadOffset = 0;
313 vs.MaximumNumberofThreads = device->info.max_vs_threads - 1;
314 vs.StatisticsEnable = false;
/* SIMD8 dispatch is enabled exactly when a SIMD8 kernel was compiled. */
315 vs.SIMD8DispatchEnable = pipeline->vs_simd8 != NO_KERNEL;
316 vs.VertexCacheDisable = false;
317 vs.FunctionEnable = true;
319 vs.VertexURBEntryOutputReadOffset = offset;
320 vs.VertexURBEntryOutputLength = length;
323 vs.UserClipDistanceClipTestEnableBitmask = 0;
324 vs.UserClipDistanceCullTestEnableBitmask = 0;
/* Amount subtracted from 64 for MaximumNumberofThreadsPerPSD below: 2 when
 * GEN_GEN is 8, otherwise 1. */
328 const int num_thread_bias = GEN_GEN == 8 ? 2 : 1;
/* No fragment kernel: emit a default 3DSTATE_PS and mark the pixel shader as
 * invalid in 3DSTATE_PS_EXTRA. */
329 if (pipeline->ps_ksp0 == NO_KERNEL) {
330 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps);
/* Here `extra` names the packet local and shadows the function parameter of
 * the same name for the duration of this emit block. */
331 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), extra) {
332 extra.PixelShaderValid = false;
335 emit_3dstate_sbe(pipeline);
337 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
338 ps.KernelStartPointer0 = pipeline->ps_ksp0;
339 ps.KernelStartPointer1 = 0;
/* The third kernel pointer is the base kernel plus prog_offset_2. */
340 ps.KernelStartPointer2 = pipeline->ps_ksp0 + wm_prog_data->prog_offset_2;
341 ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
342 ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
343 ps._32PixelDispatchEnable = false;
344 ps.SingleProgramFlow = false;
345 ps.VectorMaskEnable = true;
347 ps.PushConstantEnable = wm_prog_data->base.nr_params > 0;
348 ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
349 POSOFFSET_SAMPLE: POSOFFSET_NONE;
351 ps.MaximumNumberofThreadsPerPSD = 64 - num_thread_bias;
353 ps.ScratchSpaceBasePointer = (struct anv_address) {
354 .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
355 MESA_SHADER_FRAGMENT,
356 wm_prog_data->base.total_scratch),
359 ps.PerThreadScratchSpace = scratch_space(&wm_prog_data->base);
361 ps.DispatchGRFStartRegisterForConstantSetupData0 =
362 wm_prog_data->base.dispatch_grf_start_reg;
363 ps.DispatchGRFStartRegisterForConstantSetupData1 = 0;
364 ps.DispatchGRFStartRegisterForConstantSetupData2 =
365 wm_prog_data->dispatch_grf_start_reg_2;
/* Per-sample shading is requested only when multisample state is present and
 * its sampleShadingEnable flag is set. */
368 bool per_sample_ps = pCreateInfo->pMultisampleState &&
369 pCreateInfo->pMultisampleState->sampleShadingEnable;
371 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), ps) {
372 ps.PixelShaderValid = true;
373 ps.PixelShaderKillsPixel = wm_prog_data->uses_kill;
374 ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
375 ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
376 ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
377 ps.PixelShaderIsPerSample = per_sample_ps;
378 ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
379 ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
381 ps.PixelShaderPullsBary = wm_prog_data->pulls_bary;
/* NOTE(review): InputCoverageMaskState and PixelShaderUsesInputCoverageMask
 * are both derived from uses_sample_mask; presumably only one applies per
 * hardware generation, selected by a conditional not visible here. */
382 ps.InputCoverageMaskState = wm_prog_data->uses_sample_mask ?
383 ICMS_INNER_CONSERVATIVE : ICMS_NONE;
385 ps.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
/* Hand the finished pipeline back to the caller as a Vulkan handle. */
390 *pPipeline = anv_pipeline_to_handle(pipeline);