- win_flex_bison-2.4.5.zip
- llvm-3.3.1-msvc2013-mtd.7z
+os: Visual Studio 2013
+
environment:
WINFLEXBISON_ARCHIVE: win_flex_bison-2.4.5.zip
LLVM_ARCHIVE: llvm-3.3.1-msvc2013-mtd.7z
- python -m pip --version
# Install Mako
- python -m pip install --egg Mako
+# Install pywin32 extensions, needed by SCons
+- python -m pip install pypiwin32
# Install SCons
- python -m pip install --egg scons==2.4.1
- scons --version
# The offending commit that this patch (partially) reverts isn't in 12.0
be32a2132785fbc119f17e62070e007ee7d17af7 i965/compiler: Bring back the INTEL_PRECISE_TRIG environment variable
+
+# The patch depends on at least the batch_cache work.
+89f00f749fda4c1beca38f362c7f86bdc6e32785 a4xx: make sure to actually clamp depth as requested
AX_GCC_FUNC_ATTRIBUTE([pure])
AX_GCC_FUNC_ATTRIBUTE([returns_nonnull])
AX_GCC_FUNC_ATTRIBUTE([unused])
+AX_GCC_FUNC_ATTRIBUTE([visibility])
AX_GCC_FUNC_ATTRIBUTE([warn_unused_result])
AX_GCC_FUNC_ATTRIBUTE([weak])
esac
fi
+AC_HEADER_MAJOR
AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"])
AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])
AC_ARG_WITH([vulkan-icddir],
[AS_HELP_STRING([--with-vulkan-icddir=DIR],
- [directory for the Vulkan driver icd files @<:@${sysconfdir}/vulkan/icd.d@:>@])],
+ [directory for the Vulkan driver icd files @<:@${datarootdir}/vulkan/icd.d@:>@])],
[VULKAN_ICD_INSTALL_DIR="$withval"],
[VULKAN_ICD_INSTALL_DIR='${datarootdir}/vulkan/icd.d'])
AC_SUBST([VULKAN_ICD_INSTALL_DIR])
fi
}
+swr_require_cxx_feature_flags() {
+ feature_name="$1"
+ preprocessor_test="$2"
+ option_list="$3"
+ output_var="$4"
+
+ AC_MSG_CHECKING([whether $CXX supports $feature_name])
+ AC_LANG_PUSH([C++])
+ save_CXXFLAGS="$CXXFLAGS"
+ save_IFS="$IFS"
+ IFS=","
+ found=0
+ for opts in $option_list
+ do
+ unset IFS
+ CXXFLAGS="$opts $save_CXXFLAGS"
+ AC_COMPILE_IFELSE(
+ [AC_LANG_PROGRAM(
+ [ #if !($preprocessor_test)
+ #error
+ #endif
+ ])],
+ [found=1; break],
+ [])
+ IFS=","
+ done
+ IFS="$save_IFS"
+ CXXFLAGS="$save_CXXFLAGS"
+ AC_LANG_POP([C++])
+ if test $found -eq 1; then
+ AC_MSG_RESULT([$opts])
+ eval "$output_var=\$opts"
+ return 0
+ fi
+ AC_MSG_RESULT([no])
+ AC_MSG_ERROR([swr requires $feature_name support])
+ return 1
+}
+
dnl Duplicates in GALLIUM_DRIVERS_DIRS are removed by sorting it after this block
if test -n "$with_gallium_drivers"; then
gallium_drivers=`IFS=', '; echo $with_gallium_drivers`
xswr)
swr_llvm_check "swr"
- AC_MSG_CHECKING([whether $CXX supports c++11/AVX/AVX2])
- AVX_CXXFLAGS="-march=core-avx-i"
- AVX2_CXXFLAGS="-march=core-avx2"
-
- AC_LANG_PUSH([C++])
- save_CXXFLAGS="$CXXFLAGS"
- CXXFLAGS="-std=c++11 $CXXFLAGS"
- AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],[],
- [AC_MSG_ERROR([c++11 compiler support not detected])])
- CXXFLAGS="$save_CXXFLAGS"
-
- save_CXXFLAGS="$CXXFLAGS"
- CXXFLAGS="$AVX_CXXFLAGS $CXXFLAGS"
- AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],[],
- [AC_MSG_ERROR([AVX compiler support not detected])])
- CXXFLAGS="$save_CXXFLAGS"
-
- save_CFLAGS="$CXXFLAGS"
- CXXFLAGS="$AVX2_CXXFLAGS $CXXFLAGS"
- AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],[],
- [AC_MSG_ERROR([AVX2 compiler support not detected])])
- CXXFLAGS="$save_CXXFLAGS"
- AC_LANG_POP([C++])
+ swr_require_cxx_feature_flags "C++11" "__cplusplus >= 201103L" \
+ ",-std=c++11" \
+ SWR_CXX11_CXXFLAGS
+ AC_SUBST([SWR_CXX11_CXXFLAGS])
+
+ swr_require_cxx_feature_flags "AVX" "defined(__AVX__)" \
+ ",-mavx,-march=core-avx" \
+ SWR_AVX_CXXFLAGS
+ AC_SUBST([SWR_AVX_CXXFLAGS])
+
+ swr_require_cxx_feature_flags "AVX2" "defined(__AVX2__)" \
+ ",-mavx2 -mfma -mbmi2 -mf16c,-march=core-avx2" \
+ SWR_AVX2_CXXFLAGS
+ AC_SUBST([SWR_AVX2_CXXFLAGS])
HAVE_GALLIUM_SWR=yes
;;
--- /dev/null
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8">
+ <title>Mesa Release Notes</title>
+ <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+ <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 12.0.2 Release Notes / September 2, 2016</h1>
+
+<p>
+Mesa 12.0.2 is a bug fix release which fixes bugs found since the 12.0.1 release.
+</p>
+<p>
+Mesa 12.0.2 implements the OpenGL 4.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.3. OpenGL
+4.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+a08565ab1273751ebe2ffa928cbf785056594c803077c9719d0763da780f2918 mesa-12.0.2.tar.gz
+d957a5cc371dcd7ff2aa0d87492f263aece46f79352f4520039b58b1f32552cb mesa-12.0.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=69622">Bug 69622</a> - eglTerminate then eglMakeCurrent crahes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89599">Bug 89599</a> - symbol 'x86_64_entry_start' is already defined when building with LLVM/clang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91342">Bug 91342</a> - Very dark textures on some objects in indoors environments in Postal 2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92306">Bug 92306</a> - GL Excess demo renders incorrectly on nv43</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94148">Bug 94148</a> - Framebuffer considered invalid when a draw call is done before glCheckFramebufferStatus</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96274">Bug 96274</a> - [NVC0] Failure when compiling compute shader: Assertion `bb->getFirst()->serial <= bb->getExit()->serial' failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96358">Bug 96358</a> - SSO: wrong interface validation between GS and VS (regresion due to latest gles 3.1)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96381">Bug 96381</a> - Texture artifacts with immutable texture storage and mipmaps</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96762">Bug 96762</a> - [radeonsi,apitrace] Firewatch: nothing rendered in scrollable (text) areas</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96835">Bug 96835</a> - "gallium: Force blend color to 16-byte alignment" crash with "-march=native -O3" causes some 32bit games to crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96850">Bug 96850</a> - Crucible tests fail for 32bit mesa</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96908">Bug 96908</a> - [radeonsi] MSAA causes graphical artifacts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96911">Bug 96911</a> - webgl2 conformance2/textures/misc/tex-mipmap-levels.html crashes 12.1 Intel driver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96971">Bug 96971</a> - invariant qualifier is not valid for shader inputs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97039">Bug 97039</a> - The Talos Principle and Serious Sam 3 GPU faults</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97207">Bug 97207</a> - [IVY BRIDGE] Fragment shader discard writing to depth</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97214">Bug 97214</a> - X not running with error "Failed to make EGL context current"</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97225">Bug 97225</a> - [i965 on HD4600 Haswell] xcom switch to ingame cinematics cause segmentation fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97231">Bug 97231</a> - GL_DEPTH_CLAMP doesn't clamp to the far plane</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97307">Bug 97307</a> - glsl/glcpp/tests/glcpp-test regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97331">Bug 97331</a> - glDrawElementsBaseVertex doesn't work in display list on i915</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97351">Bug 97351</a> - DrawElementsBaseVertex with VBO ignores base vertex on Intel GMA 9xx in some cases</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97426">Bug 97426</a> - glScissor gives vertically inverted result</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97476">Bug 97476</a> - Shader binaries should not be stored in the PipelineCache</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97567">Bug 97567</a> - [SNB, ILK] ctl, piglit regressions in mesa 12.0.2rc1</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andreas Boll (1):</p>
+<ul>
+ <li>configure.ac: Use ${datarootdir} for --with-vulkan-icddir help string too</li>
+</ul>
+
+<p>Bernard Kilarski (1):</p>
+<ul>
+ <li>glx: fix error code when there is no context bound</li>
+</ul>
+
+<p>Brian Paul (4):</p>
+<ul>
+ <li>svga: handle mismatched number of samplers, sampler views</li>
+ <li>mesa: use _mesa_clear_texture_image() in clear_texture_fields()</li>
+ <li>swrast: fix incorrectly positioned putImage() in swrast driver</li>
+ <li>mesa: fix format conversion bug in get_tex_rgba_uncompressed()</li>
+</ul>
+
+<p>Chad Versace (2):</p>
+<ul>
+ <li>i965: Fix miptree layout for EGLImage-based renderbuffers</li>
+ <li>i965: Respect miptree offsets in intel_readpixels_tiled_memcpy()</li>
+</ul>
+
+<p>Christian König (1):</p>
+<ul>
+ <li>st/mesa: fix reference counting bug in st_vdpau</li>
+</ul>
+
+<p>Chuck Atkins (1):</p>
+<ul>
+ <li>swr: Refactor checks for compiler feature flags</li>
+</ul>
+
+<p>Daniel Scharrer (1):</p>
+<ul>
+ <li>mesa: Fix fixed function spot lighting on newer hardware (again)</li>
+</ul>
+
+<p>Dave Airlie (2):</p>
+<ul>
+ <li>anv: fix writemask on blit fragment shader.</li>
+ <li>st/glsl_to_tgsi: fix st_src_reg_for_double constant.</li>
+</ul>
+
+<p>Emil Velikov (15):</p>
+<ul>
+ <li>docs: add sha256 checksums for 12.0.1</li>
+ <li>mesa: automake: list builddir before srcdir</li>
+ <li>mesa: scons: list builddir before srcdir</li>
+ <li>i965: store reference to the context within struct brw_fence (v2)</li>
+ <li>anv: remove internal 'validate' layer</li>
+ <li>anv: automake: use VISIBILITY_CFLAGS to restrict symbol visibility</li>
+ <li>anv: automake: build with -Bsymbolic</li>
+ <li>anv: do not export the Vulkan API</li>
+ <li>anv: remove dummy VK_DEBUG_MARKER_EXT entry points</li>
+ <li>isl: automake: use VISIBILITY_CFLAGS to restrict symbol visibility</li>
+ <li>cherry-ignore: temporary(?) drop "a4xx: make sure to actually clamp depth"</li>
+ <li>i915: Check return value of screen->image.loader->getBuffers</li>
+ <li>Revert "i965/miptree: Set logical_depth0 == 6 for cube maps"</li>
+ <li>glx/glvnd: list the strcmp arguments in correct order</li>
+ <li>Update version to 12.0.2</li>
+</ul>
+
+<p>Eric Anholt (4):</p>
+<ul>
+ <li>vc4: Close our screen's fd on screen close.</li>
+ <li>vc4: Disable early Z with computed depth.</li>
+ <li>vc4: Fix a leak of the src[] array of VPM reads in optimization.</li>
+ <li>vc4: Fix leak of the bo_handles table.</li>
+</ul>
+
+<p>Francisco Jerez (3):</p>
+<ul>
+ <li>i965: Emit SKL VF cache invalidation W/A from brw_emit_pipe_control_flush.</li>
+ <li>i965: Make room in the batch epilogue for three more pipe controls.</li>
+ <li>i965: Fix remaining flush vs invalidate race conditions in brw_emit_pipe_control_flush.</li>
+</ul>
+
+<p>Haixia Shi (1):</p>
+<ul>
+ <li>platform_android: prevent deadlock in droid_swap_buffers</li>
+</ul>
+
+<p>Ian Romanick (5):</p>
+<ul>
+ <li>mesa: Strip arrayness from interface block names in some IO validation</li>
+ <li>glsl: Pack integer and double varyings as flat even if interpolation mode is none</li>
+ <li>glcpp: Track the actual version instead of just the version_resolved flag</li>
+ <li>glcpp: Only disallow #undef of pre-defined macros on GLSL ES >= 3.00 shaders</li>
+ <li>glsl: Mark cube map array sampler types as reserved in GLSL ES 3.10</li>
+</ul>
+
+<p>Ilia Mirkin (16):</p>
+<ul>
+ <li>mesa: etc2 online compression is unsupported, don't attempt it</li>
+ <li>st/mesa: return appropriate mesa format for ETC texture formats</li>
+ <li>mesa: set _NEW_BUFFERS when updating texture bound to current buffers</li>
+ <li>nv50,nvc0: srgb rendering is only available for rgba/bgra</li>
+ <li>vbo: allow DrawElementsBaseVertex in display lists</li>
+ <li>gallium/util: add helper to compute zmin/zmax for a viewport state</li>
+ <li>nv50,nvc0: fix depth range when halfz is enabled</li>
+ <li>nv50/ir: fix bb positions after exit instructions</li>
+ <li>vbo: add basevertex when looking up elements for vbo splitting</li>
+ <li>a4xx: only disable depth clipping, not all clipping, when requested</li>
+ <li>nv50/ir: make sure cfg iterator always hits all blocks</li>
+ <li>main: add missing EXTRA_END in OES_sample_variables get check</li>
+ <li>nouveau: always enable at least one RC</li>
+ <li>nv30: only bail on color/depth bpp mismatch when surfaces are swizzled</li>
+ <li>a4xx: make sure to actually clamp depth as requested</li>
+ <li>gk110/ir: fix quadop dall emission</li>
+</ul>
+
+<p>Jan Ziak (2):</p>
+<ul>
+ <li>egl/x11: avoid using freed memory if dri2 init fails</li>
+ <li>loader: fix memory leak in loader_dri3_open</li>
+</ul>
+
+<p>Jason Ekstrand (31):</p>
+<ul>
+ <li>nir/spirv: Don't multiply the push constant block size by 4</li>
+ <li>anv: Add a stub for CmdCopyQueryPoolResults on Ivy Bridge</li>
+ <li>glsl/types: Fix function type comparison function</li>
+ <li>glsl/types: Use _mesa_hash_data for hashing function types</li>
+ <li>genxml: Make gen6-7 blending look more like gen8</li>
+ <li>anv/pipeline: Unify blend state setup between gen7 and gen8</li>
+ <li>anv: Enable independentBlend on gen7</li>
+ <li>anv: Add an align_down_npot_u32 helper</li>
+ <li>anv: Handle VK_WHOLE_SIZE properly for buffer views</li>
+ <li>i965/miptree: Enforce that height == 1 for 1-D array textures</li>
+ <li>i965/miptree: Set logical_depth0 == 6 for cube maps</li>
+ <li>nir: Add a nir_deref_foreach_leaf helper</li>
+ <li>nir/inline: Constant-initialize local variables in the callee if needed</li>
+ <li>anv/pipeline: Set up point coord enables</li>
+ <li>i965/miptree: Stop multiplying cube depth by 6 in HiZ calculations</li>
+ <li>i965/vec4: Make opt_vector_float reset at the top of each block</li>
+ <li>anv/blit2d: Add a format parameter to bind_dst and create_iview</li>
+ <li>anv/blit2d: Add support for RGB destinations</li>
+ <li>anv/clear: Make cmd_clear_image take an actual VkClearValue</li>
+ <li>anv/clear: Clear E5B9G9R9 images as R32_UINT</li>
+ <li>anv: Include the pipeline layout in the shader hash</li>
+ <li>isl: Allow multisampled array textures</li>
+ <li>anv/descriptor_set: memset anv_descriptor_set_layout</li>
+ <li>anv/pipeline: Fix bind maps for fragment output arrays</li>
+ <li>anv/allocator: Correctly set the number of buckets</li>
+ <li>anv/pipeline: Properly handle OOM during shader compilation</li>
+ <li>anv: Remove unused fields from anv_pipeline_bind_map</li>
+ <li>anv: Add pipeline_has_stage guards a few places</li>
+ <li>anv: Add a struct for storing a compiled shader</li>
+ <li>anv/pipeline: Add support for caching the push constant map</li>
+ <li>anv: Rework pipeline caching</li>
+</ul>
+
+<p>José Fonseca (2):</p>
+<ul>
+ <li>appveyor: Install pywin32 extensions.</li>
+ <li>appveyor: Force Visual Studio 2013 image.</li>
+</ul>
+
+<p>Kenneth Graunke (21):</p>
+<ul>
+ <li>genxml: Add CLIPMODE_* prefix to 3DSTATE_CLIP's "Clip Mode" enum values.</li>
+ <li>genxml: Add APIMODE_D3D missing enum values and improve consistency.</li>
+ <li>anv: Fix near plane clipping on Gen7/7.5.</li>
+ <li>anv: Enable early culling on Gen7.</li>
+ <li>anv: Unify 3DSTATE_CLIP code across generations.</li>
+ <li>genxml: Rename "API Rendering Disable" to "Rendering Disable".</li>
+ <li>anv: Properly call gen75_emit_state_base_address on Haswell.</li>
+ <li>i965: Include VUE handles for GS with invocations > 1.</li>
+ <li>nir: Add a base const_index to shared atomic intrinsics.</li>
+ <li>i965: Fix shared atomic intrinsics to pay attention to base.</li>
+ <li>mesa: Add GL_BGRA_EXT to the list of GenerateMipmap internal formats.</li>
+ <li>mesa: Don't call GenerateMipmap if Width or Height == 0.</li>
+ <li>glsl: Delete bogus ir_set_program_inouts assert.</li>
+ <li>glsl: Fix the program resource names of gl_TessLevelOuter/Inner[].</li>
+ <li>glsl: Fix location bias for patch variables.</li>
+ <li>glsl: Fix invariant matching in GLSL 4.30 and GLSL ES 1.00.</li>
+ <li>mesa: Fix uf10_to_f32() scale factor in the E == 0 and M != 0 case.</li>
+ <li>nir/builder: Add bany_inequal and bany helpers.</li>
+ <li>i965: Implement the WaPreventHSTessLevelsInterference workaround.</li>
+ <li>i965: Fix execution size of scalar TCS barrier setup code.</li>
+ <li>i965: Fix barrier count shift in scalar TCS backend.</li>
+</ul>
+
+<p>Leo Liu (2):</p>
+<ul>
+ <li>st/omx/enc: check uninitialized list from task release</li>
+ <li>vl/dri3: fix a memory leak from front buffer</li>
+</ul>
+
+<p>Marek Olšák (7):</p>
+<ul>
+ <li>glsl_to_tgsi: don't use the negate modifier in integer ops after bitcast</li>
+ <li>radeonsi: add a workaround for a compute VGPR-usage LLVM bug</li>
+ <li>winsys/amdgpu: disallow DCC with mipmaps</li>
+ <li>gallium/util: fix align64</li>
+ <li>radeonsi: only set dual source blending for MRT0</li>
+ <li>radeonsi: fix VM faults due NULL internal const buffers on CIK</li>
+ <li>radeonsi: disable SDMA texture copying on Carrizo</li>
+</ul>
+
+<p>Matt Turner (4):</p>
+<ul>
+ <li>mapi: Massage code to allow clang to compile.</li>
+ <li>i965/vec4: Ignore swizzle of VGRF for use by var_range_end().</li>
+ <li>mesa: Use AC_HEADER_MAJOR to include correct header for major().</li>
+ <li>nir: Walk blocks in source code order in lower_vars_to_ssa.</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+ <li>glx: Don't use current context in __glXSendError</li>
+</ul>
+
+<p>Miklós Máté (1):</p>
+<ul>
+ <li>vbo: set draw_id</li>
+</ul>
+
+<p>Nanley Chery (5):</p>
+<ul>
+ <li>anv/descriptor_set: Fix binding partly undefined descriptor sets</li>
+ <li>isl: Fix assert on raw buffer surface state size</li>
+ <li>anv/device: Fix max buffer range limits</li>
+ <li>isl: Fix isl_tiling_is_any_y()</li>
+ <li>anv/gen7_pipeline: Set PixelShaderKillPixel for discards</li>
+</ul>
+
+<p>Nicolai Hähnle (7):</p>
+<ul>
+ <li>radeonsi: explicitly choose center locations for 1xAA on Polaris</li>
+ <li>radeonsi: fix Polaris MSAA regression</li>
+ <li>radeonsi: ensure sample locations are set for line and polygon smoothing</li>
+ <li>st_glsl_to_tgsi: only skip over slots of an input array that are present</li>
+ <li>glsl: fix optimization of discard nested multiple levels</li>
+ <li>radeonsi: flush TC L2 cache for indirect draw data</li>
+ <li>radeonsi: add si_set_rw_buffer to be used for internal descriptors</li>
+</ul>
+
+<p>Nicolas Boichat (6):</p>
+<ul>
+ <li>egl/dri2: dri2_make_current: Set EGL error if bindContext fails</li>
+ <li>egl/wayland: Set disp->DriverData to NULL on error</li>
+ <li>egl/surfaceless: Set disp->DriverData to NULL on error</li>
+ <li>egl/drm: Set disp->DriverData to NULL on error</li>
+ <li>egl/android: Set dpy->DriverData to NULL on error</li>
+ <li>egl/dri2: Add reference count for dri2_egl_display</li>
+</ul>
+
+<p>Rob Herring (3):</p>
+<ul>
+ <li>Android: add missing u_math.h include path for libmesa_isl</li>
+ <li>vc4: fix vc4_resource_from_handle() stride calculation</li>
+ <li>vc4: add hash table look-up for exported dmabufs</li>
+</ul>
+
+<p>Samuel Pitoiset (7):</p>
+<ul>
+ <li>nvc0/ir: fix images indirect access on Fermi</li>
+ <li>nvc0: fix the driver cb size when draw parameters are used</li>
+ <li>gm107/ir: add missing NEG modifier for IADD32I</li>
+ <li>gm107/ir: make use of ADD32I for all immediates</li>
+ <li>nvc0: upload sample locations on GM20x</li>
+ <li>nvc0: invalidate textures/samplers on GK104+</li>
+ <li>nv50/ir: always emit the NDV bit for OP_QUADOP</li>
+</ul>
+
+<p>Stefan Dirsch (1):</p>
+<ul>
+ <li>Avoid overflow in 'last' variable of FindGLXFunction(...)</li>
+</ul>
+
+<p>Stencel, Joanna (1):</p>
+<ul>
+ <li>egl/wayland-egl: Fix for segfault in dri2_wl_destroy_surface.</li>
+</ul>
+
+<p>Tim Rowley (2):</p>
+<ul>
+ <li>Revert "gallium: Force blend color to 16-byte alignment"</li>
+ <li>swr: switch from overriding -march to selecting features</li>
+</ul>
+
+<p>Tomasz Figa (8):</p>
+<ul>
+ <li>gallium/dri: Add shared glapi to LIBADD on Android</li>
+ <li>egl/android: Remove unused variables</li>
+ <li>egl/android: Check return value of dri2_get_dri_config()</li>
+ <li>egl/android: Stop leaking DRI images</li>
+ <li>gallium/winsys/kms: Fix double refcount when importing from prime FD (v2)</li>
+ <li>gallium/winsys/kms: Fully initialize kms_sw_dt at prime import time (v2)</li>
+ <li>gallium/winsys/kms: Move display target handle lookup to separate function</li>
+ <li>gallium/winsys/kms: Look up the GEM handle after importing a prime FD</li>
+</ul>
+
+
+</div>
+</body>
+</html>
HASH_TOKEN DEFINE_TOKEN define
| HASH_TOKEN UNDEF IDENTIFIER NEWLINE {
macro_t *macro;
- if (strcmp("__LINE__", $3) == 0
- || strcmp("__FILE__", $3) == 0
- || strcmp("__VERSION__", $3) == 0
- || strncmp("GL_", $3, 3) == 0)
+
+ /* Section 3.4 (Preprocessor) of the GLSL ES 3.00 spec says:
+ *
+ * It is an error to undefine or to redefine a built-in
+ * (pre-defined) macro name.
+ *
+ * The GLSL ES 1.00 spec does not contain this text.
+ *
+ * Section 3.3 (Preprocessor) of the GLSL 1.30 spec says:
+ *
+ * #define and #undef functionality are defined as is
+ * standard for C++ preprocessors for macro definitions
+ * both with and without macro parameters.
+ *
+ * At least as far as I can tell, GCC allows '#undef __FILE__'.
+ * Furthermore, there are desktop OpenGL conformance tests
+ * that expect '#undef __VERSION__' and '#undef
+ * GL_core_profile' to work.
+ *
+ * Only disallow #undef of pre-defined macros on GLSL ES >=
+ * 3.00 shaders.
+ */
+ if (parser->is_gles &&
+ parser->version >= 300 &&
+ (strcmp("__LINE__", $3) == 0
+ || strcmp("__FILE__", $3) == 0
+ || strcmp("__VERSION__", $3) == 0
+ || strncmp("GL_", $3, 3) == 0))
glcpp_error(& @1, parser, "Built-in (pre-defined)"
" macro names cannot be undefined.");
_glcpp_parser_skip_stack_pop (parser, & @1);
} NEWLINE
| HASH_TOKEN VERSION_TOKEN integer_constant NEWLINE {
- if (parser->version_resolved) {
+ if (parser->version != 0) {
glcpp_error(& @1, parser, "#version must appear on the first line");
}
_glcpp_parser_handle_version_declaration(parser, $3, NULL, true);
}
| HASH_TOKEN VERSION_TOKEN integer_constant IDENTIFIER NEWLINE {
- if (parser->version_resolved) {
+ if (parser->version != 0) {
glcpp_error(& @1, parser, "#version must appear on the first line");
}
_glcpp_parser_handle_version_declaration(parser, $3, $4, true);
parser->extensions = extensions;
parser->api = api;
- parser->version_resolved = false;
+ parser->version = 0;
parser->has_new_line_number = 0;
parser->new_line_number = 1;
{
const struct gl_extensions *extensions = parser->extensions;
- if (parser->version_resolved)
+ if (parser->version != 0)
return;
- parser->version_resolved = true;
+ parser->version = version;
add_builtin_define (parser, "__VERSION__", version);
int error;
const struct gl_extensions *extensions;
gl_api api;
- bool version_resolved;
+ unsigned version;
bool has_new_line_number;
int new_line_number;
bool has_new_source_number;
+#version 300 es
#undef __LINE__
#undef __FILE__
#undef __VERSION__
-0:1(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined.
0:2(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined.
0:3(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined.
+0:4(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined.
+#version 300 es
--- /dev/null
+#version 110
+#undef __LINE__
+#undef __FILE__
+#undef __VERSION__
--- /dev/null
+#version 110
+
+
+
usampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, USAMPLER2DMSARRAY);
/* keywords available with ARB_texture_cube_map_array_enable extension on desktop GLSL */
-samplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAY);
-isamplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, ISAMPLERCUBEARRAY);
-usamplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, USAMPLERCUBEARRAY);
-samplerCubeArrayShadow KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAYSHADOW);
+samplerCubeArray KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAY);
+isamplerCubeArray KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, ISAMPLERCUBEARRAY);
+usamplerCubeArray KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, USAMPLERCUBEARRAY);
+samplerCubeArrayShadow KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAYSHADOW);
samplerExternalOES {
if (yyextra->OES_EGL_image_external_enable)
* variables. As only outputs can be declared as invariant, an invariant
* output from one shader stage will still match an input of a subsequent
* stage without the input being declared as invariant."
+ *
+ * On the desktop side, this text first appears in GLSL 4.30.
*/
- if (state->es_shader && state->language_version >= 300 && $$.flags.q.in)
+ if (state->is_version(430, 300) && $$.flags.q.in)
_mesa_glsl_error(&@1, state, "invariant qualifiers cannot be used with shader inputs");
}
| interpolation_qualifier type_qualifier
return this->u.state_slots;
}
+ inline bool is_interpolation_flat() const
+ {
+ return this->data.interpolation == INTERP_QUALIFIER_FLAT ||
+ this->type->contains_integer() ||
+ this->type->contains_double();
+ }
+
inline bool is_name_ralloced() const
{
return this->name != ir_variable::tmp_name;
* lowering passes (do_vec_index_to_swizzle() gets rid of indexing into
* vectors, and lower_packed_varyings() gets rid of structs that occur in
* varyings).
+ *
+ * However, we don't use varying packing in all cases - tessellation
+ * shaders bypass it. This means we'll see varying structs and arrays
+ * of structs here. For now, we just give up so the caller marks the
+ * entire variable as used.
*/
if (!(type->is_matrix() ||
(type->is_array() &&
(type->fields.array->is_numeric() ||
type->fields.array->is_boolean())))) {
- assert(!"Unexpected indexing in ir_set_program_inouts");
- /* For safety in release builds, in case we ever encounter unexpected
- * indexing, give up and let the caller mark the whole variable as used.
+ /* If we don't know how to handle this case, give up and let the
+ * caller mark the whole variable as used.
*/
return false;
}
return;
}
- if (!prog->IsES && input->data.invariant != output->data.invariant) {
+ /* The GLSL 4.30 and GLSL ES 3.00 specifications say:
+ *
+ * "As only outputs need be declared with invariant, an output from
+ * one shader stage will still match an input of a subsequent stage
+ * without the input being declared as invariant."
+ *
+ * while GLSL 4.20 says:
+ *
+ * "For variables leaving one shader and coming into another shader,
+ * the invariant keyword has to be used in both shaders, or a link
+ * error will result."
+ *
+ * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
+ *
+ * "The invariance of varyings that are declared in both the vertex
+ * and fragment shaders must match."
+ */
+ if (input->data.invariant != output->data.invariant &&
+ prog->Version < (prog->IsES ? 300 : 430)) {
linker_error(prog,
"%s shader output `%s' %s invariant qualifier, "
"but %s shader input %s invariant qualifier\n",
unsigned packing_class = var->data.centroid | (var->data.sample << 1) |
(var->data.patch << 2);
packing_class *= 4;
- packing_class += var->data.interpolation;
+ packing_class += var->is_interpolation_flat()
+ ? unsigned(INTERP_QUALIFIER_FLAT) : var->data.interpolation;
return packing_class;
}
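
Editor's note: the hunk above makes integer and double varyings land in the flat
packing class even when they carry no explicit qualifier. A worked instance of
the computation, assuming Mesa's INTERP_QUALIFIER_NONE = 0 and
INTERP_QUALIFIER_FLAT = 2:

   /* Illustrative only: a sample-qualified int varying declared without
    * an interpolation qualifier (INTERP_QUALIFIER_NONE).
    */
   unsigned centroid = 0, sample = 1, patch = 0;
   unsigned packing_class = centroid | (sample << 1) | (patch << 2);  /* 2 */
   packing_class *= 4;                                                /* 8 */
   packing_class += 2 /* INTERP_QUALIFIER_FLAT */;                    /* 10 */
   /* Before the change this varying fell into class 8 (NONE) while an
    * explicitly flat int fell into class 10; now both get class 10 and
    * can be packed together. */
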
if (in->data.mode == ir_var_system_value &&
in->data.location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) {
out->name = ralloc_strdup(shProg, "gl_VertexID");
+ } else if ((in->data.mode == ir_var_shader_out &&
+ in->data.location == VARYING_SLOT_TESS_LEVEL_OUTER) ||
+ (in->data.mode == ir_var_system_value &&
+ in->data.location == SYSTEM_VALUE_TESS_LEVEL_OUTER)) {
+ out->name = ralloc_strdup(shProg, "gl_TessLevelOuter");
+ type = glsl_type::get_array_instance(glsl_type::float_type, 4);
+ } else if ((in->data.mode == ir_var_shader_out &&
+ in->data.location == VARYING_SLOT_TESS_LEVEL_INNER) ||
+ (in->data.mode == ir_var_system_value &&
+ in->data.location == SYSTEM_VALUE_TESS_LEVEL_INNER)) {
+ out->name = ralloc_strdup(shProg, "gl_TessLevelInner");
+ type = glsl_type::get_array_instance(glsl_type::float_type, 2);
} else {
out->name = ralloc_strdup(shProg, name);
}
continue;
};
+ if (var->data.patch)
+ loc_bias = int(VARYING_SLOT_PATCH0);
+
/* Skip packed varyings, packed varyings are handled separately
* by add_packed_varyings.
*/
continue;
/* This lowering pass is only capable of packing floats and ints
- * together when their interpolation mode is "flat". Therefore, to be
- * safe, caller should ensure that integral varyings always use flat
- * interpolation, even when this is not required by GLSL.
+ * together when their interpolation mode is "flat". Treat integers as
+ * being flat when the interpolation mode is none.
*/
assert(var->data.interpolation == INTERP_QUALIFIER_FLAT ||
+ var->data.interpolation == INTERP_QUALIFIER_NONE ||
!var->type->contains_integer());
/* Clone the variable for program resource list before
if (this->packed_varyings[slot] == NULL) {
char *packed_name = ralloc_asprintf(this->mem_ctx, "packed:%s", name);
const glsl_type *packed_type;
- if (unpacked_var->data.interpolation == INTERP_QUALIFIER_FLAT)
+ if (unpacked_var->is_interpolation_flat())
packed_type = glsl_type::ivec4_type;
else
packed_type = glsl_type::vec4_type;
packed_var->data.centroid = unpacked_var->data.centroid;
packed_var->data.sample = unpacked_var->data.sample;
packed_var->data.patch = unpacked_var->data.patch;
- packed_var->data.interpolation = unpacked_var->data.interpolation;
+ packed_var->data.interpolation = packed_type == glsl_type::ivec4_type
+ ? unsigned(INTERP_QUALIFIER_FLAT) : unpacked_var->data.interpolation;
packed_var->data.location = location;
packed_var->data.precision = unpacked_var->data.precision;
packed_var->data.always_active_io = unpacked_var->data.always_active_io;
/* Move the condition and replace the ir_if with the ir_discard. */
ir_discard *discard = (ir_discard *) ir->then_instructions.head;
- discard->condition = ir->condition;
+ if (!discard->condition)
+ discard->condition = ir->condition;
+ else {
+ void *ctx = ralloc_parent(ir);
+ discard->condition = new(ctx) ir_expression(ir_binop_logic_and,
+ ir->condition,
+ discard->condition);
+ }
ir->replace_with(discard);
progress = true;
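
Editor's note: a sketch of the IR shapes the extended pass handles
(hypothetical GLSL-ish input, shown as a C comment):

   /*
    *    if (a) {
    *       if (b)
    *          discard;
    *    }
    *
    * Lowering the inner if first yields "discard if (b)". When the pass
    * then visits the outer if, the discard already carries a condition,
    * so it is ANDed with "a" to produce "discard if (a && b)" rather
    * than being overwritten with just "a".
    */
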
const glsl_type *const key2 = (glsl_type *) b;
if (key1->length != key2->length)
- return 1;
+ return false;
return memcmp(key1->fields.parameters, key2->fields.parameters,
(key1->length + 1) * sizeof(*key1->fields.parameters)) == 0;
function_key_hash(const void *a)
{
const glsl_type *const key = (glsl_type *) a;
- char hash_key[128];
- unsigned size = 0;
-
- size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length);
-
- for (unsigned i = 0; i < key->length; i++) {
- if (size >= sizeof(hash_key))
- break;
-
- size += snprintf(& hash_key[size], sizeof(hash_key) - size,
- "%p", (void *) key->fields.structure[i].type);
- }
-
- return _mesa_hash_string(hash_key);
+ return _mesa_hash_data(key->fields.parameters,
+ (key->length + 1) * sizeof(*key->fields.parameters));
}
const glsl_type *
return NULL;
}
+/* This is the second step in the recursion. We've found the tail and made a
+ * copy. Now we need to iterate over all possible leaves and call the
+ * callback on each one.
+ */
+static bool
+deref_foreach_leaf_build_recur(nir_deref_var *deref, nir_deref *tail,
+ nir_deref_foreach_leaf_cb cb, void *state)
+{
+ unsigned length;
+ union {
+ nir_deref_array arr;
+ nir_deref_struct str;
+ } tmp;
+
+ assert(tail->child == NULL);
+ switch (glsl_get_base_type(tail->type)) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
+ case GLSL_TYPE_BOOL:
+ if (glsl_type_is_vector_or_scalar(tail->type))
+ return cb(deref, state);
+ /* Fall Through */
+
+ case GLSL_TYPE_ARRAY:
+ tmp.arr.deref.deref_type = nir_deref_type_array;
+ tmp.arr.deref.type = glsl_get_array_element(tail->type);
+ tmp.arr.deref_array_type = nir_deref_array_type_direct;
+ tmp.arr.indirect = NIR_SRC_INIT;
+ tail->child = &tmp.arr.deref;
+
+ length = glsl_get_length(tail->type);
+ for (unsigned i = 0; i < length; i++) {
+ tmp.arr.deref.child = NULL;
+ tmp.arr.base_offset = i;
+ if (!deref_foreach_leaf_build_recur(deref, &tmp.arr.deref, cb, state))
+ return false;
+ }
+ return true;
+
+ case GLSL_TYPE_STRUCT:
+ tmp.str.deref.deref_type = nir_deref_type_struct;
+ tail->child = &tmp.str.deref;
+
+ length = glsl_get_length(tail->type);
+ for (unsigned i = 0; i < length; i++) {
+ tmp.arr.deref.child = NULL;
+ tmp.str.deref.type = glsl_get_struct_field(tail->type, i);
+ tmp.str.index = i;
+ if (!deref_foreach_leaf_build_recur(deref, &tmp.arr.deref, cb, state))
+ return false;
+ }
+ return true;
+
+ default:
+ unreachable("Invalid type for dereference");
+ }
+}
+
+/* This is the first step of the foreach_leaf recursion. In this step we are
+ * walking to the end of the deref chain and making a copy on the stack as we
+ * go. This is because we don't want to mutate the deref chain that was
+ * passed in by the caller. The downside is that this deref chain is on the
+ * stack and, if the caller wants to do anything with it, they will have to
+ * make their own copy because this one will go away.
+ */
+static bool
+deref_foreach_leaf_copy_recur(nir_deref_var *deref, nir_deref *tail,
+ nir_deref_foreach_leaf_cb cb, void *state)
+{
+ union {
+ nir_deref_array arr;
+ nir_deref_struct str;
+ } c;
+
+ if (tail->child) {
+ switch (tail->child->deref_type) {
+ case nir_deref_type_array:
+ c.arr = *nir_deref_as_array(tail->child);
+ tail->child = &c.arr.deref;
+ return deref_foreach_leaf_copy_recur(deref, &c.arr.deref, cb, state);
+
+ case nir_deref_type_struct:
+ c.str = *nir_deref_as_struct(tail->child);
+ tail->child = &c.str.deref;
+ return deref_foreach_leaf_copy_recur(deref, &c.str.deref, cb, state);
+
+ case nir_deref_type_var:
+ default:
+ unreachable("Invalid deref type for a child");
+ }
+ } else {
+ /* We've gotten to the end of the original deref. Time to start
+ * building our own derefs.
+ */
+ return deref_foreach_leaf_build_recur(deref, tail, cb, state);
+ }
+}
+
+/**
+ * This function iterates over all of the possible derefs that can be created
+ * with the given deref as the head. It then calls the provided callback with
+ * a full deref for each one.
+ *
+ * The deref passed to the callback will be allocated on the stack. You will
+ * need to make a copy if you want it to hang around.
+ */
+bool
+nir_deref_foreach_leaf(nir_deref_var *deref,
+ nir_deref_foreach_leaf_cb cb, void *state)
+{
+ nir_deref_var copy = *deref;
+ return deref_foreach_leaf_copy_recur(&copy, &copy.deref, cb, state);
+}
+
/* Returns a load_const instruction that represents the constant
* initializer for the given deref chain. The caller is responsible for
* ensuring that there actually is a constant initializer.
nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref);
+typedef bool (*nir_deref_foreach_leaf_cb)(nir_deref_var *deref, void *state);
+bool nir_deref_foreach_leaf(nir_deref_var *deref,
+ nir_deref_foreach_leaf_cb cb, void *state);
+
nir_load_const_instr *
nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref);
}
static inline nir_ssa_def *
+nir_bany_inequal(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1)
+{
+ switch (src0->num_components) {
+ case 1: return nir_ine(b, src0, src1);
+ case 2: return nir_bany_inequal2(b, src0, src1);
+ case 3: return nir_bany_inequal3(b, src0, src1);
+ case 4: return nir_bany_inequal4(b, src0, src1);
+ default:
+ unreachable("bad component size");
+ }
+}
+
+static inline nir_ssa_def *
+nir_bany(nir_builder *b, nir_ssa_def *src)
+{
+ return nir_bany_inequal(b, src, nir_imm_int(b, 0));
+}
+
+static inline nir_ssa_def *
nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
{
unsigned swizzle[4] = {c, c, c, c};
#include "nir_builder.h"
#include "nir_control_flow.h"
+static bool
+deref_apply_constant_initializer(nir_deref_var *deref, void *state)
+{
+ struct nir_builder *b = state;
+
+ nir_load_const_instr *initializer =
+ nir_deref_get_const_initializer_load(b->shader, deref);
+ nir_builder_instr_insert(b, &initializer->instr);
+
+ nir_store_deref_var(b, deref, &initializer->def, 0xf);
+
+ return true;
+}
+
static bool inline_function_impl(nir_function_impl *impl, struct set *inlined);
static void
/* Add copies of all in parameters */
assert(call->num_params == callee_copy->num_params);
+ b->cursor = nir_before_instr(&call->instr);
+
+ /* Before we insert the copy of the function, we need to lower away
+ * constant initializers on local variables. This is because constant
+ * initializers happen (effectively) at the top of the function and,
+ * since these are about to become locals of the calling function,
+ * initialization will happen at the top of the caller rather than at
+ * the top of the callee. This isn't usually a problem, but if we are
+ * being inlined inside of a loop, it can result in the variable not
+ * getting re-initialized properly for all loop iterations.
+ */
+ nir_foreach_variable(local, &callee_copy->locals) {
+ if (!local->constant_initializer)
+ continue;
+
+ nir_deref_var deref;
+ deref.deref.deref_type = nir_deref_type_var,
+ deref.deref.child = NULL;
+ deref.deref.type = local->type,
+ deref.var = local;
+
+ nir_deref_foreach_leaf(&deref, deref_apply_constant_initializer, b);
+
+ local->constant_initializer = NULL;
+ }
+
exec_list_append(&b->impl->locals, &callee_copy->locals);
exec_list_append(&b->impl->registers, &callee_copy->registers);
- b->cursor = nir_before_instr(&call->instr);
-
/* We now need to tie the two functions together using the
* parameters. There are two ways we do this: One is to turn the
* parameter into a local variable and do a shadow-copy. The other
* in shared_atomic_add, etc).
* 2: For CompSwap only: the second data parameter.
*/
-INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
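
Editor's note: with the constant BASE index in place, a backend can fold the
compile-time offset into the shared-memory address. A hedged sketch (not the
exact i965 change) using the standard nir_intrinsic_base() accessor:

   /* Sketch: combine the variable offset in src[0] with the new BASE
    * constant when lowering a shared atomic. */
   nir_ssa_def *addr =
      nir_iadd(&b, intrin->src[0].ssa,
               nir_imm_int(&b, nir_intrinsic_base(intrin)));
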
#define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \
INTRINSIC(load_##name, 0, ARR(0), true, components, 0, num_indices, \
return true;
}
-/* Performs variable renaming by doing a DFS of the dominance tree
+/* Performs variable renaming
*
* This algorithm is very similar to the one outlined in "Efficiently
* Computing Static Single Assignment Form and the Control Dependence
* SSA def on the stack per block.
*/
static bool
-rename_variables_block(nir_block *block, struct lower_variables_state *state)
+rename_variables(struct lower_variables_state *state)
{
nir_builder b;
nir_builder_init(&b, state->impl);
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_intrinsic)
- continue;
-
- nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-
- switch (intrin->intrinsic) {
- case nir_intrinsic_load_var: {
- struct deref_node *node =
- get_deref_node(intrin->variables[0], state);
-
- if (node == NULL) {
- /* If we hit this path then we are referencing an invalid
- * value. Most likely, we unrolled something and are
- * reading past the end of some array. In any case, this
- * should result in an undefined value.
- */
- nir_ssa_undef_instr *undef =
- nir_ssa_undef_instr_create(state->shader,
- intrin->num_components,
- intrin->dest.ssa.bit_size);
-
- nir_instr_insert_before(&intrin->instr, &undef->instr);
- nir_instr_remove(&intrin->instr);
-
- nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- nir_src_for_ssa(&undef->def));
+ nir_foreach_block(block, state->impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
continue;
- }
- if (!node->lower_to_ssa)
- continue;
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_var: {
+ struct deref_node *node =
+ get_deref_node(intrin->variables[0], state);
+
+ if (node == NULL) {
+ /* If we hit this path then we are referencing an invalid
+ * value. Most likely, we unrolled something and are
+ * reading past the end of some array. In any case, this
+ * should result in an undefined value.
+ */
+ nir_ssa_undef_instr *undef =
+ nir_ssa_undef_instr_create(state->shader,
+ intrin->num_components,
+ intrin->dest.ssa.bit_size);
+
+ nir_instr_insert_before(&intrin->instr, &undef->instr);
+ nir_instr_remove(&intrin->instr);
+
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(&undef->def));
+ continue;
+ }
- nir_alu_instr *mov = nir_alu_instr_create(state->shader,
- nir_op_imov);
- mov->src[0].src = nir_src_for_ssa(
- nir_phi_builder_value_get_block_def(node->pb_value, block));
- for (unsigned i = intrin->num_components; i < 4; i++)
- mov->src[0].swizzle[i] = 0;
+ if (!node->lower_to_ssa)
+ continue;
- assert(intrin->dest.is_ssa);
+ nir_alu_instr *mov = nir_alu_instr_create(state->shader,
+ nir_op_imov);
+ mov->src[0].src = nir_src_for_ssa(
+ nir_phi_builder_value_get_block_def(node->pb_value, block));
+ for (unsigned i = intrin->num_components; i < 4; i++)
+ mov->src[0].swizzle[i] = 0;
- mov->dest.write_mask = (1 << intrin->num_components) - 1;
- nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
- intrin->num_components,
- intrin->dest.ssa.bit_size, NULL);
+ assert(intrin->dest.is_ssa);
- nir_instr_insert_before(&intrin->instr, &mov->instr);
- nir_instr_remove(&intrin->instr);
+ mov->dest.write_mask = (1 << intrin->num_components) - 1;
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+ intrin->num_components,
+ intrin->dest.ssa.bit_size, NULL);
- nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- nir_src_for_ssa(&mov->dest.dest.ssa));
- break;
- }
-
- case nir_intrinsic_store_var: {
- struct deref_node *node =
- get_deref_node(intrin->variables[0], state);
-
- if (node == NULL) {
- /* Probably an out-of-bounds array store. That should be a
- * no-op. */
+ nir_instr_insert_before(&intrin->instr, &mov->instr);
nir_instr_remove(&intrin->instr);
- continue;
- }
- if (!node->lower_to_ssa)
- continue;
-
- assert(intrin->num_components ==
- glsl_get_vector_elements(node->type));
-
- assert(intrin->src[0].is_ssa);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(&mov->dest.dest.ssa));
+ break;
+ }
- nir_ssa_def *new_def;
- b.cursor = nir_before_instr(&intrin->instr);
+ case nir_intrinsic_store_var: {
+ struct deref_node *node =
+ get_deref_node(intrin->variables[0], state);
- unsigned wrmask = nir_intrinsic_write_mask(intrin);
- if (wrmask == (1 << intrin->num_components) - 1) {
- /* Whole variable store - just copy the source. Note that
- * intrin->num_components and intrin->src[0].ssa->num_components
- * may differ.
- */
- unsigned swiz[4];
- for (unsigned i = 0; i < 4; i++)
- swiz[i] = i < intrin->num_components ? i : 0;
+ if (node == NULL) {
+ /* Probably an out-of-bounds array store. That should be a
+ * no-op. */
+ nir_instr_remove(&intrin->instr);
+ continue;
+ }
- new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
- intrin->num_components, false);
- } else {
- nir_ssa_def *old_def =
- nir_phi_builder_value_get_block_def(node->pb_value, block);
- /* For writemasked store_var intrinsics, we combine the newly
- * written values with the existing contents of unwritten
- * channels, creating a new SSA value for the whole vector.
- */
- nir_ssa_def *srcs[4];
- for (unsigned i = 0; i < intrin->num_components; i++) {
- if (wrmask & (1 << i)) {
- srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
- } else {
- srcs[i] = nir_channel(&b, old_def, i);
+ if (!node->lower_to_ssa)
+ continue;
+
+ assert(intrin->num_components ==
+ glsl_get_vector_elements(node->type));
+
+ assert(intrin->src[0].is_ssa);
+
+ nir_ssa_def *new_def;
+ b.cursor = nir_before_instr(&intrin->instr);
+
+ unsigned wrmask = nir_intrinsic_write_mask(intrin);
+ if (wrmask == (1 << intrin->num_components) - 1) {
+ /* Whole variable store - just copy the source. Note that
+ * intrin->num_components and intrin->src[0].ssa->num_components
+ * may differ.
+ */
+ unsigned swiz[4];
+ for (unsigned i = 0; i < 4; i++)
+ swiz[i] = i < intrin->num_components ? i : 0;
+
+ new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
+ intrin->num_components, false);
+ } else {
+ nir_ssa_def *old_def =
+ nir_phi_builder_value_get_block_def(node->pb_value, block);
+ /* For writemasked store_var intrinsics, we combine the newly
+ * written values with the existing contents of unwritten
+ * channels, creating a new SSA value for the whole vector.
+ */
+ nir_ssa_def *srcs[4];
+ for (unsigned i = 0; i < intrin->num_components; i++) {
+ if (wrmask & (1 << i)) {
+ srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
+ } else {
+ srcs[i] = nir_channel(&b, old_def, i);
+ }
}
+ new_def = nir_vec(&b, srcs, intrin->num_components);
}
- new_def = nir_vec(&b, srcs, intrin->num_components);
- }
- assert(new_def->num_components == intrin->num_components);
+ assert(new_def->num_components == intrin->num_components);
- nir_phi_builder_value_set_block_def(node->pb_value, block, new_def);
- nir_instr_remove(&intrin->instr);
- break;
- }
+ nir_phi_builder_value_set_block_def(node->pb_value, block, new_def);
+ nir_instr_remove(&intrin->instr);
+ break;
+ }
- default:
- break;
+ default:
+ break;
+ }
}
}
- for (unsigned i = 0; i < block->num_dom_children; ++i)
- rename_variables_block(block->dom_children[i], state);
-
return true;
}
}
}
- rename_variables_block(nir_start_block(impl), &state);
+ rename_variables(&state);
nir_phi_builder_finish(state.phi_builder);
* var.pb_val = nir_phi_builder_add_value(pb, var.defs)
*
* // Visit each block. This needs to visit dominators first;
- * // nir_for_each_block() will be ok.
+ * // nir_foreach_block() will be ok.
+ *
* foreach block:
* foreach instruction:
* foreach use of variable var:
case SpvStorageClassPushConstant:
var->mode = vtn_variable_mode_push_constant;
assert(b->shader->num_uniforms == 0);
- b->shader->num_uniforms = vtn_type_block_size(var->type) * 4;
+ b->shader->num_uniforms = vtn_type_block_size(var->type);
break;
case SpvStorageClassInput:
var->mode = vtn_variable_mode_input;
/**
* Called via eglInitialize(), GLX_drv->API.Initialize().
+ *
+ * This must be guaranteed to be called exactly once, even if eglInitialize is
+ * called many times (without an eglTerminate in between).
*/
static EGLBoolean
dri2_initialize(_EGLDriver *drv, _EGLDisplay *disp)
{
+ EGLBoolean ret = EGL_FALSE;
+ struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+
+ /* In the case where the application calls eglMakeCurrent(context1),
+ * eglTerminate, then eglInitialize again (without a call to eglReleaseThread
+ * or eglMakeCurrent(NULL) before that), dri2_dpy structure is still
+ * initialized, as we need it to be able to free context1 correctly.
+ *
+ * It would probably be safest to forcibly release the display with
+ * dri2_display_release, to make sure the display is reinitialized correctly.
+ * However, the EGL spec states that we need to keep a reference to the
+ * current context (so we cannot call dri2_make_current(NULL)), and therefore
+ * we would leak context1 as we would be missing the old display connection
+ * to free it up correctly.
+ */
+ if (dri2_dpy) {
+ dri2_dpy->ref_count++;
+ return EGL_TRUE;
+ }
+
/* not until swrast_dri is supported */
if (disp->Options.UseFallback)
return EGL_FALSE;
+ /* Nothing to initialize for a test-only display */
+ if (disp->Options.TestOnly)
+ return EGL_TRUE;
+
switch (disp->Platform) {
#ifdef HAVE_SURFACELESS_PLATFORM
case _EGL_PLATFORM_SURFACELESS:
- if (disp->Options.TestOnly)
- return EGL_TRUE;
- return dri2_initialize_surfaceless(drv, disp);
+ ret = dri2_initialize_surfaceless(drv, disp);
+ break;
#endif
-
#ifdef HAVE_X11_PLATFORM
case _EGL_PLATFORM_X11:
- if (disp->Options.TestOnly)
- return EGL_TRUE;
- return dri2_initialize_x11(drv, disp);
+ ret = dri2_initialize_x11(drv, disp);
+ break;
#endif
-
#ifdef HAVE_DRM_PLATFORM
case _EGL_PLATFORM_DRM:
- if (disp->Options.TestOnly)
- return EGL_TRUE;
- return dri2_initialize_drm(drv, disp);
+ ret = dri2_initialize_drm(drv, disp);
+ break;
#endif
#ifdef HAVE_WAYLAND_PLATFORM
case _EGL_PLATFORM_WAYLAND:
- if (disp->Options.TestOnly)
- return EGL_TRUE;
- return dri2_initialize_wayland(drv, disp);
+ ret = dri2_initialize_wayland(drv, disp);
+ break;
#endif
#ifdef HAVE_ANDROID_PLATFORM
case _EGL_PLATFORM_ANDROID:
- if (disp->Options.TestOnly)
- return EGL_TRUE;
- return dri2_initialize_android(drv, disp);
+ ret = dri2_initialize_android(drv, disp);
+ break;
#endif
-
default:
_eglLog(_EGL_WARNING, "No EGL platform enabled.");
return EGL_FALSE;
}
+
+ if (ret) {
+ dri2_dpy = dri2_egl_display(disp);
+
+ if (!dri2_dpy) {
+ return EGL_FALSE;
+ }
+
+ dri2_dpy->ref_count++;
+ }
+
+ return ret;
}
/**
- * Called via eglTerminate(), drv->API.Terminate().
+ * Decrement display reference count, and free up display if necessary.
*/
-static EGLBoolean
-dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)
-{
+static void
+dri2_display_release(_EGLDisplay *disp) {
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
unsigned i;
- _eglReleaseDisplayResources(drv, disp);
+ assert(dri2_dpy->ref_count > 0);
+ dri2_dpy->ref_count--;
+
+ if (dri2_dpy->ref_count > 0)
+ return;
+
_eglCleanupDisplay(disp);
if (dri2_dpy->own_dri_screen)
}
free(dri2_dpy);
disp->DriverData = NULL;
+}
+
+/**
+ * Called via eglTerminate(), drv->API.Terminate().
+ *
+ * This must be guaranteed to be called exactly once, even if eglTerminate is
+ * called many times (without an eglInitialize in between).
+ */
+static EGLBoolean
+dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)
+{
+ /* Release all non-current Context/Surfaces. */
+ _eglReleaseDisplayResources(drv, disp);
+
+ dri2_display_release(disp);
return EGL_TRUE;
}
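
Editor's note: together with the dri2_initialize change above and the
dri2_make_current change below, the display now stays alive while either the
application holds it initialized or a current context/surface still needs it.
The intended lifecycle, as an illustrative trace:

   /* ref_count bookkeeping (illustrative, not real code):
    *
    *   eglInitialize(dpy)               0 -> 1
    *   eglMakeCurrent(dpy, .., ctx1)    1 -> 2   bind increments
    *   eglTerminate(dpy)                2 -> 1   display kept alive;
    *                                             ctx1 still needs it
    *   eglInitialize(dpy)               1 -> 2   early-return path
    *   eglMakeCurrent(dpy, .., NULL)    2 -> 1   old ctx destroyed, its
    *                                             display released
    *   eglTerminate(dpy)                1 -> 0   display freed
    */
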
_EGLSurface *tmp_dsurf, *tmp_rsurf;
__DRIdrawable *ddraw, *rdraw;
__DRIcontext *cctx;
+ EGLBoolean unbind;
+
+ if (!dri2_dpy)
+ return _eglError(EGL_NOT_INITIALIZED, "eglMakeCurrent");
/* make new bindings */
- if (!_eglBindContext(ctx, dsurf, rsurf, &old_ctx, &old_dsurf, &old_rsurf))
+ if (!_eglBindContext(ctx, dsurf, rsurf, &old_ctx, &old_dsurf, &old_rsurf)) {
+ /* _eglBindContext already sets the EGL error (in _eglCheckMakeCurrent) */
return EGL_FALSE;
+ }
/* flush before context switch */
if (old_ctx && dri2_drv->glFlush)
dri2_dpy->core->unbindContext(old_cctx);
}
- if ((cctx == NULL && ddraw == NULL && rdraw == NULL) ||
- dri2_dpy->core->bindContext(cctx, ddraw, rdraw)) {
+ unbind = (cctx == NULL && ddraw == NULL && rdraw == NULL);
+
+ if (unbind || dri2_dpy->core->bindContext(cctx, ddraw, rdraw)) {
if (old_dsurf)
drv->API.DestroySurface(drv, disp, old_dsurf);
if (old_rsurf)
drv->API.DestroySurface(drv, disp, old_rsurf);
- if (old_ctx)
+
+ if (!unbind)
+ dri2_dpy->ref_count++;
+ if (old_ctx) {
+ EGLDisplay old_disp = _eglGetDisplayHandle(old_ctx->Resource.Display);
drv->API.DestroyContext(drv, disp, old_ctx);
+ dri2_display_release(old_disp);
+ }
return EGL_TRUE;
} else {
_eglPutSurface(old_rsurf);
_eglPutContext(old_ctx);
- return EGL_FALSE;
+ /* dri2_dpy->core->bindContext failed. We cannot tell for sure why, but
+ * setting the error to EGL_BAD_MATCH is surely better than leaving it
+ * as EGL_SUCCESS.
+ */
+ return _eglError(EGL_BAD_MATCH, "eglMakeCurrent");
}
}
const __DRI2interopExtension *interop;
int fd;
+ /* dri2_initialize/dri2_terminate increment/decrement this count, as does
+ * dri2_make_current (it tracks whether there are active contexts/surfaces). */
+ int ref_count;
+
int own_device;
int swap_available;
int invalidate_available;
static EGLBoolean
droid_window_enqueue_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_surf)
{
+ struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+
/* To avoid blocking other EGL calls, release the display mutex before
* we enter droid_window_enqueue_buffer() and re-acquire the mutex upon
* return.
dri2_surf->buffer = NULL;
mtx_lock(&disp->Mutex);
+
+ if (dri2_surf->dri_image) {
+ dri2_dpy->image->destroyImage(dri2_surf->dri_image);
+ dri2_surf->dri_image = NULL;
+ }
+
return EGL_TRUE;
}
config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
dri2_surf->base.GLColorspace);
+ if (!config)
+ goto cleanup_surface;
if (dri2_dpy->dri2) {
dri2_surf->dri_drawable =
int fourcc, pitch;
int offset = 0, fd;
+ if (dri2_surf->dri_image)
+ return 0;
+
if (!dri2_surf->buffer)
return -1;
static EGLBoolean
droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
{
- struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv);
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
struct dri2_egl_surface *dri2_surf = dri2_egl_surface(draw);
- _EGLContext *ctx;
if (dri2_surf->base.Type != EGL_WINDOW_BIT)
return EGL_TRUE;
close(dri2_dpy->fd);
cleanup_display:
free(dri2_dpy);
+ dpy->DriverData = NULL;
return _eglError(EGL_NOT_INITIALIZED, err);
}
close(fd);
free(dri2_dpy);
+ disp->DriverData = NULL;
return EGL_FALSE;
}
close(dri2_dpy->fd);
cleanup_display:
free(dri2_dpy);
+ disp->DriverData = NULL;
return _eglError(EGL_NOT_INITIALIZED, err);
}
wl_event_queue_destroy(dri2_dpy->wl_queue);
cleanup_dpy:
free(dri2_dpy);
+ disp->DriverData = NULL;
return EGL_FALSE;
}
wl_event_queue_destroy(dri2_dpy->wl_queue);
cleanup_dpy:
free(dri2_dpy);
+ disp->DriverData = NULL;
return EGL_FALSE;
}
xcb_disconnect(dri2_dpy->conn);
cleanup_dpy:
free(dri2_dpy);
+ disp->DriverData = NULL;
return EGL_FALSE;
}
xcb_disconnect(dri2_dpy->conn);
cleanup_dpy:
free(dri2_dpy);
+ disp->DriverData = NULL;
return EGL_FALSE;
}
xcb_disconnect(dri2_dpy->conn);
cleanup_dpy:
free(dri2_dpy);
+ disp->DriverData = NULL;
return EGL_FALSE;
}
void *private;
void (*resize_callback)(struct wl_egl_window *, void *);
+ void (*destroy_window_callback)(void *);
};
#ifdef __cplusplus
egl_window->surface = surface;
egl_window->private = NULL;
egl_window->resize_callback = NULL;
+ egl_window->destroy_window_callback = NULL;
wl_egl_window_resize(egl_window, width, height, 0, 0);
egl_window->attached_width = 0;
egl_window->attached_height = 0;
WL_EGL_EXPORT void
wl_egl_window_destroy(struct wl_egl_window *egl_window)
{
+ if (egl_window->destroy_window_callback)
+ egl_window->destroy_window_callback(egl_window->private);
free(egl_window);
}
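
Editor's note: the new hook lets an EGL implementation learn when the client
destroys a wl_egl_window that an EGLSurface still wraps. A sketch of the
consumer side; all names except the wl_egl_window fields are hypothetical:

   /* Hypothetical consumer: clear the surface's window pointer so later
    * EGL calls don't dereference freed memory. */
   static void
   on_native_window_destroyed(void *data)
   {
      struct my_egl_surface *surf = data;   /* illustrative type */
      surf->window = NULL;
   }

   /* at surface creation time: */
   window->private = surf;
   window->destroy_window_callback = on_native_window_destroyed;
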
util/u_upload_mgr.h \
util/u_vbuf.c \
util/u_vbuf.h \
- util/u_video.h
+ util/u_video.h \
+ util/u_viewport.h
NIR_SOURCES := \
nir/tgsi_to_nir.c \
if (exponent == 0) {
if (mantissa != 0) {
- const float scale = 1.0 / (1 << 20);
+ const float scale = 1.0 / (1 << 19);
f32.f = scale * mantissa;
}
}
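Assuming this is the 10-bit float (5-bit exponent, 5-bit mantissa, bias 15) unpack path, a denormal decodes as (mantissa / 2^5) * 2^(1-15) = mantissa * 2^-19, so the scale is 1.0 / (1 << 19); the old 1 << 20 divisor is the denormal scale for the 11-bit format with its 6-bit mantissa.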
static inline uint64_t
align64(uint64_t value, unsigned alignment)
{
- return (value + alignment - 1) & ~(alignment - 1);
+ return (value + alignment - 1) & ~((uint64_t)alignment - 1);
}
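The old mask was computed in 32-bit arithmetic: alignment - 1 is an unsigned int, so ~(alignment - 1) zero-extends to 64 bits and clears the upper half of value. A minimal standalone sketch of the difference (hypothetical test, not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t align64_old(uint64_t v, unsigned a)
    {
       return (v + a - 1) & ~(a - 1);           /* 32-bit mask, zero-extended */
    }

    static uint64_t align64_new(uint64_t v, unsigned a)
    {
       return (v + a - 1) & ~((uint64_t)a - 1); /* full 64-bit mask */
    }

    int main(void)
    {
       /* Values above 4 GiB lose their upper bits with the old version. */
       assert(align64_old(0x100000010ull, 8) == 0x10);
       assert(align64_new(0x100000010ull, 8) == 0x100000010ull);
       return 0;
    }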
/**
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2016 Ilia Mirkin.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_VIEWPORT_H
+#define U_VIEWPORT_H
+
+#include "c99_compat.h"
+#include "pipe/p_state.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline void
+util_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz,
+ float *zmin, float *zmax)
+{
+ float a, b;
+ if (halfz) {
+ a = vp->translate[2];
+ b = vp->translate[2] + vp->scale[2];
+ } else {
+ a = vp->translate[2] - vp->scale[2];
+ b = vp->translate[2] + vp->scale[2];
+ }
+
+ *zmin = a < b ? a : b;
+ *zmax = a < b ? b : a;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
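For reference, how the helper behaves for a standard 0..1 depth range (a hypothetical caller; values chosen only for illustration):

    struct pipe_viewport_state vp = {
       .scale     = { 320.0f, -240.0f, 0.5f },
       .translate = { 320.0f,  240.0f, 0.5f },
    };
    float zmin, zmax;
    util_viewport_zmin_zmax(&vp, false, &zmin, &zmax); /* GL [-1,1] clip z: 0.0 .. 1.0 */
    util_viewport_zmin_zmax(&vp, true,  &zmin, &zmax); /* halfz [0,1] clip z: 0.5 .. 1.0 */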
{
xcb_sync_destroy_fence(scrn->conn, buffer->sync_fence);
xshmfence_unmap_shm(buffer->shm_fence);
+ pipe_resource_reference(&buffer->texture, NULL);
FREE(buffer);
}
{
return ((val) << A4XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A4XX_RB_DEPTH_CONTROL_ZFUNC__MASK;
}
-#define A4XX_RB_DEPTH_CONTROL_BF_ENABLE 0x00000080
+#define A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE 0x00000080
#define A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE 0x00010000
#define A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS 0x00020000
#define A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000
#define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000
#define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000
+#define A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE 0x00010000
+#define A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 0x00020000
#define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000
#define REG_A4XX_GRAS_CLEAR_CNTL 0x00002003
#include "util/u_memory.h"
#include "util/u_helpers.h"
#include "util/u_format.h"
+#include "util/u_viewport.h"
#include "freedreno_resource.h"
#include "freedreno_query_hw.h"
A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
}
- if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
bool fragz = fp->has_kill | fp->writes_pos;
+ bool clamp = !ctx->rasterizer->depth_clip;
OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, zsa->rb_depth_control |
+ COND(clamp, A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE) |
COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE) |
COND(fragz && fp->frag_coord, A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
}
+ if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
+ float zmin, zmax;
+ int depth = 24;
+ if (ctx->framebuffer.zsbuf) {
+ depth = util_format_get_component_bits(
+ pipe_surface_format(ctx->framebuffer.zsbuf),
+ UTIL_FORMAT_COLORSPACE_ZS, 0);
+ }
+ util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz,
+ &zmin, &zmax);
+
+ OUT_PKT0(ring, REG_A4XX_RB_VPORT_Z_CLAMP(0), 2);
+ if (depth == 32) {
+ OUT_RING(ring, fui(zmin));
+ OUT_RING(ring, fui(zmax));
+ } else if (depth == 16) {
+ OUT_RING(ring, (uint32_t)(zmin * 0xffff));
+ OUT_RING(ring, (uint32_t)(zmax * 0xffff));
+ } else {
+ OUT_RING(ring, (uint32_t)(zmin * 0xffffff));
+ OUT_RING(ring, (uint32_t)(zmax * 0xffffff));
+ }
+ }
+
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
unsigned n = pfb->nr_cbufs;
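In the clamp hunk above, the bounds are written in the depth buffer's storage representation: raw IEEE floats for 32-bit depth, unnormalized fixed point otherwise. For example, zmin = 0.5 is emitted as 0x7fff for a 16-bit buffer and 0x7fffff for the default 24-bit case.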
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
if (!cso->depth_clip)
- so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE;
+ so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE |
+ A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE;
if (cso->clip_halfz)
so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z;
CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
{
code[0] = 0x00000002 | ((qOp & 1) << 31);
- code[1] = 0x7fc00000 | (qOp >> 1) | (laneMask << 12);
+ code[1] = 0x7fc00200 | (qOp >> 1) | (laneMask << 12); // dall
defId(i->def(0), 2);
srcId(i->src(0), 10);
srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23);
- if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
- code[1] |= 1 << 9; // dall
-
emitPredicate(i);
}
void
CodeEmitterGM107::emitIADD()
{
- if (!longIMMD(insn->src(1))) {
+ if (insn->src(1).getFile() != FILE_IMMEDIATE) {
switch (insn->src(1).getFile()) {
case FILE_GPR:
emitInsn(0x5c100000);
emitX (0x2b);
} else {
emitInsn(0x1c000000);
+ emitNEG (0x38, insn->src(0));
emitSAT (0x36);
emitX (0x35);
emitCC (0x34);
insn->encSize = 8;
for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
- fn->bbArray[i]->binPos += 4;
+ fn->bbArray[i]->binPos += adj;
}
fn->binSize += adj;
insn->bb->binSize += adj;
return;
}
}
- epilogue->binSize -= 8;
- func->binSize -= 8;
+
+ int adj = epilogue->getExit()->encSize;
+ epilogue->binSize -= adj;
+ func->binSize -= adj;
delete_Instruction(func->getProgram(), epilogue->getExit());
+
+   // There may be BBs that are laid out after the exit block
+ for (int i = func->bbCount - 1; i >= 0 && func->bbArray[i] != epilogue; --i) {
+ func->bbArray[i]->binPos -= adj;
+ }
}
void
void
CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
{
- code[0] = 0x00000000 | (laneMask << 6);
+ code[0] = 0x00000200 | (laneMask << 6); // dall
code[1] = 0x48000000 | qOp;
defId(i->def(0), 14);
srcId(i->src(0), 20);
srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26);
- if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
- code[0] |= 1 << 9; // dall
-
emitPredicate(i);
}
bb.push(node);
- while (bb.getSize()) {
+ while (bb.getSize() || cross.getSize()) {
+ if (bb.getSize() == 0)
+ cross.moveTo(bb);
+
node = reinterpret_cast<Graph::Node *>(bb.pop().u.p);
assert(node);
if (!node->visit(sequence))
}
}
nodes[count++] = node;
-
- if (bb.getSize() == 0)
- cross.moveTo(bb);
}
}
base = 0;
}
+ if (ind) {
+ Value *ptr;
+ ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ind, bld.mkImm(su->tex.r));
+ ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7));
+ su->setIndirectR(ptr);
+ }
+
// get surface coordinates
for (c = 0; c < arg; ++c)
src[c] = su->getSrc(c);
struct nv30_miptree *zeta_mt = nv30_miptree(fb->zsbuf->texture);
if (color_mt->swizzled != zeta_mt->swizzled ||
- (util_format_get_blocksize(fb->zsbuf->format) > 2) !=
- (util_format_get_blocksize(fb->cbufs[0]->format) > 2)) {
+ (color_mt->swizzled &&
+ (util_format_get_blocksize(fb->zsbuf->format) > 2) !=
+ (util_format_get_blocksize(fb->cbufs[0]->format) > 2))) {
nv30->framebuffer.zsbuf = NULL;
debug_printf("Mismatched color and zeta formats, ignoring zeta.\n");
}
F3(A, R11G11B10_FLOAT, R11G11B10_FLOAT, R, G, B, xx, FLOAT, BF10GF11RF11, IB),
F3(A, L8_UNORM, R8_UNORM, R, R, R, xx, UNORM, R8, TB),
- F3(A, L8_SRGB, R8_UNORM, R, R, R, xx, UNORM, R8, TB),
+ F3(A, L8_SRGB, NONE, R, R, R, xx, UNORM, R8, T),
F3(A, L8_SNORM, R8_SNORM, R, R, R, xx, SNORM, R8, TC),
I3(A, L8_SINT, R8_SINT, R, R, R, xx, SINT, R8, TR),
I3(A, L8_UINT, R8_UINT, R, R, R, xx, UINT, R8, TR),
C4(A, L4A4_UNORM, NONE, R, R, R, G, UNORM, G4R4, T),
C4(A, L8A8_UNORM, RG8_UNORM, R, R, R, G, UNORM, G8R8, T),
C4(A, L8A8_SNORM, RG8_SNORM, R, R, R, G, SNORM, G8R8, T),
- C4(A, L8A8_SRGB, RG8_UNORM, R, R, R, G, UNORM, G8R8, T),
+ C4(A, L8A8_SRGB, NONE, R, R, R, G, UNORM, G8R8, T),
C4(A, L8A8_SINT, RG8_SINT, R, R, R, G, SINT, G8R8, T),
C4(A, L8A8_UINT, RG8_UINT, R, R, R, G, UINT, G8R8, T),
C4(A, L16A16_UNORM, RG16_UNORM, R, R, R, G, UNORM, R16_G16, T),
#include "util/u_format.h"
+#include "util/u_viewport.h"
#include "nv50/nv50_context.h"
PUSH_DATAf(push, vpt->scale[1]);
PUSH_DATAf(push, vpt->scale[2]);
- zmin = vpt->translate[2] - fabsf(vpt->scale[2]);
- zmax = vpt->translate[2] + fabsf(vpt->scale[2]);
+ /* If the halfz setting ever changes, the viewports will also get
+    * updated. The rast will get updated before the validate function has a
+    * chance to run, so we can just use it directly without an atom
+ * dependency.
+ */
+ util_viewport_zmin_zmax(vpt, nv50->rast->pipe.clip_halfz, &zmin, &zmax);
#ifdef NV50_SCISSORS_CLIPPING
BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(i)), 2);
NOUVEAU_DRV_STAT(&nvc0->screen->base, resource_validate_count, count);
}
-static void
-nvc0_context_get_sample_position(struct pipe_context *pipe,
- unsigned sample_count, unsigned sample_index,
- float *xy)
+const void *
+nvc0_get_sample_locations(unsigned sample_count)
{
static const uint8_t ms1[1][2] = { { 0x8, 0x8 } };
static const uint8_t ms2[2][2] = {
case 8: ptr = ms8; break;
default:
assert(0);
- return; /* bad sample count -> undefined locations */
+ return NULL; /* bad sample count -> undefined locations */
}
+ return ptr;
+}
+
+static void
+nvc0_context_get_sample_position(struct pipe_context *pipe,
+ unsigned sample_count, unsigned sample_index,
+ float *xy)
+{
+ const uint8_t (*ptr)[2];
+
+ ptr = nvc0_get_sample_locations(sample_count);
+ if (!ptr)
+ return;
+
xy[0] = ptr[sample_index][0] * 0.0625f;
xy[1] = ptr[sample_index][1] * 0.0625f;
}
void nvc0_bufctx_fence(struct nvc0_context *, struct nouveau_bufctx *,
bool on_flush);
void nvc0_default_kick_notify(struct nouveau_pushbuf *);
+const void *nvc0_get_sample_locations(unsigned);
/* nvc0_draw.c */
extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
#include "util/u_format.h"
#include "util/u_framebuffer.h"
#include "util/u_math.h"
+#include "util/u_viewport.h"
#include "nvc0/nvc0_context.h"
PUSH_DATAf(push, xy[1]);
}
+ if (screen->base.class_3d >= GM200_3D_CLASS) {
+ const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
+ uint32_t val[4] = {};
+
+ for (i = 0; i < 16; i++) {
+ val[i / 4] |= ptr[i % ms][0] << (((i % 4) * 8) + 0);
+ val[i / 4] |= ptr[i % ms][1] << (((i % 4) * 8) + 4);
+ }
+
+ BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
+ PUSH_DATAp(push, val, 4);
+ }
+
if (serialize)
IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
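In the GM200 register block above, each of the 16 byte slots holds one sample position, x in the low nibble and y in the high nibble, four slots per dword, with positions repeating modulo the sample count. For ms == 1 (position 0x8, 0x8) every byte becomes 0x88, so all four dwords are 0x88888888.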
PUSH_DATA (push, (w << 16) | x);
PUSH_DATA (push, (h << 16) | y);
- zmin = vp->translate[2] - fabsf(vp->scale[2]);
- zmax = vp->translate[2] + fabsf(vp->scale[2]);
+ /* If the halfz setting ever changes, the viewports will also get
+ * updated. The rast will get updated before the validate function has a
+ * chance to hit, so we can just use it directly without an atom
+ * dependency.
+ */
+ util_viewport_zmin_zmax(vp, nvc0->rast->pipe.clip_halfz, &zmin, &zmax);
BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(i)), 2);
PUSH_DATAf(push, zmin);
PUSH_DATA (nvc0->base.pushbuf, 0);
}
- if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) {
- /* Invalidate all CP textures because they are aliased. */
- for (int i = 0; i < nvc0->num_textures[5]; i++)
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CP_TEX(i));
- nvc0->textures_dirty[5] = ~0;
- nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES;
- }
+ /* Invalidate all CP textures because they are aliased. */
+ for (int i = 0; i < nvc0->num_textures[5]; i++)
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CP_TEX(i));
+ nvc0->textures_dirty[5] = ~0;
+ nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES;
}
bool
PUSH_DATA (nvc0->base.pushbuf, 0);
}
- if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) {
- /* Invalidate all CP samplers because they are aliased. */
- nvc0->samplers_dirty[5] = ~0;
- nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
- }
+ /* Invalidate all CP samplers because they are aliased. */
+ nvc0->samplers_dirty[5] = ~0;
+ nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
}
/* Upload the "diagonal" entries for the possible texture sources ($t == $s).
/* Queue things up to let the macros write params to the driver constbuf */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, 512);
+ PUSH_DATA (push, 2048);
PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
BEGIN_NVC0(push, NVC0_3D(CB_POS), 1);
if (nvc0->vertprog->vp.need_draw_parameters) {
PUSH_SPACE(push, 9);
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, 512);
+ PUSH_DATA (push, 2048);
PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
if (!info->indirect) {
BEGIN_NVC0(nvc0->base.pushbuf, NVE4_CP(TSC_FLUSH), 1);
PUSH_DATA (nvc0->base.pushbuf, 0);
}
+
+ /* Invalidate all 3D samplers because they are aliased. */
+ for (int s = 0; s < 5; s++)
+ nvc0->samplers_dirty[s] = ~0;
+ nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS;
}
+
/* (Code duplicated at bottom for various non-convincing reasons.
* E.g. we might want to use the COMPUTE subchannel to upload TIC/TSC
* entries to avoid a subchannel switch.
}
nvc0->state.num_textures[s] = nvc0->num_textures[s];
+
+ /* Invalidate all 3D textures because they are aliased. */
+ for (int s = 0; s < 5; s++) {
+ for (int i = 0; i < nvc0->num_textures[s]; i++)
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
+ nvc0->textures_dirty[s] = ~0;
+ }
+ nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
}
void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
{
switch (nr_samples) {
+ default:
+ case 1:
+ radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0);
+ radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0);
+ radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0);
+ radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0);
+ break;
case 2:
radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
return;
}
+ /* Carrizo SDMA texture copying is very broken for some users.
+ * https://bugs.freedesktop.org/show_bug.cgi?id=97029
+ */
+ if (sctx->b.family == CHIP_CARRIZO)
+ goto fallback;
+
if (cik_sdma_copy_texture(sctx, dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box))
return;
util_memcpy_cpu_to_le32(tmp, ptr, size);
}
-void si_set_constant_buffer(struct si_context *sctx,
- struct si_buffer_resources *buffers,
- uint slot, struct pipe_constant_buffer *input)
+static void si_set_constant_buffer(struct si_context *sctx,
+ struct si_buffer_resources *buffers,
+ uint slot, struct pipe_constant_buffer *input)
{
assert(slot < buffers->desc.num_elements);
pipe_resource_reference(&buffers->buffers[slot], NULL);
buffers->desc.dirty_mask |= 1u << slot;
}
+void si_set_rw_buffer(struct si_context *sctx,
+ uint slot, struct pipe_constant_buffer *input)
+{
+ si_set_constant_buffer(sctx, &sctx->rw_buffers, slot, input);
+}
+
static void si_pipe_set_constant_buffer(struct pipe_context *ctx,
uint shader, uint slot,
struct pipe_constant_buffer *input)
* and most other clients can use TC L2 as well, we don't need
* to flush it.
*
- * The only case which requires flushing it is VGT DMA index
- * fetching, which is a rare case. Thus, flag the TC L2
- * dirtiness in the resource and handle it when index fetching
- * is used.
+		 * The only cases which require flushing it are VGT DMA index
+		 * fetching (on <= CIK) and indirect draw data, both of which
+		 * are rare. Thus, flag the TC L2 dirtiness in the resource and
+		 * handle it at draw call time.
*/
for (i = 0; i < sctx->b.streamout.num_targets; i++)
if (sctx->b.streamout.targets[i])
cb.user_buffer = stipple;
cb.buffer_size = sizeof(stipple);
- si_set_constant_buffer(sctx, &sctx->rw_buffers,
- SI_PS_CONST_POLY_STIPPLE, &cb);
+ si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, &cb);
}
/* TEXTURE METADATA ENABLE/DISABLE */
si_mark_atom_dirty(ctx, &ctx->clip_regs);
si_mark_atom_dirty(ctx, &ctx->clip_state.atom);
- si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
+ ctx->msaa_sample_locs.nr_samples = 0;
+ si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs.atom);
si_mark_atom_dirty(ctx, &ctx->msaa_config);
si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
si_mark_atom_dirty(ctx, &ctx->cb_render_state);
si_begin_new_cs(sctx);
r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */
- /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
- * with a NULL buffer). We need to use a dummy buffer instead. */
+ /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
+ * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
if (sctx->b.chip_class == CIK) {
sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
PIPE_USAGE_DEFAULT, 16);
}
}
+ si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,
+ &sctx->null_const_buf);
+ si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES,
+ &sctx->null_const_buf);
+ si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE,
+ &sctx->null_const_buf);
+ si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS,
+ &sctx->null_const_buf);
+
/* Clear the NULL constant buffer, because loads should return zeros. */
sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
sctx->null_const_buf.buffer->width0, 0,
struct pipe_clip_state state;
};
+struct si_sample_locs {
+ struct r600_atom atom;
+ unsigned nr_samples;
+};
+
struct si_sample_mask {
struct r600_atom atom;
uint16_t sample_mask;
/* Atom declarations. */
struct r600_atom cache_flush;
struct si_framebuffer framebuffer;
- struct r600_atom msaa_sample_locs;
+ struct si_sample_locs msaa_sample_locs;
struct r600_atom db_render_state;
struct r600_atom msaa_config;
struct si_sample_mask sample_mask;
radeon_llvm_dispose(&ctx.radeon_bld);
+ /* Validate SGPR and VGPR usage for compute to detect compiler bugs.
+	 * LLVM 3.9svn is known to have such a bug.
+ */
+ if (sel->type == PIPE_SHADER_COMPUTE) {
+ unsigned *props = sel->info.properties;
+ unsigned wave_size = 64;
+ unsigned max_vgprs = 256;
+ unsigned max_sgprs = sscreen->b.chip_class >= VI ? 800 : 512;
+ unsigned max_sgprs_per_wave = 128;
+ unsigned min_waves_per_cu =
+ DIV_ROUND_UP(props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
+ props[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
+ props[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH],
+ wave_size);
+ unsigned min_waves_per_simd = DIV_ROUND_UP(min_waves_per_cu, 4);
+
+ max_vgprs = max_vgprs / min_waves_per_simd;
+ max_sgprs = MIN2(max_sgprs / min_waves_per_simd, max_sgprs_per_wave);
+
+ if (shader->config.num_sgprs > max_sgprs ||
+ shader->config.num_vgprs > max_vgprs) {
+ fprintf(stderr, "LLVM failed to compile a shader correctly: "
+ "SGPR:VGPR usage is %u:%u, but the hw limit is %u:%u\n",
+ shader->config.num_sgprs, shader->config.num_vgprs,
+ max_sgprs, max_vgprs);
+
+ /* Just terminate the process, because dependent
+ * shaders can hang due to bad input data, but use
+ * the env var to allow shader-db to work.
+ */
+ if (!debug_get_bool_option("SI_PASS_BAD_SHADERS", false))
+ abort();
+ }
+ }
+
/* Add the scratch offset to input SGPRs. */
if (shader->config.scratch_bytes_per_wave)
shader->info.num_input_sgprs += 1; /* scratch byte offset */
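As a worked example of the check above (hypothetical block size, pre-VI limits): a 16x16x4 fixed block is 1024 threads, i.e. 1024 / 64 = 16 waves per CU and DIV_ROUND_UP(16, 4) = 4 waves per SIMD, so the shader may use at most 256 / 4 = 64 VGPRs and MIN2(512 / 4, 128) = 128 SGPRs per wave.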
S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
+ /* Only set dual source blending for MRT0 to avoid a hang. */
+ if (i >= 1 && blend->dual_src_blend)
+ continue;
+
if (!state->rt[j].colormask)
continue;
cb.user_buffer = state->ucp;
cb.buffer_offset = 0;
cb.buffer_size = 4*4*8;
- si_set_constant_buffer(sctx, &sctx->rw_buffers,
- SI_VS_CONST_CLIP_PLANES, &cb);
+ si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb);
pipe_resource_reference(&cb.buffer, NULL);
}
return;
if (sctx->framebuffer.nr_samples > 1 &&
- (!old_rs || old_rs->multisample_enable != rs->multisample_enable))
+ (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) {
si_mark_atom_dirty(sctx, &sctx->db_render_state);
+ if (sctx->b.family >= CHIP_POLARIS10)
+ si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
+ }
+
r600_set_scissor_enable(&sctx->b, rs->scissor_enable);
si_pm4_bind_state(sctx, rasterizer, rs);
assert(0);
}
constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
- si_set_constant_buffer(sctx, &sctx->rw_buffers,
- SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);
+ si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);
- /* Smoothing (only possible with nr_samples == 1) uses the same
- * sample locations as the MSAA it simulates.
- *
- * Therefore, don't update the sample locations when
- * transitioning from no AA to smoothing-equivalent AA, and
- * vice versa.
- */
- if ((sctx->framebuffer.nr_samples != 1 ||
- old_nr_samples != SI_NUM_SMOOTH_AA_SAMPLES) &&
- (sctx->framebuffer.nr_samples != SI_NUM_SMOOTH_AA_SAMPLES ||
- old_nr_samples != 1))
- si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs);
+ si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
}
}
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned nr_samples = sctx->framebuffer.nr_samples;
- cayman_emit_msaa_sample_locs(cs, nr_samples > 1 ? nr_samples :
- SI_NUM_SMOOTH_AA_SAMPLES);
+ /* Smoothing (only possible with nr_samples == 1) uses the same
+ * sample locations as the MSAA it simulates.
+ */
+ if (nr_samples <= 1 && sctx->smoothing_enabled)
+ nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;
+
+ /* On Polaris, the small primitive filter uses the sample locations
+ * even when MSAA is off, so we need to make sure they're set to 0.
+ */
+ if ((nr_samples > 1 || sctx->b.family >= CHIP_POLARIS10) &&
+ (nr_samples != sctx->msaa_sample_locs.nr_samples)) {
+ sctx->msaa_sample_locs.nr_samples = nr_samples;
+ cayman_emit_msaa_sample_locs(cs, nr_samples);
+ }
+
+ if (sctx->b.family >= CHIP_POLARIS10) {
+ struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+ unsigned small_prim_filter_cntl =
+ S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
+ S_028830_LINE_FILTER_DISABLE(1); /* line bug */
+
+ /* The alternative of setting sample locations to 0 would
+ * require a DB flush to avoid Z errors, see
+ * https://bugs.freedesktop.org/show_bug.cgi?id=96908
+ */
+ if (sctx->framebuffer.nr_samples > 1 && rs && !rs->multisample_enable)
+ small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;
+
+ radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
+ small_prim_filter_cntl);
+ }
}
static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
(void*)array, sizeof(array),
&cb.buffer_offset);
- si_set_constant_buffer(sctx, &sctx->rw_buffers,
- SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
+ si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
pipe_resource_reference(&cb.buffer, NULL);
}
si_init_atom(sctx, &sctx->cache_flush, &sctx->atoms.s.cache_flush, si_emit_cache_flush);
si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
- si_init_atom(sctx, &sctx->msaa_sample_locs, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
+ si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
if (sctx->b.family == CHIP_STONEY)
si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
- if (sctx->b.family >= CHIP_POLARIS10)
- si_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
- S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
- S_028830_LINE_FILTER_DISABLE(1)); /* line bug */
-
si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
if (sctx->b.chip_class >= CIK)
si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
void si_emit_graphics_shader_userdata(struct si_context *sctx,
struct r600_atom *atom);
void si_emit_compute_shader_userdata(struct si_context *sctx);
-void si_set_constant_buffer(struct si_context *sctx,
- struct si_buffer_resources *buffers,
- uint slot, struct pipe_constant_buffer *input);
+void si_set_rw_buffer(struct si_context *sctx,
+ uint slot, struct pipe_constant_buffer *input);
/* si_state.c */
struct si_shader_selector;
r600_resource(ib.buffer)->TC_L2_dirty = false;
}
+ if (info->indirect && r600_resource(info->indirect)->TC_L2_dirty) {
+ sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+ r600_resource(info->indirect)->TC_L2_dirty = false;
+ }
+
/* Check flush flags. */
if (sctx->b.flags)
si_mark_atom_dirty(sctx, sctx->atoms.s.cache_flush);
if (sctx->b.chip_class == SI)
si_mark_atom_dirty(sctx, &sctx->db_render_state);
+
+ if (sctx->framebuffer.nr_samples <= 1)
+ si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
}
}
/* */
#define R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL 0x028830 /* Polaris */
#define S_028830_SMALL_PRIM_FILTER_ENABLE(x) (((x) & 0x1) << 0)
+#define C_028830_SMALL_PRIM_FILTER_ENABLE 0xFFFFFFFE
#define S_028830_TRIANGLE_FILTER_DISABLE(x) (((x) & 0x1) << 1)
#define S_028830_LINE_FILTER_DISABLE(x) (((x) & 0x1) << 2)
#define S_028830_POINT_FILTER_DISABLE(x) (((x) & 0x1) << 3)
assert(shader < ARRAY_SIZE(svga->curr.num_sampler_views));
- for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) {
+   /* In case the numbers of samplers and sampler_views don't match,
+    * loop over the lower of the two counts.
+ */
+ key->num_textures = MIN2(svga->curr.num_sampler_views[shader],
+ svga->curr.num_samplers[shader]);
+
+ for (i = 0; i < key->num_textures; i++) {
struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
- if (view) {
- assert(svga->curr.sampler[shader][i]);
+ const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
+ if (view && sampler) {
assert(view->texture);
assert(view->texture->target < (1 << 4)); /* texture_target:4 */
}
}
- if (!svga->curr.sampler[shader][i]->normalized_coords) {
+ if (!sampler->normalized_coords) {
assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */
key->tex[i].width_height_idx = idx++;
key->tex[i].unnormalized = TRUE;
key->tex[i].swizzle_a = view->swizzle_a;
}
}
- key->num_textures = svga->curr.num_sampler_views[shader];
}
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
-AM_CXXFLAGS = $(GALLIUM_DRIVER_CFLAGS) -std=c++11
+AM_CXXFLAGS = $(GALLIUM_DRIVER_CFLAGS) $(SWR_CXX11_CXXFLAGS)
noinst_LTLIBRARIES = libmesaswr.la
COMMON_CXXFLAGS = \
$(GALLIUM_DRIVER_CFLAGS) \
$(LLVM_CXXFLAGS) \
- -std=c++11 \
+ $(SWR_CXX11_CXXFLAGS) \
-I$(builddir)/rasterizer/scripts \
-I$(builddir)/rasterizer/jitter \
-I$(srcdir)/rasterizer \
lib_LTLIBRARIES = libswrAVX.la libswrAVX2.la
libswrAVX_la_CXXFLAGS = \
- -march=core-avx-i \
+ $(SWR_AVX_CXXFLAGS) \
-DKNOB_ARCH=KNOB_ARCH_AVX \
$(COMMON_CXXFLAGS)
$(COMMON_LDFLAGS)
libswrAVX2_la_CXXFLAGS = \
- -march=core-avx2 \
+ $(SWR_AVX2_CXXFLAGS) \
-DKNOB_ARCH=KNOB_ARCH_AVX2 \
$(COMMON_CXXFLAGS)
#include <xf86drm.h>
#include <xf86drmMode.h>
+#include "util/u_hash_table.h"
#include "util/u_memory.h"
#include "util/ralloc.h"
uint32_t winsys_stride,
uint32_t handle, uint32_t size)
{
- struct vc4_bo *bo = CALLOC_STRUCT(vc4_bo);
+ struct vc4_bo *bo;
assert(size);
+ pipe_mutex_lock(screen->bo_handles_mutex);
+
+ bo = util_hash_table_get(screen->bo_handles, (void*)(uintptr_t)handle);
+ if (bo) {
+ pipe_reference(NULL, &bo->reference);
+ goto done;
+ }
+
+ bo = CALLOC_STRUCT(vc4_bo);
pipe_reference_init(&bo->reference, 1);
bo->screen = screen;
bo->handle = handle;
bo->map = malloc(bo->size);
#endif
+ util_hash_table_set(screen->bo_handles, (void *)(uintptr_t)handle, bo);
+
+done:
+ pipe_mutex_unlock(screen->bo_handles_mutex);
return bo;
}
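The table exists because importing the same buffer twice yields the same GEM handle: without the lookup, two independent vc4_bo structs would wrap one handle, and unreferencing either would tear the handle down while the other still uses it. A sketch of the hazard (entry-point names assumed for illustration):

    struct vc4_bo *a = vc4_bo_open_dmabuf(screen, fd1, stride); /* handle N */
    struct vc4_bo *b = vc4_bo_open_dmabuf(screen, fd2, stride); /* same buffer: handle N again */
    vc4_bo_unreference(&a); /* without the table, this would free handle N under b */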
bo->handle);
return -1;
}
+
+ pipe_mutex_lock(bo->screen->bo_handles_mutex);
bo->private = false;
+ util_hash_table_set(bo->screen->bo_handles, (void *)(uintptr_t)bo->handle, bo);
+ pipe_mutex_unlock(bo->screen->bo_handles_mutex);
return fd;
}
#define VC4_BUFMGR_H
#include <stdint.h>
+#include "util/u_hash_table.h"
#include "util/u_inlines.h"
#include "vc4_qir.h"
static inline void
vc4_bo_unreference(struct vc4_bo **bo)
{
+ struct vc4_screen *screen;
if (!*bo)
return;
- if (pipe_reference(&(*bo)->reference, NULL))
- vc4_bo_last_unreference(*bo);
+ if ((*bo)->private) {
+ /* Avoid the mutex for private BOs */
+ if (pipe_reference(&(*bo)->reference, NULL))
+ vc4_bo_last_unreference(*bo);
+ } else {
+ screen = (*bo)->screen;
+ pipe_mutex_lock(screen->bo_handles_mutex);
+
+ if (pipe_reference(&(*bo)->reference, NULL)) {
+ util_hash_table_remove(screen->bo_handles,
+ (void *)(uintptr_t)(*bo)->handle);
+ vc4_bo_last_unreference(*bo);
+ }
+
+ pipe_mutex_unlock(screen->bo_handles_mutex);
+ }
+
*bo = NULL;
}
/** bitmask of which inputs are color inputs, for flat shade handling. */
uint32_t color_inputs;
+ bool disable_early_z;
+
uint8_t num_inputs;
/* Byte offsets for the start of the vertex attributes 0-7, and the
vc4->draw_max_y = MAX2(vc4->draw_max_y, maxy);
}
- if (vc4->dirty & (VC4_DIRTY_RASTERIZER | VC4_DIRTY_ZSA)) {
+ if (vc4->dirty & (VC4_DIRTY_RASTERIZER |
+ VC4_DIRTY_ZSA |
+ VC4_DIRTY_COMPILED_FS)) {
uint8_t ez_enable_mask_out = ~0;
/* HW-2905: If the RCL ends up doing a full-res load when
* was seeing bad rendering on glxgears -samples 4 even in
* that case.
*/
- if (vc4->msaa)
+ if (vc4->msaa || vc4->prog.fs->disable_early_z)
ez_enable_mask_out &= ~VC4_CONFIG_BITS_EARLY_Z;
cl_u8(&bcl, VC4_PACKET_CONFIGURATION_BITS);
* sources are independent of previous instructions
*/
if (temps == 1) {
- list_del(&inst->link);
inst->src[j] = mov->src[0];
- list_replace(&mov->link, &inst->link);
- c->defs[temp] = NULL;
- free(mov);
+
+ list_del(&inst->link);
+ list_addtail(&inst->link, &mov->link);
+ qir_remove_instruction(c, mov);
+
progress = true;
break;
}
shader->input_slots[shader->num_inputs] = *slot;
shader->num_inputs++;
}
+
+ /* Note: the temporary clone in c->s has been freed. */
+ nir_shader *orig_shader = key->shader_state->base.ir.nir;
+ if (orig_shader->info.outputs_written & (1 << FRAG_RESULT_DEPTH))
+ shader->disable_early_z = true;
} else {
shader->num_inputs = c->num_inputs;
struct vc4_resource *rsc = vc4_resource_setup(pscreen, tmpl);
struct pipe_resource *prsc = &rsc->base.b;
struct vc4_resource_slice *slice = &rsc->slices[0];
- uint32_t expected_stride = align(prsc->width0 / rsc->cpp,
- vc4_utile_width(rsc->cpp));
+ uint32_t expected_stride =
+ align(prsc->width0, vc4_utile_width(rsc->cpp)) * rsc->cpp;
if (!rsc)
return NULL;
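For example (assuming a 4x4-pixel utile at cpp = 4): importing a 100-pixel-wide buffer should require a stride of align(100, 4) * 4 = 400 bytes. The old expression divided the width by cpp before aligning, mixing pixel and byte units.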
#include "util/u_debug.h"
#include "util/u_memory.h"
#include "util/u_format.h"
+#include "util/u_hash_table.h"
#include "util/ralloc.h"
#include "vc4_screen.h"
static void
vc4_screen_destroy(struct pipe_screen *pscreen)
{
+ struct vc4_screen *screen = vc4_screen(pscreen);
+
+ util_hash_table_destroy(screen->bo_handles);
vc4_bufmgr_destroy(pscreen);
+ close(screen->fd);
ralloc_free(pscreen);
}
return retval == usage;
}
+#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
+
+static unsigned handle_hash(void *key)
+{
+ return PTR_TO_UINT(key);
+}
+
+static int handle_compare(void *key1, void *key2)
+{
+ return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
+}
+
struct pipe_screen *
vc4_screen_create(int fd)
{
screen->fd = fd;
list_inithead(&screen->bo_cache.time_list);
+ pipe_mutex_init(screen->bo_handles_mutex);
+ screen->bo_handles = util_hash_table_create(handle_hash, handle_compare);
vc4_fence_init(screen);
uint32_t bo_count;
} bo_cache;
+ struct util_hash_table *bo_handles;
+ pipe_mutex bo_handles_mutex;
+
uint32_t bo_size;
uint32_t bo_count;
};
struct pipe_blend_color
{
- /**
- * Making the color array explicitly 16-byte aligned provides a hint to
- * compilers to make more efficient auto-vectorization optimizations.
- * The actual performance gains from vectorizing the blend color array are
- * fairly minimal, if any, but the alignment is necessary to work around
- * buggy vectorization in some compilers which fail to generate the correct
- * unaligned accessors resulting in a segfault. Specifically several
- * versions of the Intel compiler are known to be affected but it's likely
- * others are as well.
- */
- PIPE_ALIGN_VAR(16) float color[4];
+ float color[4];
};
{
struct encode_task *i, *next;
- if (!head)
- return;
+ if (!head || !head->next)
+ return;
LIST_FOR_EACH_ENTRY_SAFE(i, next, head, list) {
pipe_resource_reference(&i->bitstream, NULL);
include $(top_srcdir)/src/gallium/Automake.inc
+if HAVE_ANDROID
+if HAVE_SHARED_GLAPI
+SHARED_GLAPI_LIB = $(top_builddir)/src/mapi/shared-glapi/libglapi.la
+endif
+endif
+
AM_CFLAGS = \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa \
$(top_builddir)/src/gallium/drivers/noop/libnoop.la \
$(top_builddir)/src/gallium/drivers/rbug/librbug.la \
$(top_builddir)/src/gallium/drivers/trace/libtrace.la \
+ $(SHARED_GLAPI_LIB) \
$(SELINUX_LIBS) \
$(EXPAT_LIBS) \
$(LIBDRM_LIBS) \
AddrSurfInfoIn.flags.dccCompatible = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
!(surf->flags & RADEON_SURF_SCANOUT) &&
!(surf->flags & RADEON_SURF_DISABLE_DCC) &&
- !compressed && AddrDccIn.numSamples <= 1;
+ !compressed && AddrDccIn.numSamples <= 1 &&
+ surf->last_level == 0;
/* This disables incorrect calculations (hacks) in addrlib. */
AddrSurfInfoIn.flags.noStencil = 1;
#include "util/u_memory.h"
#include "pipe/p_compiler.h"
#include "util/u_hash_table.h"
-#include <sys/types.h>
+#ifdef MAJOR_IN_MKDEV
+#include <sys/mkdev.h>
+#endif
+#ifdef MAJOR_IN_SYSMACROS
+#include <sys/sysmacros.h>
+#endif
#include <sys/stat.h>
#include <unistd.h>
}
static struct kms_sw_displaytarget *
-kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd)
+kms_sw_displaytarget_find_and_ref(struct kms_sw_winsys *kms_sw,
+ unsigned int kms_handle)
+{
+ struct kms_sw_displaytarget *kms_sw_dt;
+
+ LIST_FOR_EACH_ENTRY(kms_sw_dt, &kms_sw->bo_list, link) {
+ if (kms_sw_dt->handle == kms_handle) {
+ kms_sw_dt->ref_count++;
+
+ DEBUG_PRINT("KMS-DEBUG: imported buffer %u (size %u)\n",
+ kms_sw_dt->handle, kms_sw_dt->size);
+
+ return kms_sw_dt;
+ }
+ }
+
+ return NULL;
+}
+
+static struct kms_sw_displaytarget *
+kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd,
+ unsigned width, unsigned height,
+ unsigned stride)
{
uint32_t handle = -1;
struct kms_sw_displaytarget * kms_sw_dt;
if (ret)
return NULL;
+ kms_sw_dt = kms_sw_displaytarget_find_and_ref(kms_sw, handle);
+ if (kms_sw_dt)
+ return kms_sw_dt;
+
kms_sw_dt = CALLOC_STRUCT(kms_sw_displaytarget);
if (!kms_sw_dt)
return NULL;
kms_sw_dt->ref_count = 1;
kms_sw_dt->handle = handle;
kms_sw_dt->size = lseek(fd, 0, SEEK_END);
+ kms_sw_dt->width = width;
+ kms_sw_dt->height = height;
+ kms_sw_dt->stride = stride;
if (kms_sw_dt->size == (off_t)-1) {
FREE(kms_sw_dt);
switch(whandle->type) {
case DRM_API_HANDLE_TYPE_FD:
- kms_sw_dt = kms_sw_displaytarget_add_from_prime(kms_sw, whandle->handle);
- if (kms_sw_dt) {
- kms_sw_dt->ref_count++;
- kms_sw_dt->width = templ->width0;
- kms_sw_dt->height = templ->height0;
- kms_sw_dt->stride = whandle->stride;
+ kms_sw_dt = kms_sw_displaytarget_add_from_prime(kms_sw, whandle->handle,
+ templ->width0,
+ templ->height0,
+ whandle->stride);
+ if (kms_sw_dt)
*stride = kms_sw_dt->stride;
- }
return (struct sw_displaytarget *)kms_sw_dt;
case DRM_API_HANDLE_TYPE_KMS:
- LIST_FOR_EACH_ENTRY(kms_sw_dt, &kms_sw->bo_list, link) {
- if (kms_sw_dt->handle == whandle->handle) {
- kms_sw_dt->ref_count++;
-
- DEBUG_PRINT("KMS-DEBUG: imported buffer %u (size %u)\n", kms_sw_dt->handle, kms_sw_dt->size);
-
- *stride = kms_sw_dt->stride;
- return (struct sw_displaytarget *)kms_sw_dt;
- }
+ kms_sw_dt = kms_sw_displaytarget_find_and_ref(kms_sw, whandle->handle);
+ if (kms_sw_dt) {
+ *stride = kms_sw_dt->stride;
+ return (struct sw_displaytarget *)kms_sw_dt;
}
/* fallthrough */
default:
#include <string.h>
#include <stdint.h>
-#include <sys/types.h>
+#ifdef MAJOR_IN_MKDEV
+#include <sys/mkdev.h>
+#endif
+#ifdef MAJOR_IN_SYSMACROS
+#include <sys/sysmacros.h>
+#endif
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
uint_fast16_t minorCode, bool coreX11error)
{
struct glx_display *glx_dpy = __glXInitialize(dpy);
- struct glx_context *gc = __glXGetCurrentContext();
xError error;
assert(glx_dpy);
- assert(gc);
LockDisplay(dpy);
error.sequenceNumber = dpy->request;
error.resourceID = resourceID;
error.minorCode = minorCode;
- error.majorCode = gc ? gc->majorOpcode : 0;
+ error.majorCode = glx_dpy->majorOpcode;
_XError(dpy, &error);
* the calling thread's current context a GLXBadDrawable error is
* generated."
*/
- if (pdraw == NULL || gc == NULL || gc->currentDpy != dpy ||
+ if (pdraw == NULL || gc == &dummyContext || gc->currentDpy != dpy ||
(gc->currentDrawable != drawable &&
gc->currentReadable != drawable)) {
__glXSendError(dpy, GLXBadDrawable, drawable,
{
struct glx_context *gc = __glXGetCurrentContext();
- if (gc && gc->vtable->wait_gl)
+ if (gc != &dummyContext && gc->vtable->wait_gl)
gc->vtable->wait_gl(gc);
}
{
struct glx_context *gc = __glXGetCurrentContext();
- if (gc && gc->vtable->wait_x)
+ if (gc != &dummyContext && gc->vtable->wait_x)
gc->vtable->wait_x(gc);
}
{
struct glx_context *gc = __glXGetCurrentContext();
- if (gc && gc->vtable->use_x_font)
+ if (gc != &dummyContext && gc->vtable->use_x_font)
gc->vtable->use_x_font(gc, font, first, count, listBase);
}
__GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, drawable);
if (pdraw != NULL) {
- Bool flush = gc && drawable == gc->currentDrawable;
+ Bool flush = gc != &dummyContext && drawable == gc->currentDrawable;
(*pdraw->psc->driScreen->swapBuffers)(pdraw, 0, 0, 0, flush);
return;
** The calling thread may or may not have a current context. If it
** does, send the context tag so the server can do a flush.
*/
- if ((gc != NULL) && (dpy == gc->currentDpy) &&
+ if ((gc != &dummyContext) && (dpy == gc->currentDpy) &&
((drawable == gc->currentDrawable)
|| (drawable == gc->currentReadable))) {
tag = gc->currentContextTag;
glXGetCurrentDisplay(void)
{
struct glx_context *gc = __glXGetCurrentContext();
- if (NULL == gc)
+ if (gc == &dummyContext)
return NULL;
return gc->currentDpy;
}
CARD32 *interval_ptr;
CARD8 opcode;
- if (gc == NULL) {
+ if (gc == &dummyContext) {
return GLX_BAD_CONTEXT;
}
#ifdef GLX_DIRECT_RENDERING
struct glx_context *gc = __glXGetCurrentContext();
- if (gc != NULL && gc->isDirect) {
+ if (gc != &dummyContext && gc->isDirect) {
struct glx_screen *psc;
psc = GetGLXScreenConfigs( gc->currentDpy, gc->screen);
#ifdef GLX_DIRECT_RENDERING
struct glx_context *gc = __glXGetCurrentContext();
- if (gc != NULL && gc->isDirect) {
+ if (gc != &dummyContext && gc->isDirect) {
struct glx_screen *psc;
psc = GetGLXScreenConfigs( gc->currentDpy, gc->screen);
__GLXDRIdrawable *pdraw;
#endif
- if (!gc)
+ if (gc == &dummyContext)
return GLX_BAD_CONTEXT;
#ifdef GLX_DIRECT_RENDERING
if (divisor <= 0 || remainder < 0)
return GLX_BAD_VALUE;
- if (!gc)
+ if (gc == &dummyContext)
return GLX_BAD_CONTEXT;
#ifdef GLX_DIRECT_RENDERING
struct glx_screen *psc = pdraw ? pdraw->psc : NULL;
#endif
- if (!gc) /* no GLX for this */
+ if (gc == &dummyContext) /* no GLX for this */
return -1;
#ifdef GLX_DIRECT_RENDERING
** does, send the context tag so the server can do a flush.
*/
gc = __glXGetCurrentContext();
- if ((gc != NULL) && (dpy == gc->currentDpy) &&
+ if ((gc != &dummyContext) && (dpy == gc->currentDpy) &&
((drawable == gc->currentDrawable) ||
(drawable == gc->currentReadable))) {
tag = gc->currentContextTag;
{
struct glx_context *gc = __glXGetCurrentContext();
- if (gc == NULL || gc->vtable->bind_tex_image == NULL)
+ if (gc == &dummyContext || gc->vtable->bind_tex_image == NULL)
return;
gc->vtable->bind_tex_image(dpy, drawable, buffer, attrib_list);
{
struct glx_context *gc = __glXGetCurrentContext();
- if (gc == NULL || gc->vtable->release_tex_image == NULL)
+ if (gc == &dummyContext || gc->vtable->release_tex_image == NULL)
return;
gc->vtable->release_tex_image(dpy, drawable, buffer);
static unsigned FindGLXFunction(const GLubyte *name)
{
- unsigned first = 0;
- unsigned last = DI_FUNCTION_COUNT - 1;
+ int first = 0;
+ int last = DI_FUNCTION_COUNT - 1;
while (first <= last) {
- unsigned middle = (first + last) / 2;
- int comp = strcmp((const char *) name,
- __glXDispatchTableStrings[middle]);
+ int middle = (first + last) / 2;
+ int comp = strcmp(__glXDispatchTableStrings[middle],
+ (const char *) name);
if (comp < 0)
first = middle + 1;
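Two fixes in one hunk: with unsigned indices, probing for a name that sorts before entry 0 would compute last = middle - 1 = UINT_MAX instead of -1, so first <= last never became false and the search walked off the table; and with the old strcmp argument order, comp < 0 meant the target sorted before the middle entry, yet the code moved first past the middle. Signed indices terminate naturally, and the swapped operands make comp < 0 genuinely mean "search the upper half".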
{
struct glx_context *gc = __glXGetCurrentContext();
- if (gc == NULL)
+ if (gc == &dummyContext)
return False;
return __glXQueryRendererInteger(gc->psc, attribute, value);
{
struct glx_context *gc = __glXGetCurrentContext();
- if (gc == NULL)
+ if (gc == &dummyContext)
return False;
return __glXQueryRendererString(gc->psc, attribute);
return indirect_create_context(base, config_base, shareList, 0);
}
-__thread void *__glX_tls_Context = NULL;
+/* This is necessary so that we don't have to link with glxcurrent.c
+ * which would require us to link with the X libraries and so on.
+ */
+GLubyte dummyBuffer[__GLX_BUFFER_LIMIT_SIZE];
+struct glx_context_vtable dummyVtable;
+struct glx_context dummyContext = {
+ &dummyBuffer[0],
+ &dummyBuffer[0],
+ &dummyBuffer[0],
+ &dummyBuffer[__GLX_BUFFER_LIMIT_SIZE],
+ sizeof(dummyBuffer),
+ &dummyVtable
+};
+__thread void *__glX_tls_Context = &dummyContext;
#if !defined(GLX_USE_TLS)
extern "C" struct glx_context *
</group>
</struct>
- <struct name="BLEND_STATE" length="2">
+ <struct name="BLEND_STATE_ENTRY" length="2">
<field name="Color Buffer Blend Enable" start="31" end="31" type="bool"/>
<field name="Independent Alpha Blend Enable" start="30" end="30" type="bool"/>
<field name="Alpha Blend Function" start="26" end="28" type="uint">
<field name="Post-Blend Color Clamp Enable" start="32" end="32" type="bool"/>
</struct>
+ <struct name="BLEND_STATE" length="16">
+ <group count="8" start="0" size="64">
+ <field name="Entry" start="0" end="63" type="BLEND_STATE_ENTRY"/>
+ </group>
+ </struct>
+
<struct name="CC_VIEWPORT" length="2">
<field name="Minimum Depth" start="0" end="31" type="float"/>
<field name="Maximum Depth" start="32" end="63" type="float"/>
<field name="CLIP Enable" start="95" end="95" type="bool"/>
<field name="API Mode" start="94" end="94" type="uint">
<value name="APIMODE_OGL" value="0"/>
+ <value name="APIMODE_D3D" value="1"/>
</field>
<field name="Viewport XY ClipTest Enable" start="92" end="92" type="bool"/>
<field name="Viewport Z ClipTest Enable" start="91" end="91" type="bool"/>
</group>
</struct>
- <struct name="BLEND_STATE" length="2">
+ <struct name="BLEND_STATE_ENTRY" length="2">
<field name="Color Buffer Blend Enable" start="31" end="31" type="bool"/>
<field name="Independent Alpha Blend Enable" start="30" end="30" type="bool"/>
<field name="Alpha Blend Function" start="26" end="28" type="uint">
<field name="Post-Blend Color Clamp Enable" start="32" end="32" type="bool"/>
</struct>
+ <struct name="BLEND_STATE" length="16">
+ <group count="8" start="0" size="64">
+ <field name="Entry" start="0" end="63" type="BLEND_STATE_ENTRY"/>
+ </group>
+ </struct>
+
<struct name="CC_VIEWPORT" length="2">
<field name="Minimum Depth" start="0" end="31" type="float"/>
<field name="Maximum Depth" start="32" end="63" type="float"/>
<field name="Clip Enable" start="95" end="95" type="bool"/>
<field name="API Mode" start="94" end="94" type="uint">
<value name="APIMODE_OGL" value="0"/>
+ <value name="APIMODE_D3D" value="1"/>
</field>
<field name="Viewport XY ClipTest Enable" start="92" end="92" type="bool"/>
<field name="Viewport Z ClipTest Enable" start="91" end="91" type="bool"/>
</group>
</struct>
- <struct name="BLEND_STATE" length="2">
+ <struct name="BLEND_STATE_ENTRY" length="2">
<field name="Color Buffer Blend Enable" start="31" end="31" type="bool"/>
<field name="Independent Alpha Blend Enable" start="30" end="30" type="bool"/>
<field name="Alpha Blend Function" start="26" end="28" type="uint">
<field name="Post-Blend Color Clamp Enable" start="32" end="32" type="bool"/>
</struct>
+ <struct name="BLEND_STATE" length="16">
+ <group count="8" start="0" size="64">
+ <field name="Entry" start="0" end="63" type="BLEND_STATE_ENTRY"/>
+ </group>
+ </struct>
+
<struct name="CC_VIEWPORT" length="2">
<field name="Minimum Depth" start="0" end="31" type="float"/>
<field name="Maximum Depth" start="32" end="63" type="float"/>
<field name="Clip Enable" start="95" end="95" type="bool"/>
<field name="API Mode" start="94" end="94" type="uint">
<value name="APIMODE_OGL" value="0"/>
+ <value name="APIMODE_D3D" value="1"/>
</field>
<field name="Viewport XY ClipTest Enable" start="92" end="92" type="bool"/>
<field name="Viewport Z ClipTest Enable" start="91" end="91" type="bool"/>
<field name="User Clip Distance Cull Test Enable Bitmask" start="32" end="39" type="uint"/>
<field name="Clip Enable" start="95" end="95" type="bool"/>
<field name="API Mode" start="94" end="94" type="uint">
- <value name="OGL" value="0"/>
+ <value name="APIMODE_OGL" value="0"/>
+ <value name="APIMODE_D3D" value="1"/>
</field>
<field name="Viewport XY Clip Test Enable" start="92" end="92" type="bool"/>
<field name="Guardband Clip Test Enable" start="90" end="90" type="bool"/>
<field name="User Clip Distance Clip Test Enable Bitmask" start="80" end="87" type="uint"/>
<field name="Clip Mode" start="77" end="79" type="uint">
- <value name="NORMAL" value="0"/>
- <value name="REJECT_ALL" value="3"/>
- <value name="ACCEPT_ALL" value="4"/>
+ <value name="CLIPMODE_NORMAL" value="0"/>
+ <value name="CLIPMODE_REJECT_ALL" value="3"/>
+ <value name="CLIPMODE_ACCEPT_ALL" value="4"/>
</field>
<field name="Perspective Divide Disable" start="73" end="73" type="bool"/>
<field name="Non-Perspective Barycentric Enable" start="72" end="72" type="bool"/>
<field name="3D Command Sub Opcode" start="16" end="23" type="uint" default="30"/>
<field name="DWord Length" start="0" end="7" type="uint" default="3"/>
<field name="SO Function Enable" start="63" end="63" type="uint"/>
- <field name="API Rendering Disable" start="62" end="62" type="uint"/>
+ <field name="Rendering Disable" start="62" end="62" type="uint"/>
<field name="Render Stream Select" start="59" end="60" type="uint"/>
<field name="Reorder Mode" start="58" end="58" type="uint">
<value name="LEADING" value="0"/>
<field name="User Clip Distance Cull Test Enable Bitmask" start="32" end="39" type="uint"/>
<field name="Clip Enable" start="95" end="95" type="bool"/>
<field name="API Mode" start="94" end="94" type="uint">
- <value name="OGL" value="0"/>
+ <value name="APIMODE_OGL" value="0"/>
+ <value name="APIMODE_D3D" value="1"/>
</field>
<field name="Viewport XY Clip Test Enable" start="92" end="92" type="bool"/>
<field name="Guardband Clip Test Enable" start="90" end="90" type="bool"/>
<field name="User Clip Distance Clip Test Enable Bitmask" start="80" end="87" type="uint"/>
<field name="Clip Mode" start="77" end="79" type="uint">
- <value name="NORMAL" value="0"/>
- <value name="REJECT_ALL" value="3"/>
- <value name="ACCEPT_ALL" value="4"/>
+ <value name="CLIPMODE_NORMAL" value="0"/>
+ <value name="CLIPMODE_REJECT_ALL" value="3"/>
+ <value name="CLIPMODE_ACCEPT_ALL" value="4"/>
</field>
<field name="Perspective Divide Disable" start="73" end="73" type="bool"/>
<field name="Non-Perspective Barycentric Enable" start="72" end="72" type="bool"/>
<field name="3D Command Sub Opcode" start="16" end="23" type="uint" default="30"/>
<field name="DWord Length" start="0" end="7" type="uint" default="3"/>
<field name="SO Function Enable" start="63" end="63" type="uint"/>
- <field name="API Rendering Disable" start="62" end="62" type="uint"/>
+ <field name="Rendering Disable" start="62" end="62" type="uint"/>
<field name="Render Stream Select" start="59" end="60" type="uint"/>
<field name="Reorder Mode" start="58" end="58" type="uint">
<value name="LEADING" value="0"/>
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/include
-libisl_la_CFLAGS = $(CFLAGS) -Wno-override-init
+AM_CFLAGS = \
+ $(VISIBILITY_CFLAGS) \
+ -Wno-override-init
libisl_la_LIBADD = $(ISL_GEN_LIBS)
libisl_la_SOURCES = $(ISL_FILES) $(ISL_GENERATED_FILES)
libisl_gen7_la_SOURCES = $(ISL_GEN7_FILES)
-libisl_gen7_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=70
+libisl_gen7_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=70
libisl_gen75_la_SOURCES = $(ISL_GEN75_FILES)
-libisl_gen75_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=75
+libisl_gen75_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=75
libisl_gen8_la_SOURCES = $(ISL_GEN8_FILES)
-libisl_gen8_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=80
+libisl_gen8_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=80
libisl_gen9_la_SOURCES = $(ISL_GEN9_FILES)
-libisl_gen9_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=90
+libisl_gen9_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=90
BUILT_SOURCES = $(ISL_GENERATED_FILES)
case ISL_MSAA_LAYOUT_ARRAY:
assert(info->depth == 1);
- assert(info->array_len == 1);
+ assert(info->levels == 1);
assert(!isl_format_is_compressed(info->format));
*phys_level0_sa = (struct isl_extent4d) {
.w = info->width,
.h = info->height,
.d = 1,
- .a = info->samples,
+ .a = info->array_len * info->samples,
};
break;
case ISL_MSAA_LAYOUT_INTERLEAVED:
assert(info->depth == 1);
- assert(info->array_len == 1);
+ assert(info->levels == 1);
assert(!isl_format_is_compressed(info->format));
*phys_level0_sa = (struct isl_extent4d) {
.w = info->width,
.h = info->height,
.d = 1,
- .a = 1,
+ .a = info->array_len,
};
isl_msaa_interleaved_scale_px_to_sa(info->samples,
static inline bool
isl_tiling_is_any_y(enum isl_tiling tiling)
{
- return (1u << tiling) & ISL_TILING_ANY_MASK;
+ return (1u << tiling) & ISL_TILING_ANY_Y_MASK;
}
static inline bool
uint32_t num_elements = info->size / info->stride;
if (GEN_GEN >= 7) {
+ /* From the IVB PRM, SURFACE_STATE::Height,
+ *
+ * For typed buffer and structured buffer surfaces, the number
+ * of entries in the buffer ranges from 1 to 2^27. For raw buffer
+ * surfaces, the number of entries in the buffer is the number of bytes
+ * which can range from 1 to 2^30.
+ */
if (info->format == ISL_FORMAT_RAW) {
- assert(num_elements <= (1ull << 31));
+ assert(num_elements <= (1ull << 30));
assert((num_elements & 3) == 0);
} else {
assert(num_elements <= (1ull << 27));
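These are the same ceilings the Vulkan driver now advertises: maxStorageBufferRange mirrors the 2^30-byte raw-buffer cap on gen7+, and maxUniformBufferRange the 2^27-element typed cap (see the anv device-limits hunk further down).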
-I$(top_builddir)/src/intel \
-I$(top_srcdir)/src/intel
-AM_CFLAGS = -Wno-override-init -msse2
+AM_CFLAGS = \
+ $(VISIBILITY_CFLAGS) \
+ -Wno-override-init -msse2
libanv_gen7_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=70
libanv_gen7_la_SOURCES = $(GEN7_FILES)
-module \
-no-undefined \
-avoid-version \
+ $(BSYMBOLIC) \
$(GC_SECTIONS) \
$(LD_NO_UNDEFINED)
switch (cmd_buffer->device->info.gen) {
case 7:
if (cmd_buffer->device->info.is_haswell)
- return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
+ return gen75_cmd_buffer_emit_state_base_address(cmd_buffer);
else
return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
case 8:
{
struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
struct anv_subpass *subpass = cmd_buffer->state.subpass;
- struct anv_pipeline_bind_map *map;
+ struct anv_pipeline *pipeline;
uint32_t bias, state_offset;
switch (stage) {
case MESA_SHADER_COMPUTE:
- map = &cmd_buffer->state.compute_pipeline->bindings[stage];
+ pipeline = cmd_buffer->state.compute_pipeline;
bias = 1;
break;
default:
- map = &cmd_buffer->state.pipeline->bindings[stage];
+ pipeline = cmd_buffer->state.pipeline;
bias = 0;
break;
}
+ if (!anv_pipeline_has_stage(pipeline, stage)) {
+ *bt_state = (struct anv_state) { 0, };
+ return VK_SUCCESS;
+ }
+
+ struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
if (bias + map->surface_count == 0) {
*bt_state = (struct anv_state) { 0, };
return VK_SUCCESS;
anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
gl_shader_stage stage, struct anv_state *state)
{
- struct anv_pipeline_bind_map *map;
+ struct anv_pipeline *pipeline;
if (stage == MESA_SHADER_COMPUTE)
- map = &cmd_buffer->state.compute_pipeline->bindings[stage];
+ pipeline = cmd_buffer->state.compute_pipeline;
else
- map = &cmd_buffer->state.pipeline->bindings[stage];
+ pipeline = cmd_buffer->state.pipeline;
+ if (!anv_pipeline_has_stage(pipeline, stage)) {
+ *state = (struct anv_state) { 0, };
+ return VK_SUCCESS;
+ }
+
+ struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
if (map->sampler_count == 0) {
*state = (struct anv_state) { 0, };
return VK_SUCCESS;
anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
gl_shader_stage stage)
{
+ /* If we don't have this stage, bail. */
+ if (!anv_pipeline_has_stage(cmd_buffer->state.pipeline, stage))
+ return (struct anv_state) { .offset = 0 };
+
struct anv_push_constants *data =
cmd_buffer->state.push_constants[stage];
const struct brw_stage_prog_data *prog_data =
- cmd_buffer->state.pipeline->prog_data[stage];
+ anv_shader_bin_get_prog_data(cmd_buffer->state.pipeline->shaders[stage]);
/* If we don't actually have any push constants, bail. */
if (data == NULL || prog_data == NULL || prog_data->nr_params == 0)
#include <unistd.h>
#include <fcntl.h>
+#include "util/mesa-sha1.h"
+
#include "anv_private.h"
/*
struct anv_sampler **samplers =
(struct anv_sampler **)&set_layout->binding[max_binding + 1];
+ memset(set_layout, 0, sizeof(*set_layout));
set_layout->binding_count = max_binding + 1;
- set_layout->shader_stages = 0;
- set_layout->size = 0;
for (uint32_t b = 0; b <= max_binding; b++) {
/* Initialize all binding_layout entries to -1 */
anv_free2(&device->alloc, pAllocator, set_layout);
}
+static void
+sha1_update_descriptor_set_layout(struct mesa_sha1 *ctx,
+ const struct anv_descriptor_set_layout *layout)
+{
+ size_t size = sizeof(*layout) +
+ sizeof(layout->binding[0]) * layout->binding_count;
+ _mesa_sha1_update(ctx, layout, size);
+}
+
/*
* Pipeline layouts. These have nothing to do with the pipeline. They are
 * just multiple descriptor set layouts pasted together
}
}
+ struct mesa_sha1 *ctx = _mesa_sha1_init();
+ for (unsigned s = 0; s < layout->num_sets; s++) {
+ sha1_update_descriptor_set_layout(ctx, layout->set[s].layout);
+ _mesa_sha1_update(ctx, &layout->set[s].dynamic_offset_start,
+ sizeof(layout->set[s].dynamic_offset_start));
+ }
+ _mesa_sha1_update(ctx, &layout->num_sets, sizeof(layout->num_sets));
+ for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+ _mesa_sha1_update(ctx, &layout->stage[s].has_dynamic_offsets,
+ sizeof(layout->stage[s].has_dynamic_offsets));
+ }
+ _mesa_sha1_final(ctx, layout->sha1);
+
*pPipelineLayout = anv_pipeline_layout_to_handle(layout);
return VK_SUCCESS;
(struct anv_buffer_view *) &set->descriptors[layout->size];
set->buffer_count = layout->buffer_count;
+ /* By defining the descriptors to be zero now, we can later verify that
+ * a descriptor has not been populated with user data.
+ */
+ memset(set->descriptors, 0, sizeof(struct anv_descriptor) * layout->size);
+
/* Go through and fill out immutable samplers if we have any */
struct anv_descriptor *desc = set->descriptors;
for (uint32_t b = 0; b < layout->binding_count; b++) {
.robustBufferAccess = true,
.fullDrawIndexUint32 = true,
.imageCubeArray = false,
- .independentBlend = pdevice->info->gen >= 8,
+ .independentBlend = true,
.geometryShader = true,
.tessellationShader = false,
.sampleRateShading = false,
const float time_stamp_base = devinfo->gen >= 9 ? 83.333 : 80.0;
+ /* See assertions made when programming the buffer surface state. */
+ const uint32_t max_raw_buffer_sz = devinfo->gen >= 7 ?
+ (1ul << 30) : (1ul << 27);
+
VkSampleCountFlags sample_counts =
isl_device_get_sample_counts(&pdevice->isl_dev);
.maxImageDimensionCube = (1 << 14),
.maxImageArrayLayers = (1 << 11),
.maxTexelBufferElements = 128 * 1024 * 1024,
- .maxUniformBufferRange = UINT32_MAX,
- .maxStorageBufferRange = UINT32_MAX,
+ .maxUniformBufferRange = (1ul << 27),
+ .maxStorageBufferRange = max_raw_buffer_sz,
.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
.maxMemoryAllocationCount = UINT32_MAX,
.maxSamplerAllocationCount = 64 * 1024,
return anv_lookup_entrypoint(pName);
}
-/* The loader wants us to expose a second GetInstanceProcAddr function
- * to work around certain LD_PRELOAD issues seen in apps.
+/* With version 1+ of the loader interface, the ICD should expose
+ * vk_icdGetInstanceProcAddr so the loader can work around certain
+ * LD_PRELOAD issues seen in apps.
*/
+PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
VkInstance instance,
const char* pName);
+PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
VkInstance instance,
const char* pName)
&device->dynamic_state_block_pool);
anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024);
- anv_pipeline_cache_init(&device->default_pipeline_cache, device);
+ anv_state_pool_init(&device->instruction_state_pool,
+ &device->instruction_block_pool);
anv_block_pool_init(&device->surface_state_block_pool, device, 4096);
anv_bo_pool_finish(&device->batch_bo_pool);
anv_state_pool_finish(&device->dynamic_state_pool);
anv_block_pool_finish(&device->dynamic_state_block_pool);
+ anv_state_pool_finish(&device->instruction_state_pool);
anv_block_pool_finish(&device->instruction_block_pool);
anv_state_pool_finish(&device->surface_state_pool);
anv_block_pool_finish(&device->surface_state_block_pool);
anv_free2(&device->alloc, pAllocator, fb);
}
-
-void vkCmdDbgMarkerBegin(
- VkCommandBuffer commandBuffer,
- const char* pMarker)
- __attribute__ ((visibility ("default")));
-
-void vkCmdDbgMarkerEnd(
- VkCommandBuffer commandBuffer)
- __attribute__ ((visibility ("default")));
-
-void vkCmdDbgMarkerBegin(
- VkCommandBuffer commandBuffer,
- const char* pMarker)
-{
-}
-
-void vkCmdDbgMarkerEnd(
- VkCommandBuffer commandBuffer)
-{
-}
print "%s gen75_%s%s;" % (type, name, args)
print "%s gen8_%s%s;" % (type, name, args)
print "%s gen9_%s%s;" % (type, name, args)
- print "%s anv_validate_%s%s;" % (type, name, args)
print_guard_end(name)
exit()
print " \"vk%s\\0\"" % name
offsets.append(i)
i += 2 + len(name) + 1
-print """ ;
+print " ;"
-/* Weak aliases for all potential validate functions. These will resolve to
- * NULL if they're not defined, which lets the resolve_entrypoint() function
- * either pick a validate wrapper if available or just plug in the actual
- * entry point.
- */
-"""
-
-# Now generate the table of all entry points and their validation functions
+# Now generate the table of all entry points
print "\nstatic const struct anv_entrypoint entrypoints[] = {"
for type, name, args, num, h in entrypoints:
print " { %5d, 0x%08x }," % (offsets[num], h)
print "};\n"
-for layer in [ "anv", "validate", "gen7", "gen75", "gen8", "gen9" ]:
+print """
+
+/* Weak aliases for all potential implementations. These will resolve to
+ * NULL if they're not defined, which lets the resolve_entrypoint() function
+ * pick the correct entry point.
+ */
+"""
+
+for layer in [ "anv", "gen7", "gen75", "gen8", "gen9" ]:
for type, name, args, num, h in entrypoints:
print_guard_start(name)
print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args)
print "};\n"
print """
-#ifdef DEBUG
-static bool enable_validate = true;
-#else
-static bool enable_validate = false;
-#endif
-
-/* We can't use symbols that need resolving (like, oh, getenv) in the resolve
- * function. This means that we have to determine whether or not to use the
- * validation layer sometime before that. The constructor function attribute asks
- * the dynamic linker to invoke determine_validate() at dlopen() time which
- * works.
- */
-static void __attribute__ ((constructor))
-determine_validate(void)
-{
- const char *s = getenv("ANV_VALIDATE");
-
- if (s)
- enable_validate = atoi(s);
-}
-
static const struct brw_device_info *dispatch_devinfo;
void
void * __attribute__ ((noinline))
anv_resolve_entrypoint(uint32_t index)
{
- if (enable_validate && validate_layer.entrypoints[index])
- return validate_layer.entrypoints[index];
-
if (dispatch_devinfo == NULL) {
return anv_layer.entrypoints[index];
}
}
"""
-# Now output ifuncs and their resolve helpers for all entry points. The
-# resolve helper calls resolve_entrypoint() with the entry point index, which
-# lets the resolver look it up in the table.
-
-for type, name, args, num, h in entrypoints:
- print_guard_start(name)
- print "static void *resolve_%s(void) { return anv_resolve_entrypoint(%d); }" % (name, num)
- print "%s vk%s%s\n __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name)
- print_guard_end(name)
-
-
# Now generate the hash table used for entry point look up. This is a
# uint16_t table of entry point indices. We use 0xffff to indicate an entry
# in the hash table is empty.
}
}
-VkResult
-anv_validate_CreateImageView(VkDevice _device,
- const VkImageViewCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkImageView *pView)
-{
- ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image);
- const VkImageSubresourceRange *subresource;
-
- /* Validate structure type before dereferencing it. */
- assert(pCreateInfo);
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO);
- subresource = &pCreateInfo->subresourceRange;
-
- /* Validate viewType is in range before using it. */
- assert(pCreateInfo->viewType >= VK_IMAGE_VIEW_TYPE_BEGIN_RANGE);
- assert(pCreateInfo->viewType <= VK_IMAGE_VIEW_TYPE_END_RANGE);
-
- /* Validate format is in range before using it. */
- assert(pCreateInfo->format >= VK_FORMAT_BEGIN_RANGE);
- assert(pCreateInfo->format <= VK_FORMAT_END_RANGE);
-
- /* Validate channel swizzles. */
- assert(pCreateInfo->components.r >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
- assert(pCreateInfo->components.r <= VK_COMPONENT_SWIZZLE_END_RANGE);
- assert(pCreateInfo->components.g >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
- assert(pCreateInfo->components.g <= VK_COMPONENT_SWIZZLE_END_RANGE);
- assert(pCreateInfo->components.b >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
- assert(pCreateInfo->components.b <= VK_COMPONENT_SWIZZLE_END_RANGE);
- assert(pCreateInfo->components.a >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
- assert(pCreateInfo->components.a <= VK_COMPONENT_SWIZZLE_END_RANGE);
-
- /* Validate subresource. */
- assert(subresource->aspectMask != 0);
- assert(subresource->levelCount > 0);
- assert(subresource->layerCount > 0);
- assert(subresource->baseMipLevel < image->levels);
- assert(subresource->baseMipLevel + anv_get_levelCount(image, subresource) <= image->levels);
- assert(subresource->baseArrayLayer < image->array_size);
- assert(subresource->baseArrayLayer + anv_get_layerCount(image, subresource) <= image->array_size);
- assert(pView);
-
- MAYBE_UNUSED const VkImageAspectFlags view_format_aspects =
- vk_format_aspects(pCreateInfo->format);
-
- const VkImageAspectFlags ds_flags = VK_IMAGE_ASPECT_DEPTH_BIT
- | VK_IMAGE_ASPECT_STENCIL_BIT;
-
- /* Validate format. */
- if (subresource->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
- assert(subresource->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(view_format_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
- } else if (subresource->aspectMask & ds_flags) {
- assert((subresource->aspectMask & ~ds_flags) == 0);
-
- assert(pCreateInfo->format == image->vk_format);
-
- if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
- assert(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT);
- assert(view_format_aspects & VK_IMAGE_ASPECT_DEPTH_BIT);
- }
-
- if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
- /* FINISHME: Is it legal to have an R8 view of S8? */
- assert(image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
- assert(view_format_aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
- }
- } else {
- assert(!"bad VkImageSubresourceRange::aspectFlags");
- }
-
- return anv_CreateImageView(_device, pCreateInfo, pAllocator, pView);
-}
-
static struct anv_state
alloc_surface_state(struct anv_device *device,
struct anv_cmd_buffer *cmd_buffer)
view->format = anv_get_isl_format(&device->info, pCreateInfo->format,
VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_TILING_LINEAR);
+ const uint32_t format_bs = isl_format_get_layout(view->format)->bs;
view->bo = buffer->bo;
view->offset = buffer->offset + pCreateInfo->offset;
view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
- buffer->size - view->offset : pCreateInfo->range;
+ buffer->size - pCreateInfo->offset : pCreateInfo->range;
+ view->range = align_down_npot_u32(view->range, format_bs);
if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) {
view->surface_state = alloc_surface_state(device, cmd_buffer);
anv_fill_buffer_surface_state(device, view->surface_state,
view->format,
- view->offset, view->range,
- isl_format_get_layout(view->format)->bs);
+ view->offset, view->range, format_bs);
} else {
view->surface_state = (struct anv_state){ 0 };
}
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
vec4, "f_color");
color_out->data.location = FRAG_RESULT_DATA0;
- nir_store_var(&b, color_out, &tex->dest.ssa, 4);
+ nir_store_var(&b, color_out, &tex->dest.ssa, 0xf);
return b.shader;
}
}
}
+/* Returns the single-component format matching one channel of the RGB
+ * format that vk_format_for_size() returns for the given block size.
+ */
+static VkFormat
+vk_single_component_format_for_rgb_size(int bs)
+{
+ switch (bs) {
+ case 3: return VK_FORMAT_R8_UNORM;
+ case 6: return VK_FORMAT_R16_UNORM;
+ case 12: return VK_FORMAT_R32_UINT;
+ default:
+ unreachable("Invalid format block size");
+ }
+}
+
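For illustration, the mapping above pairs each RGB block size with the per-channel format used to write it one component at a time (a sanity sketch, not code from the patch):

   /* bs == 3  -> 8-bit channels,  written through VK_FORMAT_R8_UNORM
    * bs == 6  -> 16-bit channels, written through VK_FORMAT_R16_UNORM
    * bs == 12 -> 32-bit channels, written through VK_FORMAT_R32_UINT */
   assert(vk_single_component_format_for_rgb_size(3) == VK_FORMAT_R8_UNORM);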
static void
create_iview(struct anv_cmd_buffer *cmd_buffer,
struct anv_meta_blit2d_surf *surf,
VkImageUsageFlags usage,
uint32_t width,
uint32_t height,
+ VkFormat format,
VkImage *img,
struct anv_image_view *iview)
{
const VkImageCreateInfo image_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_2D,
- .format = vk_format_for_size(surf->bs),
+ .format = format,
.extent = {
.width = width,
.height = height,
create_iview(cmd_buffer, src, offset, VK_IMAGE_USAGE_SAMPLED_BIT,
rect->src_x + rect->width, rect->src_y + rect->height,
+ vk_format_for_size(src->bs),
&tmp->image, &tmp->iview);
anv_CreateDescriptorPool(vk_device,
uint64_t offset,
uint32_t width,
uint32_t height,
+ VkFormat format,
struct blit2d_dst_temps *tmp)
{
create_iview(cmd_buffer, dst, offset, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
- width, height, &tmp->image, &tmp->iview);
+ width, height, format, &tmp->image, &tmp->iview);
anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device),
&(VkFramebufferCreateInfo) {
struct blit2d_dst_temps dst_temps;
blit2d_bind_dst(cmd_buffer, dst, offset, rects[r].dst_x + rects[r].width,
- rects[r].dst_y + rects[r].height, &dst_temps);
+ rects[r].dst_y + rects[r].height,
+ vk_format_for_size(dst->bs), &dst_temps);
struct blit_vb_data {
float pos[2];
};
struct blit2d_dst_temps dst_temps;
- blit2d_bind_dst(cmd_buffer, &dst_Y, offset, xmax_Y, ymax_Y, &dst_temps);
+ blit2d_bind_dst(cmd_buffer, &dst_Y, offset, xmax_Y, ymax_Y,
+ VK_FORMAT_R8_UINT, &dst_temps);
struct blit_vb_header {
struct anv_vue_header vue;
}
}
+static void
+anv_meta_blit2d_rgb_dst(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_meta_blit2d_surf *src,
+ enum blit2d_src_type src_type,
+ struct anv_meta_blit2d_surf *dst,
+ unsigned num_rects,
+ struct anv_meta_blit2d_rect *rects)
+{
+ struct anv_device *device = cmd_buffer->device;
+
+ for (unsigned r = 0; r < num_rects; ++r) {
+ struct blit2d_src_temps src_temps;
+ blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps);
+
+ assert(dst->bs % 3 == 0);
+ assert(dst->tiling == ISL_TILING_LINEAR);
+
+ uint32_t offset;
+ isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev,
+ dst->tiling, 1, dst->pitch,
+ rects[r].dst_x, rects[r].dst_y,
+ &offset,
+ &rects[r].dst_x, &rects[r].dst_y);
+
+ /* A red surface three times as wide as the actual RGB destination */
+ struct anv_meta_blit2d_surf dst_R = {
+ .bo = dst->bo,
+ .tiling = dst->tiling,
+ .base_offset = dst->base_offset,
+ .bs = dst->bs / 3,
+ .pitch = dst->pitch,
+ };
+
+ struct blit2d_dst_temps dst_temps;
+ blit2d_bind_dst(cmd_buffer, &dst_R, offset,
+ (rects[r].dst_x + rects[r].width) * 3,
+ rects[r].dst_y + rects[r].height,
+ vk_single_component_format_for_rgb_size(dst->bs),
+ &dst_temps);
+
+ struct blit_vb_data {
+ float pos[2];
+ float tex_coord[3];
+ } *vb_data;
+
+ unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
+
+ struct anv_state vb_state =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
+ memset(vb_state.map, 0, sizeof(struct anv_vue_header));
+ vb_data = vb_state.map + sizeof(struct anv_vue_header);
+
+ vb_data[0] = (struct blit_vb_data) {
+ .pos = {
+ (rects[r].dst_x + rects[r].width) * 3,
+ rects[r].dst_y + rects[r].height,
+ },
+ .tex_coord = {
+ rects[r].src_x + rects[r].width,
+ rects[r].src_y + rects[r].height,
+ src->pitch,
+ },
+ };
+
+ vb_data[1] = (struct blit_vb_data) {
+ .pos = {
+ rects[r].dst_x * 3,
+ rects[r].dst_y + rects[r].height,
+ },
+ .tex_coord = {
+ rects[r].src_x,
+ rects[r].src_y + rects[r].height,
+ src->pitch,
+ },
+ };
+
+ vb_data[2] = (struct blit_vb_data) {
+ .pos = {
+ rects[r].dst_x * 3,
+ rects[r].dst_y,
+ },
+ .tex_coord = {
+ rects[r].src_x,
+ rects[r].src_y,
+ src->pitch,
+ },
+ };
+
+ if (!device->info.has_llc)
+ anv_state_clflush(vb_state);
+
+ struct anv_buffer vertex_buffer = {
+ .device = device,
+ .size = vb_size,
+ .bo = &device->dynamic_state_block_pool.bo,
+ .offset = vb_state.offset,
+ };
+
+ anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
+ (VkBuffer[]) {
+ anv_buffer_to_handle(&vertex_buffer),
+ anv_buffer_to_handle(&vertex_buffer)
+ },
+ (VkDeviceSize[]) {
+ 0,
+ sizeof(struct anv_vue_header),
+ });
+
+ ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
+ &(VkRenderPassBeginInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.blit2d.render_pass,
+ .framebuffer = dst_temps.fb,
+ .renderArea = {
+ .offset = { rects[r].dst_x, rects[r].dst_y, },
+ .extent = { rects[r].width, rects[r].height },
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ }, VK_SUBPASS_CONTENTS_INLINE);
+
+ bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_RGB);
+
+ ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+
+ ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
+
+ /* By the time the draw call has been emitted, all data from the
+ * descriptor sets, etc. has been consumed. We are free to delete it.
+ */
+ blit2d_unbind_src(cmd_buffer, src_type, &src_temps);
+ blit2d_unbind_dst(cmd_buffer, &dst_temps);
+ }
+}
+
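A quick sketch of the coordinate trick used above (the helper below is hypothetical, for illustration only): the RGB destination is rebound as a single-component surface with one third the block size and three times the width, so component c of destination pixel x lands in column x * 3 + c of the red surface:

   static inline uint32_t
   rgb_to_r_column(uint32_t x, uint32_t component)
   {
      /* component is 0, 1 or 2 for R, G and B respectively */
      return x * 3 + component;
   }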
void
anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer,
struct anv_meta_blit2d_surf *src,
num_rects, rects);
return;
} else if (dst->bs % 3 == 0) {
- anv_finishme("Blitting to RGB destinations not yet supported");
+ anv_meta_blit2d_rgb_dst(cmd_buffer, src, src_type, dst,
+ num_rects, rects);
return;
} else {
assert(util_is_power_of_two(dst->bs));
return b.shader;
}
+/* RGB copies have the same interface as normal copies */
+#define rgb_vi_create_info normal_vi_create_info
+
+static nir_shader *
+build_nir_rgb_fragment_shader(struct anv_device *device,
+ texel_fetch_build_func txf_func)
+{
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
+ nir_builder b;
+
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs");
+
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ vec3, "v_tex_pos");
+ tex_pos_in->data.location = VARYING_SLOT_VAR0;
+
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
+ vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_DATA0;
+
+ /* We need gl_FragCoord so we know our position */
+ nir_variable *frag_coord_in = nir_variable_create(b.shader,
+ nir_var_shader_in,
+ vec4, "gl_FragCoord");
+ frag_coord_in->data.location = VARYING_SLOT_POS;
+ frag_coord_in->data.origin_upper_left = true;
+
+ nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in));
+ unsigned swiz[4] = { 0, 1 };
+ nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);
+ nir_ssa_def *tex_pitch = nir_channel(&b, pos_int, 2);
+
+ nir_ssa_def *color = txf_func(&b, device, tex_pos, tex_pitch);
+
+ /* We figure out which component we are writing from the x component of FragCoord */
+ nir_ssa_def *frag_coord_int = nir_f2i(&b, nir_load_var(&b, frag_coord_in));
+ nir_ssa_def *comp = nir_umod(&b, nir_channel(&b, frag_coord_int, 0),
+ nir_imm_int(&b, 3));
+
+ /* Select the given channel from the texelFetch result */
+ nir_ssa_def *color_channel =
+ nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 0)),
+ nir_channel(&b, color, 0),
+ nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 1)),
+ nir_channel(&b, color, 1),
+ nir_channel(&b, color, 2)));
+
+ nir_ssa_def *u = nir_ssa_undef(&b, 1, 32);
+ nir_store_var(&b, color_out, nir_vec4(&b, color_channel, u, u, u), 0x1);
+
+ return b.shader;
+}
+
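Written out as scalar C, the shader built above does roughly the following per fragment (an illustrative sketch; the function name is made up): pick one of the three fetched channels based on the fragment's x coordinate modulo 3, which is exactly what the nested nir_bcsel chain encodes:

   static uint32_t
   select_rgb_component(const uint32_t color[3], uint32_t frag_x)
   {
      uint32_t comp = frag_x % 3;   /* nir_umod(frag_coord.x, 3) */
      return color[comp];           /* the nested nir_bcsel chain */
   }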
static const VkPipelineVertexInputStateCreateInfo w_tiled_vi_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.vertexBindingDescriptionCount = 2,
vi_create_info = &w_tiled_vi_create_info;
break;
case BLIT2D_DST_TYPE_RGB:
- /* Not yet supported */
+ /* RGB destinations and W-detiling don't mix */
+ if (src_type != BLIT2D_SRC_TYPE_NORMAL)
+ return VK_SUCCESS;
+
+ fs.nir = build_nir_rgb_fragment_shader(device, src_func);
+ vi_create_info = &rgb_vi_create_info;
+ break;
default:
return VK_SUCCESS;
}
#include "anv_private.h"
#include "nir/nir_builder.h"
+#include "util/u_format_rgb9e5.h"
+
/** Vertex attributes for color clears. */
struct color_clear_vattrs {
struct anv_vue_header vue_header;
anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer,
struct anv_image *image,
VkImageLayout image_layout,
- const VkClearValue *clear_value,
+ VkClearValue clear_value,
uint32_t range_count,
const VkImageSubresourceRange *ranges)
{
VkDevice device_h = anv_device_to_handle(cmd_buffer->device);
+ VkFormat vk_format = image->vk_format;
+ if (vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
+ /* We can't actually render to this format so we have to work around it
+ * by manually packing the clear color to RGB9E5 and clearing as R32_UINT.
+ */
+ clear_value.color.uint32[0] =
+ float3_to_rgb9e5(clear_value.color.float32);
+ vk_format = VK_FORMAT_R32_UINT;
+ }
+
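For reference, the CPU-side packing described by the comment above amounts to the following (a minimal sketch using the helper from the util header included earlier; the sample values are arbitrary):

   float rgb[3] = { 1.0f, 0.5f, 0.25f };
   uint32_t packed = float3_to_rgb9e5(rgb);  /* shared-exponent 32-bit word */
   /* 'packed' can now be written through a VK_FORMAT_R32_UINT view */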
for (uint32_t r = 0; r < range_count; r++) {
const VkImageSubresourceRange *range = &ranges[r];
for (uint32_t l = 0; l < anv_get_levelCount(image, range); ++l) {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = anv_image_to_handle(image),
.viewType = anv_meta_get_view_type(image),
- .format = image->vk_format,
+ .format = vk_format,
.subresourceRange = {
.aspectMask = range->aspectMask,
.baseMipLevel = range->baseMipLevel + l,
&fb);
VkAttachmentDescription att_desc = {
- .format = iview.vk_format,
+ .format = vk_format,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
VkClearAttachment clear_att = {
.aspectMask = range->aspectMask,
.colorAttachment = 0,
- .clearValue = *clear_value,
+ .clearValue = clear_value,
};
VkClearRect clear_rect = {
meta_clear_begin(&saved_state, cmd_buffer);
anv_cmd_clear_image(cmd_buffer, image, imageLayout,
- (const VkClearValue *) pColor,
+ (VkClearValue) { .color = *pColor },
rangeCount, pRanges);
meta_clear_end(&saved_state, cmd_buffer);
meta_clear_begin(&saved_state, cmd_buffer);
anv_cmd_clear_image(cmd_buffer, image, imageLayout,
- (const VkClearValue *) pDepthStencil,
+ (VkClearValue) { .depthStencil = *pDepthStencil },
rangeCount, pRanges);
meta_clear_end(&saved_state, cmd_buffer);
anv_cmd_clear_image(cmd_buffer, anv_image_from_handle(dest_image),
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
- &clear_value, 1, &range);
+ clear_value, 1, &range);
}
void anv_CmdFillBuffer(
pAllocator ? pAllocator : &device->alloc);
if (pipeline->blend_state.map)
anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
+
+ for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+ if (pipeline->shaders[s])
+ anv_shader_bin_unref(device, pipeline->shaders[s]);
+ }
+
anv_free2(&device->alloc, pAllocator, pipeline);
}
prog_data->binding_table.image_start = bias;
}
+static struct anv_shader_bin *
+anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
+ struct anv_pipeline_cache *cache,
+ const void *key_data, uint32_t key_size,
+ const void *kernel_data, uint32_t kernel_size,
+ const void *prog_data, uint32_t prog_data_size,
+ const struct anv_pipeline_bind_map *bind_map)
+{
+ if (cache) {
+ return anv_pipeline_cache_upload_kernel(cache, key_data, key_size,
+ kernel_data, kernel_size,
+ prog_data, prog_data_size,
+ bind_map);
+ } else {
+ return anv_shader_bin_create(pipeline->device, key_data, key_size,
+ kernel_data, kernel_size,
+ prog_data, prog_data_size, bind_map);
+ }
+}
+
static void
anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
gl_shader_stage stage,
- const struct brw_stage_prog_data *prog_data,
- struct anv_pipeline_bind_map *map)
+ struct anv_shader_bin *shader)
{
- pipeline->prog_data[stage] = prog_data;
+ pipeline->shaders[stage] = shader;
pipeline->active_stages |= mesa_to_vk_shader_stage(stage);
- pipeline->bindings[stage] = *map;
}
static VkResult
{
const struct brw_compiler *compiler =
pipeline->device->instance->physicalDevice.compiler;
- const struct brw_stage_prog_data *stage_prog_data;
struct anv_pipeline_bind_map map;
struct brw_vs_prog_key key;
- uint32_t kernel = NO_KERNEL;
+ struct anv_shader_bin *bin = NULL;
unsigned char sha1[20];
populate_vs_prog_key(&pipeline->device->info, &key);
- if (module->size > 0) {
- anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
- kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+ if (cache) {
+ anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
+ pipeline->layout, spec_info);
+ bin = anv_pipeline_cache_search(cache, sha1, 20);
}
- if (kernel == NO_KERNEL) {
+ if (bin == NULL) {
struct brw_vs_prog_data prog_data = { 0, };
struct anv_pipeline_binding surface_to_descriptor[256];
struct anv_pipeline_binding sampler_to_descriptor[256];
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
- stage_prog_data = &prog_data.base.base;
- kernel = anv_pipeline_cache_upload_kernel(cache,
- module->size > 0 ? sha1 : NULL,
- shader_code, code_size,
- &stage_prog_data, sizeof(prog_data),
- &map);
+ bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+ shader_code, code_size,
+ &prog_data, sizeof(prog_data), &map);
+ if (!bin) {
+ ralloc_free(mem_ctx);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
ralloc_free(mem_ctx);
}
const struct brw_vs_prog_data *vs_prog_data =
- (const struct brw_vs_prog_data *) stage_prog_data;
+ (const struct brw_vs_prog_data *)anv_shader_bin_get_prog_data(bin);
if (vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
- pipeline->vs_simd8 = kernel;
+ pipeline->vs_simd8 = bin->kernel.offset;
pipeline->vs_vec4 = NO_KERNEL;
} else {
pipeline->vs_simd8 = NO_KERNEL;
- pipeline->vs_vec4 = kernel;
+ pipeline->vs_vec4 = bin->kernel.offset;
}
- anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX,
- stage_prog_data, &map);
+ anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, bin);
return VK_SUCCESS;
}
{
const struct brw_compiler *compiler =
pipeline->device->instance->physicalDevice.compiler;
- const struct brw_stage_prog_data *stage_prog_data;
struct anv_pipeline_bind_map map;
struct brw_gs_prog_key key;
- uint32_t kernel = NO_KERNEL;
+ struct anv_shader_bin *bin = NULL;
unsigned char sha1[20];
populate_gs_prog_key(&pipeline->device->info, &key);
- if (module->size > 0) {
- anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
- kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+ if (cache) {
+ anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
+ pipeline->layout, spec_info);
+ bin = anv_pipeline_cache_search(cache, sha1, 20);
}
- if (kernel == NO_KERNEL) {
+ if (bin == NULL) {
struct brw_gs_prog_data prog_data = { 0, };
struct anv_pipeline_binding surface_to_descriptor[256];
struct anv_pipeline_binding sampler_to_descriptor[256];
}
/* TODO: SIMD8 GS */
- stage_prog_data = &prog_data.base.base;
- kernel = anv_pipeline_cache_upload_kernel(cache,
- module->size > 0 ? sha1 : NULL,
- shader_code, code_size,
- &stage_prog_data, sizeof(prog_data),
- &map);
+ bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+ shader_code, code_size,
+ &prog_data, sizeof(prog_data), &map);
+ if (!bin) {
+ ralloc_free(mem_ctx);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
ralloc_free(mem_ctx);
}
- pipeline->gs_kernel = kernel;
+ pipeline->gs_kernel = bin->kernel.offset;
- anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY,
- stage_prog_data, &map);
+ anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, bin);
return VK_SUCCESS;
}
{
const struct brw_compiler *compiler =
pipeline->device->instance->physicalDevice.compiler;
- const struct brw_stage_prog_data *stage_prog_data;
struct anv_pipeline_bind_map map;
struct brw_wm_prog_key key;
+ struct anv_shader_bin *bin = NULL;
unsigned char sha1[20];
populate_wm_prog_key(&pipeline->device->info, info, extra, &key);
- if (module->size > 0) {
- anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
- pipeline->ps_ksp0 =
- anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+ if (cache) {
+ anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
+ pipeline->layout, spec_info);
+ bin = anv_pipeline_cache_search(cache, sha1, 20);
}
- if (pipeline->ps_ksp0 == NO_KERNEL) {
+ if (bin == NULL) {
struct brw_wm_prog_data prog_data = { 0, };
struct anv_pipeline_binding surface_to_descriptor[256];
struct anv_pipeline_binding sampler_to_descriptor[256];
assert(num_rts + array_len <= 8);
for (unsigned i = 0; i < array_len; i++) {
- rt_bindings[num_rts] = (struct anv_pipeline_binding) {
+ rt_bindings[num_rts + i] = (struct anv_pipeline_binding) {
.set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
.binding = 0,
.index = rt + i,
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
- stage_prog_data = &prog_data.base;
- pipeline->ps_ksp0 =
- anv_pipeline_cache_upload_kernel(cache,
- module->size > 0 ? sha1 : NULL,
- shader_code, code_size,
- &stage_prog_data, sizeof(prog_data),
- &map);
+ bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+ shader_code, code_size,
+ &prog_data, sizeof(prog_data), &map);
+ if (!bin) {
+ ralloc_free(mem_ctx);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
ralloc_free(mem_ctx);
}
- anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT,
- stage_prog_data, &map);
+ pipeline->ps_ksp0 = bin->kernel.offset;
+
+ anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, bin);
return VK_SUCCESS;
}
{
const struct brw_compiler *compiler =
pipeline->device->instance->physicalDevice.compiler;
- const struct brw_stage_prog_data *stage_prog_data;
struct anv_pipeline_bind_map map;
struct brw_cs_prog_key key;
- uint32_t kernel = NO_KERNEL;
+ struct anv_shader_bin *bin = NULL;
unsigned char sha1[20];
populate_cs_prog_key(&pipeline->device->info, &key);
- if (module->size > 0) {
- anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
- kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+ if (cache) {
+ anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
+ pipeline->layout, spec_info);
+ bin = anv_pipeline_cache_search(cache, sha1, 20);
}
- if (module->size == 0 || kernel == NO_KERNEL) {
+ if (bin == NULL) {
struct brw_cs_prog_data prog_data = { 0, };
struct anv_pipeline_binding surface_to_descriptor[256];
struct anv_pipeline_binding sampler_to_descriptor[256];
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
- stage_prog_data = &prog_data.base;
- kernel = anv_pipeline_cache_upload_kernel(cache,
- module->size > 0 ? sha1 : NULL,
- shader_code, code_size,
- &stage_prog_data, sizeof(prog_data),
- &map);
+ bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+ shader_code, code_size,
+ &prog_data, sizeof(prog_data), &map);
+ if (!bin) {
+ ralloc_free(mem_ctx);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
ralloc_free(mem_ctx);
}
- pipeline->cs_simd = kernel;
+ pipeline->cs_simd = bin->kernel.offset;
- anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE,
- stage_prog_data, &map);
+ anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, bin);
return VK_SUCCESS;
}
/* When we free the pipeline, we detect stages based on the NULL status
* of various prog_data pointers. Make them NULL by default.
*/
- memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
- memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
+ memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
pipeline->vs_simd8 = NO_KERNEL;
pipeline->vs_vec4 = NO_KERNEL;
}
if (modules[MESA_SHADER_VERTEX]) {
- anv_pipeline_compile_vs(pipeline, cache, pCreateInfo,
- modules[MESA_SHADER_VERTEX],
- pStages[MESA_SHADER_VERTEX]->pName,
- pStages[MESA_SHADER_VERTEX]->pSpecializationInfo);
+ result = anv_pipeline_compile_vs(pipeline, cache, pCreateInfo,
+ modules[MESA_SHADER_VERTEX],
+ pStages[MESA_SHADER_VERTEX]->pName,
+ pStages[MESA_SHADER_VERTEX]->pSpecializationInfo);
+ if (result != VK_SUCCESS)
+ goto compile_fail;
}
if (modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL])
anv_finishme("no tessellation support");
if (modules[MESA_SHADER_GEOMETRY]) {
- anv_pipeline_compile_gs(pipeline, cache, pCreateInfo,
- modules[MESA_SHADER_GEOMETRY],
- pStages[MESA_SHADER_GEOMETRY]->pName,
- pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo);
+ result = anv_pipeline_compile_gs(pipeline, cache, pCreateInfo,
+ modules[MESA_SHADER_GEOMETRY],
+ pStages[MESA_SHADER_GEOMETRY]->pName,
+ pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo);
+ if (result != VK_SUCCESS)
+ goto compile_fail;
}
if (modules[MESA_SHADER_FRAGMENT]) {
- anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra,
- modules[MESA_SHADER_FRAGMENT],
- pStages[MESA_SHADER_FRAGMENT]->pName,
- pStages[MESA_SHADER_FRAGMENT]->pSpecializationInfo);
+ result = anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra,
+ modules[MESA_SHADER_FRAGMENT],
+ pStages[MESA_SHADER_FRAGMENT]->pName,
+ pStages[MESA_SHADER_FRAGMENT]->pSpecializationInfo);
+ if (result != VK_SUCCESS)
+ goto compile_fail;
}
if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
pipeline->topology = _3DPRIM_RECTLIST;
return VK_SUCCESS;
+
+compile_fail:
+ for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+ if (pipeline->shaders[s])
+ anv_shader_bin_unref(device, pipeline->shaders[s]);
+ }
+
+ anv_reloc_list_finish(&pipeline->batch_relocs, alloc);
+
+ return result;
}
VkResult
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
- if (cache == NULL)
- cache = &device->default_pipeline_cache;
-
switch (device->info.gen) {
case 7:
if (device->info.is_haswell)
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
- if (cache == NULL)
- cache = &device->default_pipeline_cache;
-
switch (device->info.gen) {
case 7:
if (device->info.is_haswell)
*/
#include "util/mesa-sha1.h"
+#include "util/hash_table.h"
#include "util/debug.h"
#include "anv_private.h"
+struct shader_bin_key {
+ uint32_t size;
+ uint8_t data[0];
+};
+
+static size_t
+anv_shader_bin_size(uint32_t prog_data_size, uint32_t key_size,
+ uint32_t surface_count, uint32_t sampler_count)
+{
+ const uint32_t binding_data_size =
+ (surface_count + sampler_count) * sizeof(struct anv_pipeline_binding);
+
+ return align_u32(sizeof(struct anv_shader_bin), 8) +
+ align_u32(prog_data_size, 8) +
+ align_u32(sizeof(uint32_t) + key_size, 8) +
+ align_u32(binding_data_size, 8);
+}
+
+static inline const struct shader_bin_key *
+anv_shader_bin_get_key(const struct anv_shader_bin *shader)
+{
+ const void *data = shader;
+ data += align_u32(sizeof(struct anv_shader_bin), 8);
+ data += align_u32(shader->prog_data_size, 8);
+ return data;
+}
+
+struct anv_shader_bin *
+anv_shader_bin_create(struct anv_device *device,
+ const void *key_data, uint32_t key_size,
+ const void *kernel_data, uint32_t kernel_size,
+ const void *prog_data, uint32_t prog_data_size,
+ const struct anv_pipeline_bind_map *bind_map)
+{
+ const size_t size =
+ anv_shader_bin_size(prog_data_size, key_size,
+ bind_map->surface_count, bind_map->sampler_count);
+
+ struct anv_shader_bin *shader =
+ anv_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ if (!shader)
+ return NULL;
+
+ shader->ref_cnt = 1;
+
+ shader->kernel =
+ anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
+ memcpy(shader->kernel.map, kernel_data, kernel_size);
+ shader->kernel_size = kernel_size;
+ shader->bind_map = *bind_map;
+ shader->prog_data_size = prog_data_size;
+
+ /* Now we fill out the trailing data at the end of the struct */
+ void *data = shader;
+ data += align_u32(sizeof(struct anv_shader_bin), 8);
+
+ memcpy(data, prog_data, prog_data_size);
+ data += align_u32(prog_data_size, 8);
+
+ struct shader_bin_key *key = data;
+ key->size = key_size;
+ memcpy(key->data, key_data, key_size);
+ data += align_u32(sizeof(*key) + key_size, 8);
+
+ shader->bind_map.surface_to_descriptor = data;
+ memcpy(data, bind_map->surface_to_descriptor,
+ bind_map->surface_count * sizeof(struct anv_pipeline_binding));
+ data += bind_map->surface_count * sizeof(struct anv_pipeline_binding);
+
+ shader->bind_map.sampler_to_descriptor = data;
+ memcpy(data, bind_map->sampler_to_descriptor,
+ bind_map->sampler_count * sizeof(struct anv_pipeline_binding));
+
+ return shader;
+}
+
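The resulting allocation is laid out as follows, matching anv_shader_bin_size() above (every chunk 8-byte aligned); the serialized form produced by anv_shader_bin_write_data() further down appends the kernel bytes after this block:

   /* struct anv_shader_bin              header
    * prog_data                          prog_data_size bytes
    * shader_bin_key { size, data[] }    sizeof(uint32_t) + key_size bytes
    * surface bindings                   surface_count entries
    * sampler bindings                   sampler_count entries */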
+void
+anv_shader_bin_destroy(struct anv_device *device,
+ struct anv_shader_bin *shader)
+{
+ assert(shader->ref_cnt == 0);
+ anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
+ anv_free(&device->alloc, shader);
+}
+
+static size_t
+anv_shader_bin_data_size(const struct anv_shader_bin *shader)
+{
+ return anv_shader_bin_size(shader->prog_data_size,
+ anv_shader_bin_get_key(shader)->size,
+ shader->bind_map.surface_count,
+ shader->bind_map.sampler_count) +
+ align_u32(shader->kernel_size, 8);
+}
+
+static void
+anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data)
+{
+ size_t struct_size =
+ anv_shader_bin_size(shader->prog_data_size,
+ anv_shader_bin_get_key(shader)->size,
+ shader->bind_map.surface_count,
+ shader->bind_map.sampler_count);
+
+ memcpy(data, shader, struct_size);
+ data += struct_size;
+
+ memcpy(data, shader->kernel.map, shader->kernel_size);
+}
+
/* Remaining work:
*
* - Compact binding table layout so it's tight and not dependent on
* dual_src_blend.
*/
+static uint32_t
+shader_bin_key_hash_func(const void *void_key)
+{
+ const struct shader_bin_key *key = void_key;
+ return _mesa_hash_data(key->data, key->size);
+}
+
+static bool
+shader_bin_key_compare_func(const void *void_a, const void *void_b)
+{
+ const struct shader_bin_key *a = void_a, *b = void_b;
+ if (a->size != b->size)
+ return false;
+
+ return memcmp(a->data, b->data, a->size) == 0;
+}
+
void
anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
- struct anv_device *device)
+ struct anv_device *device,
+ bool cache_enabled)
{
cache->device = device;
- anv_state_stream_init(&cache->program_stream,
- &device->instruction_block_pool);
pthread_mutex_init(&cache->mutex, NULL);
- cache->kernel_count = 0;
- cache->total_size = 0;
- cache->table_size = 1024;
- const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
- cache->hash_table = malloc(byte_size);
-
- /* We don't consider allocation failure fatal, we just start with a 0-sized
- * cache. */
- if (cache->hash_table == NULL ||
- !env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true))
- cache->table_size = 0;
- else
- memset(cache->hash_table, 0xff, byte_size);
+ if (cache_enabled) {
+ cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
+ shader_bin_key_compare_func);
+ } else {
+ cache->cache = NULL;
+ }
}
void
anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
{
- anv_state_stream_finish(&cache->program_stream);
pthread_mutex_destroy(&cache->mutex);
- free(cache->hash_table);
-}
-
-struct cache_entry {
- unsigned char sha1[20];
- uint32_t prog_data_size;
- uint32_t kernel_size;
- uint32_t surface_count;
- uint32_t sampler_count;
- uint32_t image_count;
-
- char prog_data[0];
-
- /* kernel follows prog_data at next 64 byte aligned address */
-};
-
-static uint32_t
-entry_size(struct cache_entry *entry)
-{
- /* This returns the number of bytes needed to serialize an entry, which
- * doesn't include the alignment padding bytes.
- */
- const uint32_t map_size =
- entry->surface_count * sizeof(struct anv_pipeline_binding) +
- entry->sampler_count * sizeof(struct anv_pipeline_binding);
+ if (cache->cache) {
+ /* This is a bit unfortunate. In order to keep things from randomly
+ * going away, the shader cache has to hold a reference to all shader
+ * binaries it contains. We unref them when we destroy the cache.
+ */
+ struct hash_entry *entry;
+ hash_table_foreach(cache->cache, entry)
+ anv_shader_bin_unref(cache->device, entry->data);
- return sizeof(*entry) + entry->prog_data_size + map_size;
+ _mesa_hash_table_destroy(cache->cache, NULL);
+ }
}
void
anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
struct anv_shader_module *module,
const char *entrypoint,
+ const struct anv_pipeline_layout *pipeline_layout,
const VkSpecializationInfo *spec_info)
{
struct mesa_sha1 *ctx;
_mesa_sha1_update(ctx, key, key_size);
_mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
_mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
+ if (pipeline_layout) {
+ _mesa_sha1_update(ctx, pipeline_layout->sha1,
+ sizeof(pipeline_layout->sha1));
+ }
   /* hash in shader stage? */
if (spec_info) {
_mesa_sha1_update(ctx, spec_info->pMapEntries,
_mesa_sha1_final(ctx, hash);
}
-static uint32_t
-anv_pipeline_cache_search_unlocked(struct anv_pipeline_cache *cache,
- const unsigned char *sha1,
- const struct brw_stage_prog_data **prog_data,
- struct anv_pipeline_bind_map *map)
+static struct anv_shader_bin *
+anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
+ const void *key_data, uint32_t key_size)
{
- const uint32_t mask = cache->table_size - 1;
- const uint32_t start = (*(uint32_t *) sha1);
-
- for (uint32_t i = 0; i < cache->table_size; i++) {
- const uint32_t index = (start + i) & mask;
- const uint32_t offset = cache->hash_table[index];
-
- if (offset == ~0)
- return NO_KERNEL;
-
- struct cache_entry *entry =
- cache->program_stream.block_pool->map + offset;
- if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
- if (prog_data) {
- assert(map);
- void *p = entry->prog_data;
- *prog_data = p;
- p += entry->prog_data_size;
- map->surface_count = entry->surface_count;
- map->sampler_count = entry->sampler_count;
- map->image_count = entry->image_count;
- map->surface_to_descriptor = p;
- p += map->surface_count * sizeof(struct anv_pipeline_binding);
- map->sampler_to_descriptor = p;
- }
-
- return offset + align_u32(entry_size(entry), 64);
- }
- }
-
- /* This can happen if the pipeline cache is disabled via
- * ANV_ENABLE_PIPELINE_CACHE=false
- */
- return NO_KERNEL;
+ uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
+ struct shader_bin_key *key = (void *)vla;
+ key->size = key_size;
+ memcpy(key->data, key_data, key_size);
+
+ struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
+ if (entry)
+ return entry->data;
+ else
+ return NULL;
}
-uint32_t
+struct anv_shader_bin *
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
- const unsigned char *sha1,
- const struct brw_stage_prog_data **prog_data,
- struct anv_pipeline_bind_map *map)
+ const void *key_data, uint32_t key_size)
{
- uint32_t kernel;
+ if (!cache->cache)
+ return NULL;
pthread_mutex_lock(&cache->mutex);
- kernel = anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
+ struct anv_shader_bin *shader =
+ anv_pipeline_cache_search_locked(cache, key_data, key_size);
pthread_mutex_unlock(&cache->mutex);
- return kernel;
-}
-
-static void
-anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache,
- struct cache_entry *entry, uint32_t entry_offset)
-{
- const uint32_t mask = cache->table_size - 1;
- const uint32_t start = (*(uint32_t *) entry->sha1);
-
- /* We'll always be able to insert when we get here. */
- assert(cache->kernel_count < cache->table_size / 2);
+ /* We increment refcount before handing it to the caller */
+ if (shader)
+ anv_shader_bin_ref(shader);
- for (uint32_t i = 0; i < cache->table_size; i++) {
- const uint32_t index = (start + i) & mask;
- if (cache->hash_table[index] == ~0) {
- cache->hash_table[index] = entry_offset;
- break;
- }
- }
-
- cache->total_size += entry_size(entry) + entry->kernel_size;
- cache->kernel_count++;
+ return shader;
}
-static VkResult
-anv_pipeline_cache_grow(struct anv_pipeline_cache *cache)
+static struct anv_shader_bin *
+anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
+ const void *key_data, uint32_t key_size,
+ const void *kernel_data, uint32_t kernel_size,
+ const void *prog_data, uint32_t prog_data_size,
+ const struct anv_pipeline_bind_map *bind_map)
{
- const uint32_t table_size = cache->table_size * 2;
- const uint32_t old_table_size = cache->table_size;
- const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
- uint32_t *table;
- uint32_t *old_table = cache->hash_table;
-
- table = malloc(byte_size);
- if (table == NULL)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- cache->hash_table = table;
- cache->table_size = table_size;
- cache->kernel_count = 0;
- cache->total_size = 0;
-
- memset(cache->hash_table, 0xff, byte_size);
- for (uint32_t i = 0; i < old_table_size; i++) {
- const uint32_t offset = old_table[i];
- if (offset == ~0)
- continue;
+ struct anv_shader_bin *shader =
+ anv_pipeline_cache_search_locked(cache, key_data, key_size);
+ if (shader)
+ return shader;
- struct cache_entry *entry =
- cache->program_stream.block_pool->map + offset;
- anv_pipeline_cache_set_entry(cache, entry, offset);
- }
+ struct anv_shader_bin *bin =
+ anv_shader_bin_create(cache->device, key_data, key_size,
+ kernel_data, kernel_size,
+ prog_data, prog_data_size, bind_map);
+ if (!bin)
+ return NULL;
- free(old_table);
+ _mesa_hash_table_insert(cache->cache, anv_shader_bin_get_key(bin), bin);
- return VK_SUCCESS;
+ return bin;
}
-static void
-anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache,
- struct cache_entry *entry, uint32_t entry_offset)
-{
- if (cache->kernel_count == cache->table_size / 2)
- anv_pipeline_cache_grow(cache);
-
- /* Failing to grow that hash table isn't fatal, but may mean we don't
- * have enough space to add this new kernel. Only add it if there's room.
- */
- if (cache->kernel_count < cache->table_size / 2)
- anv_pipeline_cache_set_entry(cache, entry, entry_offset);
-}
-
-uint32_t
+struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
- const unsigned char *sha1,
- const void *kernel, size_t kernel_size,
- const struct brw_stage_prog_data **prog_data,
- size_t prog_data_size,
- struct anv_pipeline_bind_map *map)
+ const void *key_data, uint32_t key_size,
+ const void *kernel_data, uint32_t kernel_size,
+ const void *prog_data, uint32_t prog_data_size,
+ const struct anv_pipeline_bind_map *bind_map)
{
- pthread_mutex_lock(&cache->mutex);
-
- /* Before uploading, check again that another thread didn't upload this
- * shader while we were compiling it.
- */
- if (sha1) {
- uint32_t cached_kernel =
- anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
- if (cached_kernel != NO_KERNEL) {
- pthread_mutex_unlock(&cache->mutex);
- return cached_kernel;
- }
- }
+ if (cache->cache) {
+ pthread_mutex_lock(&cache->mutex);
- struct cache_entry *entry;
+ struct anv_shader_bin *bin =
+ anv_pipeline_cache_add_shader(cache, key_data, key_size,
+ kernel_data, kernel_size,
+ prog_data, prog_data_size, bind_map);
- const uint32_t map_size =
- map->surface_count * sizeof(struct anv_pipeline_binding) +
- map->sampler_count * sizeof(struct anv_pipeline_binding);
+ pthread_mutex_unlock(&cache->mutex);
- const uint32_t preamble_size =
- align_u32(sizeof(*entry) + prog_data_size + map_size, 64);
+ /* We increment refcount before handing it to the caller */
+ anv_shader_bin_ref(bin);
- const uint32_t size = preamble_size + kernel_size;
-
- assert(size < cache->program_stream.block_pool->block_size);
- const struct anv_state state =
- anv_state_stream_alloc(&cache->program_stream, size, 64);
-
- entry = state.map;
- entry->prog_data_size = prog_data_size;
- entry->surface_count = map->surface_count;
- entry->sampler_count = map->sampler_count;
- entry->image_count = map->image_count;
- entry->kernel_size = kernel_size;
-
- void *p = entry->prog_data;
- memcpy(p, *prog_data, prog_data_size);
- p += prog_data_size;
-
- memcpy(p, map->surface_to_descriptor,
- map->surface_count * sizeof(struct anv_pipeline_binding));
- map->surface_to_descriptor = p;
- p += map->surface_count * sizeof(struct anv_pipeline_binding);
-
- memcpy(p, map->sampler_to_descriptor,
- map->sampler_count * sizeof(struct anv_pipeline_binding));
- map->sampler_to_descriptor = p;
-
- if (sha1) {
- assert(anv_pipeline_cache_search_unlocked(cache, sha1,
- NULL, NULL) == NO_KERNEL);
-
- memcpy(entry->sha1, sha1, sizeof(entry->sha1));
- anv_pipeline_cache_add_entry(cache, entry, state.offset);
+ return bin;
+ } else {
+ /* In this case, we're not caching it so the caller owns it entirely */
+ return anv_shader_bin_create(cache->device, key_data, key_size,
+ kernel_data, kernel_size,
+ prog_data, prog_data_size, bind_map);
}
-
- pthread_mutex_unlock(&cache->mutex);
-
- memcpy(state.map + preamble_size, kernel, kernel_size);
-
- if (!cache->device->info.has_llc)
- anv_state_clflush(state);
-
- *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
-
- return state.offset + preamble_size;
}
struct cache_header {
struct cache_header header;
uint8_t uuid[VK_UUID_SIZE];
+ if (cache->cache == NULL)
+ return;
+
if (size < sizeof(header))
return;
memcpy(&header, data, sizeof(header));
if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
return;
- void *end = (void *) data + size;
- void *p = (void *) data + header.header_size;
-
- while (p < end) {
- struct cache_entry *entry = p;
-
- void *data = entry->prog_data;
- const struct brw_stage_prog_data *prog_data = data;
- data += entry->prog_data_size;
-
- struct anv_pipeline_binding *surface_to_descriptor = data;
- data += entry->surface_count * sizeof(struct anv_pipeline_binding);
- struct anv_pipeline_binding *sampler_to_descriptor = data;
- data += entry->sampler_count * sizeof(struct anv_pipeline_binding);
- void *kernel = data;
-
- struct anv_pipeline_bind_map map = {
- .surface_count = entry->surface_count,
- .sampler_count = entry->sampler_count,
- .image_count = entry->image_count,
- .surface_to_descriptor = surface_to_descriptor,
- .sampler_to_descriptor = sampler_to_descriptor
- };
-
- anv_pipeline_cache_upload_kernel(cache, entry->sha1,
- kernel, entry->kernel_size,
- &prog_data,
- entry->prog_data_size, &map);
- p = kernel + entry->kernel_size;
+ const void *end = data + size;
+ const void *p = data + header.header_size;
+
+ /* Count is the total number of valid entries */
+ uint32_t count;
+ if (p + sizeof(count) >= end)
+ return;
+ memcpy(&count, p, sizeof(count));
+ p += align_u32(sizeof(count), 8);
+
+ for (uint32_t i = 0; i < count; i++) {
+ struct anv_shader_bin bin;
+ if (p + sizeof(bin) > end)
+ break;
+ memcpy(&bin, p, sizeof(bin));
+ p += align_u32(sizeof(struct anv_shader_bin), 8);
+
+ const void *prog_data = p;
+ p += align_u32(bin.prog_data_size, 8);
+
+ struct shader_bin_key key;
+ if (p + sizeof(key) > end)
+ break;
+ memcpy(&key, p, sizeof(key));
+ const void *key_data = p + sizeof(key);
+ p += align_u32(sizeof(key) + key.size, 8);
+
+ /* We're going to memcpy this so getting rid of const is fine */
+ struct anv_pipeline_binding *bindings = (void *)p;
+ p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) *
+ sizeof(struct anv_pipeline_binding), 8);
+ bin.bind_map.surface_to_descriptor = bindings;
+ bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count;
+
+ const void *kernel_data = p;
+ p += align_u32(bin.kernel_size, 8);
+
+ if (p > end)
+ break;
+
+ anv_pipeline_cache_add_shader(cache, key_data, key.size,
+ kernel_data, bin.kernel_size,
+ prog_data, bin.prog_data_size,
+ &bin.bind_map);
}
}
+static bool
+pipeline_cache_enabled(void)
+{
+ static int enabled = -1;
+ if (enabled < 0)
+ enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
+ return enabled;
+}
+
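In practice this means running with ANV_ENABLE_PIPELINE_CACHE=false leaves cache->cache NULL for every cache created afterwards, so vkCreatePipelineCache becomes a pass-through and each compile goes straight to anv_shader_bin_create() via the else branch of anv_pipeline_cache_upload_kernel().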
VkResult anv_CreatePipelineCache(
VkDevice _device,
const VkPipelineCacheCreateInfo* pCreateInfo,
if (cache == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- anv_pipeline_cache_init(cache, device);
+ anv_pipeline_cache_init(cache, device, pipeline_cache_enabled());
if (pCreateInfo->initialDataSize > 0)
anv_pipeline_cache_load(cache,
ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
struct cache_header *header;
- const size_t size = sizeof(*header) + cache->total_size;
-
if (pData == NULL) {
+ size_t size = align_u32(sizeof(*header), 8) +
+ align_u32(sizeof(uint32_t), 8);
+
+ if (cache->cache) {
+ struct hash_entry *entry;
+ hash_table_foreach(cache->cache, entry)
+ size += anv_shader_bin_data_size(entry->data);
+ }
+
*pDataSize = size;
return VK_SUCCESS;
}
header->vendor_id = 0x8086;
header->device_id = device->chipset_id;
anv_device_get_cache_uuid(header->uuid);
- p += header->header_size;
+ p += align_u32(header->header_size, 8);
- struct cache_entry *entry;
- for (uint32_t i = 0; i < cache->table_size; i++) {
- if (cache->hash_table[i] == ~0)
- continue;
+ uint32_t *count = p;
+ p += align_u32(sizeof(*count), 8);
+ *count = 0;
- entry = cache->program_stream.block_pool->map + cache->hash_table[i];
- const uint32_t size = entry_size(entry);
- if (end < p + size + entry->kernel_size)
- break;
-
- memcpy(p, entry, size);
- p += size;
+ if (cache->cache) {
+ struct hash_entry *entry;
+ hash_table_foreach(cache->cache, entry) {
+ struct anv_shader_bin *shader = entry->data;
+ size_t data_size = anv_shader_bin_data_size(entry->data);
+ if (p + data_size > end)
+ break;
- void *kernel = (void *) entry + align_u32(size, 64);
+ anv_shader_bin_write_data(shader, p);
+ p += data_size;
- memcpy(p, kernel, entry->kernel_size);
- p += entry->kernel_size;
+ (*count)++;
+ }
}
*pDataSize = p - pData;
return VK_SUCCESS;
}
-static void
-anv_pipeline_cache_merge(struct anv_pipeline_cache *dst,
- struct anv_pipeline_cache *src)
-{
- for (uint32_t i = 0; i < src->table_size; i++) {
- const uint32_t offset = src->hash_table[i];
- if (offset == ~0)
- continue;
-
- struct cache_entry *entry =
- src->program_stream.block_pool->map + offset;
-
- if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL)
- continue;
-
- anv_pipeline_cache_add_entry(dst, entry, offset);
- }
-}
-
VkResult anv_MergePipelineCaches(
VkDevice _device,
VkPipelineCache destCache,
{
ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);
+ if (!dst->cache)
+ return VK_SUCCESS;
+
for (uint32_t i = 0; i < srcCacheCount; i++) {
ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
+ if (!src->cache)
+ continue;
+
+ struct hash_entry *entry;
+ hash_table_foreach(src->cache, entry) {
+ struct anv_shader_bin *bin = entry->data;
+ if (_mesa_hash_table_search(dst->cache, anv_shader_bin_get_key(bin)))
+ continue;
- anv_pipeline_cache_merge(dst, src);
+ anv_shader_bin_ref(bin);
+ _mesa_hash_table_insert(dst->cache, anv_shader_bin_get_key(bin), bin);
+ }
}
return VK_SUCCESS;
#define MAX(a, b) ((a) > (b) ? (a) : (b))
static inline uint32_t
+align_down_npot_u32(uint32_t v, uint32_t a)
+{
+ return v - (v % a);
+}
+
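A worked example of the non-power-of-two round-down, as used earlier to trim a texel buffer view's range to whole texels:

   /* RGB32 texels are 12 bytes each: a 1000-byte range holds 83 whole
    * texels, so the range is trimmed to 83 * 12 = 996 bytes. */
   assert(align_down_npot_u32(1000, 12) == 996);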
+static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
assert(a != 0 && a == (a & -a));
};
#define ANV_MIN_STATE_SIZE_LOG2 6
-#define ANV_MAX_STATE_SIZE_LOG2 10
+#define ANV_MAX_STATE_SIZE_LOG2 17
-#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2)
+#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
struct anv_state_pool {
struct anv_block_pool *block_pool;
struct anv_pipeline_cache {
struct anv_device * device;
- struct anv_state_stream program_stream;
pthread_mutex_t mutex;
- uint32_t total_size;
- uint32_t table_size;
- uint32_t kernel_count;
- uint32_t * hash_table;
+ struct hash_table * cache;
};
struct anv_pipeline_bind_map;
void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
- struct anv_device *device);
+ struct anv_device *device,
+ bool cache_enabled);
void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache);
-uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
- const unsigned char *sha1,
- const struct brw_stage_prog_data **prog_data,
- struct anv_pipeline_bind_map *map);
-uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
- const unsigned char *sha1,
- const void *kernel,
- size_t kernel_size,
- const struct brw_stage_prog_data **prog_data,
- size_t prog_data_size,
- struct anv_pipeline_bind_map *map);
+
+struct anv_shader_bin *
+anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
+ const void *key, uint32_t key_size);
+struct anv_shader_bin *
+anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
+ const void *key_data, uint32_t key_size,
+ const void *kernel_data, uint32_t kernel_size,
+ const void *prog_data, uint32_t prog_data_size,
+ const struct anv_pipeline_bind_map *bind_map);
struct anv_device {
VK_LOADER_DATA _loader_data;
struct anv_state_pool dynamic_state_pool;
struct anv_block_pool instruction_block_pool;
- struct anv_pipeline_cache default_pipeline_cache;
+ struct anv_state_pool instruction_state_pool;
struct anv_block_pool surface_state_block_pool;
struct anv_state_pool surface_state_pool;
struct {
bool has_dynamic_offsets;
} stage[MESA_SHADER_STAGES];
+
+ unsigned char sha1[20];
};
struct anv_buffer {
void anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
struct anv_shader_module *module,
const char *entrypoint,
+ const struct anv_pipeline_layout *pipeline_layout,
const VkSpecializationInfo *spec_info);
static inline gl_shader_stage
uint32_t surface_count;
uint32_t sampler_count;
uint32_t image_count;
- uint32_t attachment_count;
struct anv_pipeline_binding * surface_to_descriptor;
struct anv_pipeline_binding * sampler_to_descriptor;
- uint32_t * surface_to_attachment;
};
+struct anv_shader_bin {
+ uint32_t ref_cnt;
+
+ struct anv_state kernel;
+ uint32_t kernel_size;
+
+ struct anv_pipeline_bind_map bind_map;
+
+ uint32_t prog_data_size;
+
+ /* Prog data follows, then the key, then the binding table data, each
+ * aligned to 8 bytes */
+};
+
+struct anv_shader_bin *
+anv_shader_bin_create(struct anv_device *device,
+ const void *key, uint32_t key_size,
+ const void *kernel, uint32_t kernel_size,
+ const void *prog_data, uint32_t prog_data_size,
+ const struct anv_pipeline_bind_map *bind_map);
+
+void
+anv_shader_bin_destroy(struct anv_device *device, struct anv_shader_bin *shader);
+
+static inline void
+anv_shader_bin_ref(struct anv_shader_bin *shader)
+{
+ assert(shader->ref_cnt >= 1);
+ __sync_fetch_and_add(&shader->ref_cnt, 1);
+}
+
+static inline void
+anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
+{
+ assert(shader->ref_cnt >= 1);
+ if (__sync_fetch_and_add(&shader->ref_cnt, -1) == 1)
+ anv_shader_bin_destroy(device, shader);
+}
+
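A hedged sketch of the intended lifecycle (the call sequence is illustrative, not prescribed by the patch): creation leaves ref_cnt at 1, each additional holder takes a reference, and the last unref triggers anv_shader_bin_destroy():

   struct anv_shader_bin *bin =
      anv_shader_bin_create(device, key, key_size, code, code_size,
                            &prog_data, sizeof(prog_data), &bind_map);
                                        /* ref_cnt == 1, creator's ref */
   anv_shader_bin_ref(bin);             /* a pipeline takes a second ref */
   anv_shader_bin_unref(device, bin);   /* the cache drops its ref */
   anv_shader_bin_unref(device, bin);   /* last ref gone -> destroy */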
+static inline const struct brw_stage_prog_data *
+anv_shader_bin_get_prog_data(const struct anv_shader_bin *shader)
+{
+ const void *data = shader;
+ data += align_u32(sizeof(struct anv_shader_bin), 8);
+ return data;
+}
+
struct anv_pipeline {
struct anv_device * device;
struct anv_batch batch;
struct anv_dynamic_state dynamic_state;
struct anv_pipeline_layout * layout;
- struct anv_pipeline_bind_map bindings[MESA_SHADER_STAGES];
bool use_repclear;
bool needs_data_cache;
- const struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES];
+ struct anv_shader_bin * shaders[MESA_SHADER_STAGES];
+
struct {
uint32_t start[MESA_SHADER_GEOMETRY + 1];
uint32_t size[MESA_SHADER_GEOMETRY + 1];
} gen9;
};
-static inline const struct brw_vs_prog_data *
-get_vs_prog_data(struct anv_pipeline *pipeline)
-{
- return (const struct brw_vs_prog_data *) pipeline->prog_data[MESA_SHADER_VERTEX];
-}
-
-static inline const struct brw_gs_prog_data *
-get_gs_prog_data(struct anv_pipeline *pipeline)
+static inline bool
+anv_pipeline_has_stage(const struct anv_pipeline *pipeline,
+ gl_shader_stage stage)
{
- return (const struct brw_gs_prog_data *) pipeline->prog_data[MESA_SHADER_GEOMETRY];
+ return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
}
-static inline const struct brw_wm_prog_data *
-get_wm_prog_data(struct anv_pipeline *pipeline)
-{
- return (const struct brw_wm_prog_data *) pipeline->prog_data[MESA_SHADER_FRAGMENT];
+#define ANV_DECL_GET_PROG_DATA_FUNC(prefix, stage) \
+static inline const struct brw_##prefix##_prog_data * \
+get_##prefix##_prog_data(struct anv_pipeline *pipeline) \
+{ \
+ if (anv_pipeline_has_stage(pipeline, stage)) { \
+ return (const struct brw_##prefix##_prog_data *) \
+ anv_shader_bin_get_prog_data(pipeline->shaders[stage]); \
+ } else { \
+ return NULL; \
+ } \
}
-static inline const struct brw_cs_prog_data *
-get_cs_prog_data(struct anv_pipeline *pipeline)
-{
- return (const struct brw_cs_prog_data *) pipeline->prog_data[MESA_SHADER_COMPUTE];
-}
+ANV_DECL_GET_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
+ANV_DECL_GET_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
+ANV_DECL_GET_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
+ANV_DECL_GET_PROG_DATA_FUNC(cs, MESA_SHADER_COMPUTE)
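For reference, ANV_DECL_GET_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX) expands to the old accessor body plus a stage-presence guard, so callers now get NULL for an absent stage instead of dereferencing a cleared pointer:

static inline const struct brw_vs_prog_data *
get_vs_prog_data(struct anv_pipeline *pipeline)
{
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX)) {
      return (const struct brw_vs_prog_data *)
         anv_shader_bin_get_prog_data(pipeline->shaders[MESA_SHADER_VERTEX]);
   } else {
      return NULL;
   }
}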
struct anv_graphics_pipeline_create_info {
/**
GENX(3DSTATE_SF_pack)(NULL, &pipeline->gen7.sf, &sf);
}
-static void
-gen7_emit_cb_state(struct anv_pipeline *pipeline,
- const VkPipelineColorBlendStateCreateInfo *info,
- const VkPipelineMultisampleStateCreateInfo *ms_info)
-{
- struct anv_device *device = pipeline->device;
-
- if (info == NULL || info->attachmentCount == 0) {
- pipeline->blend_state =
- anv_state_pool_emit(&device->dynamic_state_pool,
- GENX(BLEND_STATE), 64,
- .ColorBufferBlendEnable = false,
- .WriteDisableAlpha = true,
- .WriteDisableRed = true,
- .WriteDisableGreen = true,
- .WriteDisableBlue = true);
- } else {
- const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0];
- struct GENX(BLEND_STATE) blend = {
- .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
- .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
-
- .LogicOpEnable = info->logicOpEnable,
- .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
- .ColorBufferBlendEnable = a->blendEnable,
- .ColorClampRange = COLORCLAMP_RTFORMAT,
- .PreBlendColorClampEnable = true,
- .PostBlendColorClampEnable = true,
- .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
- .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
- .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
- .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
- .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
- .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
- .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
- .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
- .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
- .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
- };
-
- /* Our hardware applies the blend factor prior to the blend function
- * regardless of what function is used. Technically, this means the
- * hardware can do MORE than GL or Vulkan specify. However, it also
- * means that, for MIN and MAX, we have to stomp the blend factor to
- * ONE to make it a no-op.
- */
- if (a->colorBlendOp == VK_BLEND_OP_MIN ||
- a->colorBlendOp == VK_BLEND_OP_MAX) {
- blend.SourceBlendFactor = BLENDFACTOR_ONE;
- blend.DestinationBlendFactor = BLENDFACTOR_ONE;
- }
- if (a->alphaBlendOp == VK_BLEND_OP_MIN ||
- a->alphaBlendOp == VK_BLEND_OP_MAX) {
- blend.SourceAlphaBlendFactor = BLENDFACTOR_ONE;
- blend.DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
- }
-
- pipeline->blend_state = anv_state_pool_alloc(&device->dynamic_state_pool,
- GENX(BLEND_STATE_length) * 4,
- 64);
- GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend);
- if (pipeline->device->info.has_llc)
- anv_state_clflush(pipeline->blend_state);
- }
-
- anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
- bsp.BlendStatePointer = pipeline->blend_state.offset;
- }
-}
-
VkResult
genX(graphics_pipeline_create)(
VkDevice _device,
emit_ds_state(pipeline, pCreateInfo->pDepthStencilState, pass, subpass);
- gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
- pCreateInfo->pMultisampleState);
+ emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
+ pCreateInfo->pMultisampleState);
emit_urb_setup(pipeline);
- const VkPipelineRasterizationStateCreateInfo *rs_info =
- pCreateInfo->pRasterizationState;
-
- anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
- clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace],
- clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode],
- clip.ClipEnable = !(extra && extra->use_rectlist),
- clip.APIMode = APIMODE_OGL,
- clip.ViewportXYClipTestEnable = true,
- clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable,
- clip.ClipMode = CLIPMODE_NORMAL,
-
- clip.TriangleStripListProvokingVertexSelect = 0,
- clip.LineStripListProvokingVertexSelect = 0,
- clip.TriangleFanProvokingVertexSelect = 1,
-
- clip.MinimumPointWidth = 0.125,
- clip.MaximumPointWidth = 255.875,
- clip.MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1;
- }
+ emit_3dstate_clip(pipeline, pCreateInfo->pViewportState,
+ pCreateInfo->pRasterizationState, extra);
if (pCreateInfo->pMultisampleState &&
pCreateInfo->pMultisampleState->rasterizationSamples > 1)
wm.LineEndCapAntialiasingRegionWidth = 0; /* 0.5 pixels */
wm.LineAntialiasingRegionWidth = 1; /* 1.0 pixels */
wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
+ wm.PixelShaderKillPixel = wm_prog_data->uses_kill;
wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
}
static void
-emit_cb_state(struct anv_pipeline *pipeline,
- const VkPipelineColorBlendStateCreateInfo *info,
- const VkPipelineMultisampleStateCreateInfo *ms_info)
-{
- struct anv_device *device = pipeline->device;
-
- uint32_t num_dwords = GENX(BLEND_STATE_length);
- pipeline->blend_state =
- anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);
-
- struct GENX(BLEND_STATE) blend_state = {
- .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
- .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
- };
-
- /* Default everything to disabled */
- for (uint32_t i = 0; i < 8; i++) {
- blend_state.Entry[i].WriteDisableAlpha = true;
- blend_state.Entry[i].WriteDisableRed = true;
- blend_state.Entry[i].WriteDisableGreen = true;
- blend_state.Entry[i].WriteDisableBlue = true;
- }
-
- struct anv_pipeline_bind_map *map =
- &pipeline->bindings[MESA_SHADER_FRAGMENT];
-
- bool has_writeable_rt = false;
- for (unsigned i = 0; i < map->surface_count; i++) {
- struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
-
- /* All color attachments are at the beginning of the binding table */
- if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
- break;
-
- /* We can have at most 8 attachments */
- assert(i < 8);
-
- if (binding->index >= info->attachmentCount)
- continue;
-
- assert(binding->binding == 0);
- const VkPipelineColorBlendAttachmentState *a =
- &info->pAttachments[binding->index];
-
- if (a->srcColorBlendFactor != a->srcAlphaBlendFactor ||
- a->dstColorBlendFactor != a->dstAlphaBlendFactor ||
- a->colorBlendOp != a->alphaBlendOp) {
- blend_state.IndependentAlphaBlendEnable = true;
- }
-
- blend_state.Entry[i] = (struct GENX(BLEND_STATE_ENTRY)) {
- .LogicOpEnable = info->logicOpEnable,
- .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
- .ColorBufferBlendEnable = a->blendEnable,
- .PreBlendSourceOnlyClampEnable = false,
- .ColorClampRange = COLORCLAMP_RTFORMAT,
- .PreBlendColorClampEnable = true,
- .PostBlendColorClampEnable = true,
- .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
- .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
- .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
- .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
- .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
- .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
- .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
- .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
- .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
- .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
- };
-
- if (a->colorWriteMask != 0)
- has_writeable_rt = true;
-
- /* Our hardware applies the blend factor prior to the blend function
- * regardless of what function is used. Technically, this means the
- * hardware can do MORE than GL or Vulkan specify. However, it also
- * means that, for MIN and MAX, we have to stomp the blend factor to
- * ONE to make it a no-op.
- */
- if (a->colorBlendOp == VK_BLEND_OP_MIN ||
- a->colorBlendOp == VK_BLEND_OP_MAX) {
- blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE;
- blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE;
- }
- if (a->alphaBlendOp == VK_BLEND_OP_MIN ||
- a->alphaBlendOp == VK_BLEND_OP_MAX) {
- blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE;
- blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
- }
- }
-
- struct GENX(BLEND_STATE_ENTRY) *bs0 = &blend_state.Entry[0];
-
- anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND), blend) {
- blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable;
- blend.HasWriteableRT = has_writeable_rt;
- blend.ColorBufferBlendEnable = bs0->ColorBufferBlendEnable;
- blend.SourceAlphaBlendFactor = bs0->SourceAlphaBlendFactor;
- blend.DestinationAlphaBlendFactor = bs0->DestinationAlphaBlendFactor;
- blend.SourceBlendFactor = bs0->SourceBlendFactor;
- blend.DestinationBlendFactor = bs0->DestinationBlendFactor;
- blend.AlphaTestEnable = false;
- blend.IndependentAlphaBlendEnable =
- blend_state.IndependentAlphaBlendEnable;
- }
-
- GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state);
- if (!device->info.has_llc)
- anv_state_clflush(pipeline->blend_state);
-
- anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
- bsp.BlendStatePointer = pipeline->blend_state.offset;
- bsp.BlendStatePointerValid = true;
- }
-}
-
-static void
emit_ms_state(struct anv_pipeline *pipeline,
const VkPipelineMultisampleStateCreateInfo *info)
{
emit_urb_setup(pipeline);
- const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
- anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
- clip.ClipEnable = !(extra && extra->use_rectlist);
- clip.EarlyCullEnable = true;
- clip.APIMode = 1; /* D3D */
- clip.ViewportXYClipTestEnable = true;
-
- clip.ClipMode =
- pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
- REJECT_ALL : NORMAL;
-
- clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
- (wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0;
-
- clip.TriangleStripListProvokingVertexSelect = 0;
- clip.LineStripListProvokingVertexSelect = 0;
- clip.TriangleFanProvokingVertexSelect = 1;
-
- clip.MinimumPointWidth = 0.125;
- clip.MaximumPointWidth = 255.875;
- clip.MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1;
- }
+ emit_3dstate_clip(pipeline, pCreateInfo->pViewportState,
+ pCreateInfo->pRasterizationState, extra);
+ const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) {
wm.StatisticsEnable = true;
wm.LineEndCapAntialiasingRegionWidth = _05pixels;
}
}
+#else
+void genX(CmdCopyQueryPoolResults)(
+ VkCommandBuffer commandBuffer,
+ VkQueryPool queryPool,
+ uint32_t firstQuery,
+ uint32_t queryCount,
+ VkBuffer destBuffer,
+ VkDeviceSize destOffset,
+ VkDeviceSize destStride,
+ VkQueryResultFlags flags)
+{
+ anv_finishme("Queries not yet supported on Ivy Bridge");
+}
#endif
bool needs_dc = false, needs_slm = false;
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- const struct brw_stage_prog_data *prog_data = pipeline->prog_data[i];
+ if (!anv_pipeline_has_stage(pipeline, i))
+ continue;
+
+ const struct brw_stage_prog_data *prog_data =
+ anv_shader_bin_get_prog_data(pipeline->shaders[i]);
needs_dc |= pipeline->needs_data_cache;
- needs_slm |= prog_data && prog_data->total_shared;
+ needs_slm |= prog_data->total_shared;
}
return get_default_l3_weights(&pipeline->device->info,
/* When we free the pipeline, we detect stages based on the NULL status
* of the per-stage shader pointers. Make them NULL by default.
*/
- memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
- memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
+ memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
pipeline->vs_simd8 = NO_KERNEL;
pipeline->vs_vec4 = NO_KERNEL;
assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT);
ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module);
- anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module,
- pCreateInfo->stage.pName,
- pCreateInfo->stage.pSpecializationInfo);
+ result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module,
+ pCreateInfo->stage.pName,
+ pCreateInfo->stage.pSpecializationInfo);
+ if (result != VK_SUCCESS) {
+ anv_free2(&device->alloc, pAllocator, pipeline);
+ return result;
+ }
pipeline->use_repclear = false;
if (input_index < 0)
continue;
+ if (attr == VARYING_SLOT_PNTC) {
+ sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
+ continue;
+ }
+
const int slot = fs_input_map->varying_to_slot[attr];
if (input_index >= 16)
GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, depth_stencil_dw, &depth_stencil);
#endif
}
+
+static void
+emit_cb_state(struct anv_pipeline *pipeline,
+ const VkPipelineColorBlendStateCreateInfo *info,
+ const VkPipelineMultisampleStateCreateInfo *ms_info)
+{
+ struct anv_device *device = pipeline->device;
+
+ const uint32_t num_dwords = GENX(BLEND_STATE_length);
+ pipeline->blend_state =
+ anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);
+
+ struct GENX(BLEND_STATE) blend_state = {
+#if GEN_GEN >= 8
+ .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
+ .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
+#else
+ /* Make sure it gets zeroed */
+ .Entry = { { 0, }, },
+#endif
+ };
+
+ /* Default everything to disabled */
+ for (uint32_t i = 0; i < 8; i++) {
+ blend_state.Entry[i].WriteDisableAlpha = true;
+ blend_state.Entry[i].WriteDisableRed = true;
+ blend_state.Entry[i].WriteDisableGreen = true;
+ blend_state.Entry[i].WriteDisableBlue = true;
+ }
+
+ uint32_t surface_count = 0;
+ struct anv_pipeline_bind_map *map;
+ if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
+ map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
+ surface_count = map->surface_count;
+ }
+
+ bool has_writeable_rt = false;
+ for (unsigned i = 0; i < surface_count; i++) {
+ struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
+
+ /* All color attachments are at the beginning of the binding table */
+ if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
+ break;
+
+ /* We can have at most 8 attachments */
+ assert(i < 8);
+
+ if (binding->index >= info->attachmentCount)
+ continue;
+
+ assert(binding->binding == 0);
+ const VkPipelineColorBlendAttachmentState *a =
+ &info->pAttachments[binding->index];
+
+ blend_state.Entry[i] = (struct GENX(BLEND_STATE_ENTRY)) {
+#if GEN_GEN < 8
+ .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
+ .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
+#endif
+ .LogicOpEnable = info->logicOpEnable,
+ .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
+ .ColorBufferBlendEnable = a->blendEnable,
+ .ColorClampRange = COLORCLAMP_RTFORMAT,
+ .PreBlendColorClampEnable = true,
+ .PostBlendColorClampEnable = true,
+ .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
+ .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
+ .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
+ .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
+ .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
+ .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
+ .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
+ .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
+ .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
+ .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
+ };
+
+ if (a->srcColorBlendFactor != a->srcAlphaBlendFactor ||
+ a->dstColorBlendFactor != a->dstAlphaBlendFactor ||
+ a->colorBlendOp != a->alphaBlendOp) {
+#if GEN_GEN >= 8
+ blend_state.IndependentAlphaBlendEnable = true;
+#else
+ blend_state.Entry[i].IndependentAlphaBlendEnable = true;
+#endif
+ }
+
+ if (a->colorWriteMask != 0)
+ has_writeable_rt = true;
+
+ /* Our hardware applies the blend factor prior to the blend function
+ * regardless of what function is used. Technically, this means the
+ * hardware can do MORE than GL or Vulkan specify. However, it also
+ * means that, for MIN and MAX, we have to stomp the blend factor to
+ * ONE to make it a no-op.
+ */
+ if (a->colorBlendOp == VK_BLEND_OP_MIN ||
+ a->colorBlendOp == VK_BLEND_OP_MAX) {
+ blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE;
+ blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE;
+ }
+ if (a->alphaBlendOp == VK_BLEND_OP_MIN ||
+ a->alphaBlendOp == VK_BLEND_OP_MAX) {
+ blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE;
+ blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
+ }
+ }
+
+#if GEN_GEN >= 8
+ struct GENX(BLEND_STATE_ENTRY) *bs0 = &blend_state.Entry[0];
+ anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND), blend) {
+ blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable;
+ blend.HasWriteableRT = has_writeable_rt;
+ blend.ColorBufferBlendEnable = bs0->ColorBufferBlendEnable;
+ blend.SourceAlphaBlendFactor = bs0->SourceAlphaBlendFactor;
+ blend.DestinationAlphaBlendFactor = bs0->DestinationAlphaBlendFactor;
+ blend.SourceBlendFactor = bs0->SourceBlendFactor;
+ blend.DestinationBlendFactor = bs0->DestinationBlendFactor;
+ blend.AlphaTestEnable = false;
+ blend.IndependentAlphaBlendEnable =
+ blend_state.IndependentAlphaBlendEnable;
+ }
+#else
+ (void)has_writeable_rt;
+#endif
+
+ GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state);
+ if (!device->info.has_llc)
+ anv_state_clflush(pipeline->blend_state);
+
+ anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
+ bsp.BlendStatePointer = pipeline->blend_state.offset;
+#if GEN_GEN >= 8
+ bsp.BlendStatePointerValid = true;
+#endif
+ }
+}
+
+static void
+emit_3dstate_clip(struct anv_pipeline *pipeline,
+ const VkPipelineViewportStateCreateInfo *vp_info,
+ const VkPipelineRasterizationStateCreateInfo *rs_info,
+ const struct anv_graphics_pipeline_create_info *extra)
+{
+ const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
+ (void) wm_prog_data;
+ anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
+ clip.ClipEnable = !(extra && extra->use_rectlist);
+ clip.EarlyCullEnable = true;
+ clip.APIMode = APIMODE_D3D;
+ clip.ViewportXYClipTestEnable = true;
+
+ clip.ClipMode = rs_info->rasterizerDiscardEnable ?
+ CLIPMODE_REJECT_ALL : CLIPMODE_NORMAL;
+
+ clip.TriangleStripListProvokingVertexSelect = 0;
+ clip.LineStripListProvokingVertexSelect = 0;
+ clip.TriangleFanProvokingVertexSelect = 1;
+
+ clip.MinimumPointWidth = 0.125;
+ clip.MaximumPointWidth = 255.875;
+ clip.MaximumVPIndex = vp_info->viewportCount - 1;
+
+#if GEN_GEN == 7
+ clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace];
+ clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode];
+ clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable;
+#else
+ clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
+ (wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0;
+#endif
+ }
+}
#include "xmlpool.h"
#endif
#endif
-#ifdef HAVE_SYSFS
-#include <sys/types.h>
+#ifdef MAJOR_IN_MKDEV
+#include <sys/mkdev.h>
+#endif
+#ifdef MAJOR_IN_SYSMACROS
+#include <sys/sysmacros.h>
#endif
#include "loader.h"
}
fd = xcb_dri3_open_reply_fds(conn, reply)[0];
+ free(reply);
fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
return fd;
* Chia-I Wu <olv@lunarg.com>
*/
+#ifdef HAVE_FUNC_ATTRIBUTE_VISIBILITY
+#define HIDDEN __attribute__((visibility("hidden")))
+#else
+#define HIDDEN
+#endif
__asm__(".text\n"
".balign 32\n"
{
}
-static char
-x86_64_entry_start[];
+extern char
+x86_64_entry_start[] HIDDEN;
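The old static declarations were tentative definitions that conflict with the symbols actually defined in the assembly; declaring them extern makes the C code reference the assembler's definitions, while the visibility attribute (guarded by HAVE_FUNC_ATTRIBUTE_VISIBILITY) keeps these internal entry tables out of the DSO's dynamic symbol table. The same HIDDEN boilerplate is repeated in each of the entry_x86*.c variants below.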
mapi_func
entry_get_public(int slot)
#include <string.h>
+#ifdef HAVE_FUNC_ATTRIBUTE_VISIBILITY
+#define HIDDEN __attribute__((visibility("hidden")))
+#else
+#define HIDDEN
+#endif
+
__asm__(".text");
__asm__("x86_current_tls:\n\t"
extern unsigned long
x86_current_tls();
-static char x86_entry_start[];
-static char x86_entry_end[];
+extern char x86_entry_start[] HIDDEN;
+extern char x86_entry_end[] HIDDEN;
void
entry_patch_public(void)
* Chia-I Wu <olv@lunarg.com>
*/
+#ifdef HAVE_FUNC_ATTRIBUTE_VISIBILITY
+#define HIDDEN __attribute__((visibility("hidden")))
+#else
+#define HIDDEN
+#endif
#define X86_ENTRY_SIZE 32
#include <string.h>
#include "u_execmem.h"
-static const char x86_entry_start[];
-static const char x86_entry_end[];
+extern const char x86_entry_start[] HIDDEN;
+extern const char x86_entry_end[] HIDDEN;
void
entry_patch_public(void)
-I$(top_builddir)/src \
-I$(top_srcdir)/src \
-I$(top_builddir)/src/compiler/nir \
- -I$(top_srcdir)/src/mesa \
-I$(top_builddir)/src/mesa \
- -I$(top_srcdir)/src/mesa/main \
+ -I$(top_srcdir)/src/mesa \
-I$(top_builddir)/src/mesa/main \
- -I$(top_srcdir)/src/mapi \
+ -I$(top_srcdir)/src/mesa/main \
-I$(top_builddir)/src/mapi \
+ -I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary
env.Append(CPPPATH = [
'../compiler/nir', # for generated nir_opcodes.h, etc
'#/src',
+ Dir('../mapi'), # src/mapi build path
'#/src/mapi',
'#/src/glsl',
+ Dir('.'), # src/mesa build path
'#/src/mesa',
'#/src/gallium/include',
'#/src/gallium/auxiliary',
- Dir('../mapi'), # src/mapi build path
- Dir('.'), # src/mesa build path
])
if env['platform'] == 'windows':
)
# Add the dir containing the generated header (somewhere inside the
# build dir) to the include path
- env.Append(CPPPATH = [matypes[0].dir])
+ env.Prepend(CPPPATH = [matypes[0].dir])
def write_git_sha1_h_file(filename):
struct __DRIimageList images;
unsigned int format;
uint32_t buffer_mask = 0;
+ int ret;
front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
if (back_rb)
buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
- (*screen->image.loader->getBuffers) (drawable,
- driGLFormatToImageFormat(format),
- &drawable->dri2.stamp,
- drawable->loaderPrivate,
- buffer_mask,
- &images);
+ ret = screen->image.loader->getBuffers(drawable,
+ driGLFormatToImageFormat(format),
+ &drawable->dri2.stamp,
+ drawable->loaderPrivate,
+ buffer_mask,
+ &images);
+ if (!ret)
+ return;
if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
drawable->w = images.front->width;
brw_nir_attribute_workarounds.c \
brw_nir_intrinsics.c \
brw_nir_opt_peephole_ffma.c \
+ brw_nir_tcs_workarounds.c \
brw_packed_float.c \
brw_predicated_break.cpp \
brw_reg.h \
/** A bitfield of per-vertex outputs written. */
uint64_t outputs_written;
+ bool quads_workaround;
+
struct brw_sampler_prog_key_data tex;
};
* have to multiply by VerticesIn to obtain the total storage requirement.
*/
if (8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in >
- max_push_components) {
+ max_push_components || gs_prog_data->invocations > 1) {
gs_prog_data->base.include_vue_handles = true;
/* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
break;
fs_reg m0 = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
- fs_reg m0_2 = byte_offset(m0, 2 * sizeof(uint32_t));
+ fs_reg m0_2 = component(m0, 2);
- const fs_builder fwa_bld = bld.exec_all();
+ const fs_builder chanbld = bld.exec_all().group(1, 0);
/* Zero the message header */
- fwa_bld.MOV(m0, brw_imm_ud(0u));
+ bld.exec_all().MOV(m0, brw_imm_ud(0u));
/* Copy "Barrier ID" from r0.2, bits 16:13 */
- fwa_bld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
+ chanbld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(INTEL_MASK(16, 13)));
/* Shift it up to bits 27:24. */
- fwa_bld.SHL(m0_2, m0_2, brw_imm_ud(11));
+ chanbld.SHL(m0_2, m0_2, brw_imm_ud(11));
/* Set the Barrier Count and the enable bit */
- fwa_bld.OR(m0_2, m0_2,
- brw_imm_ud(tcs_prog_data->instances << 8 | (1 << 15)));
+ chanbld.OR(m0_2, m0_2,
+ brw_imm_ud(tcs_prog_data->instances << 9 | (1 << 15)));
bld.emit(SHADER_OPCODE_BARRIER, bld.null_reg_ud(), m0);
break;
dest = get_nir_dest(instr->dest);
fs_reg surface = brw_imm_ud(GEN7_BTI_SLM);
- fs_reg offset = get_nir_src(instr->src[0]);
+ fs_reg offset;
fs_reg data1 = get_nir_src(instr->src[1]);
fs_reg data2;
if (op == BRW_AOP_CMPWR)
data2 = get_nir_src(instr->src[2]);
+ /* Get the offset */
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+ if (const_offset) {
+ offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
+ } else {
+ offset = vgrf(glsl_type::uint_type);
+ bld.ADD(offset,
+ retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(instr->const_index[0]));
+ }
+
/* Emit the actual atomic operation */
fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset,
bool brw_nir_apply_trig_workarounds(nir_shader *nir);
+void brw_nir_apply_tcs_quads_workaround(nir_shader *nir);
+
nir_shader *brw_nir_apply_sampler_key(nir_shader *nir,
const struct brw_device_info *devinfo,
const struct brw_sampler_prog_key_data *key,
--- /dev/null
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "compiler/nir/nir_builder.h"
+#include "brw_nir.h"
+
+/**
+ * Implements the WaPreventHSTessLevelsInterference workaround (for Gen7-8).
+ *
+ * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the
+ * definition of the patch header layouts):
+ *
+ * "HW Bug: The Tessellation stage will incorrectly add domain points
+ * along patch edges under the following conditions, which may result
+ * in conformance failures and/or cracking artifacts:
+ *
+ * * QUAD domain
+ * * INTEGER partitioning
+ * * All three TessFactors in a given U or V direction (e.g., V
+ * direction: UEQ0, InsideV, UEQ1) are all exactly 1.0
+ * * All three TessFactors in the other direction are > 1.0 and all
+ * round up to the same integer value (e.g., U direction:
+ * VEQ0 = 3.1, InsideU = 3.7, VEQ1 = 3.4)
+ *
+ * The suggested workaround (to be implemented as part of the postamble
+ * to the HS shader in the HS kernel) is:
+ *
+ * if (
+ * (TF[UEQ0] > 1.0) ||
+ * (TF[VEQ0] > 1.0) ||
+ * (TF[UEQ1] > 1.0) ||
+ * (TF[VEQ1] > 1.0) ||
+ * (TF[INSIDE_U] > 1.0) ||
+ * (TF[INSIDE_V] > 1.0) )
+ * {
+ * TF[INSIDE_U] = (TF[INSIDE_U] == 1.0) ? 2.0 : TF[INSIDE_U];
+ * TF[INSIDE_V] = (TF[INSIDE_V] == 1.0) ? 2.0 : TF[INSIDE_V];
+ * }"
+ *
+ * There's a subtlety here. Intel internal HSD-ES bug 1208668495 notes
+ * that the above workaround fails to fix certain GL/ES CTS tests which
+ * have inside tessellation factors of -1.0. This can be explained by
+ * a quote from the ARB_tessellation_shader specification:
+ *
+ * "If "equal_spacing" is used, the floating-point tessellation level is
+ * first clamped to the range [1,<max>], where <max> is implementation-
+ * dependent maximum tessellation level (MAX_TESS_GEN_LEVEL)."
+ *
+ * In other words, the actual inner tessellation factor used is
+ * clamp(TF[INSIDE_*], 1.0, 64.0). So we want to compare the clamped
+ * value against 1.0. To accomplish this, we change the comparison from
+ * (TF[INSIDE_*] == 1.0) to (TF[INSIDE_*] <= 1.0).
+ */
+
+static inline nir_ssa_def *
+load_output(nir_builder *b, int num_components, int offset)
+{
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_output);
+ nir_ssa_dest_init(&load->instr, &load->dest, num_components, 32, NULL);
+ load->num_components = num_components;
+ load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
+ nir_intrinsic_set_base(load, offset);
+
+ nir_builder_instr_insert(b, &load->instr);
+
+ return &load->dest.ssa;
+}
+
+static inline void
+store_output(nir_builder *b, nir_ssa_def *value, int offset, unsigned comps)
+{
+ nir_intrinsic_instr *store =
+ nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+ store->num_components = comps;
+ nir_intrinsic_set_write_mask(store, (1u << comps) - 1);
+ store->src[0] = nir_src_for_ssa(value);
+ store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
+ nir_builder_instr_insert(b, &store->instr);
+}
+
+static void
+emit_quads_workaround(nir_builder *b, nir_block *block)
+{
+ /* We're going to insert a new if-statement in a predecessor of the end
+ * block. This would normally create a new block (after the if) which
+ * would then become the predecessor of the end block, causing our set
+ * walking to get screwed up. To avoid this, just emit a constant at
+ * the end of our current block, and insert the if before that.
+ */
+ b->cursor = nir_after_block_before_jump(block);
+ b->cursor = nir_before_instr(nir_imm_int(b, 0)->parent_instr);
+
+ nir_ssa_def *inner = load_output(b, 2, 0);
+ nir_ssa_def *outer = load_output(b, 4, 1);
+
+ nir_ssa_def *any_greater_than_1 =
+ nir_ior(b, nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), outer)),
+ nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), inner)));
+
+ nir_if *if_stmt = nir_if_create(b->shader);
+ if_stmt->condition = nir_src_for_ssa(any_greater_than_1);
+ nir_builder_cf_insert(b, &if_stmt->cf_node);
+
+ /* Fill out the new then-block */
+ b->cursor = nir_after_cf_list(&if_stmt->then_list);
+
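+ /* nir_bcsel(fge(1.0, inner), 2.0, inner): any inner factor <= 1.0 (the
+ * clamped equivalent of the spec's == 1.0 test, see the comment at the
+ * top of this file) becomes 2.0; the outer factors are left untouched.
+ */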
+ store_output(b, nir_bcsel(b, nir_fge(b, nir_imm_float(b, 1.0f), inner),
+ nir_imm_float(b, 2.0f), inner), 0, 2);
+}
+
+void
+brw_nir_apply_tcs_quads_workaround(nir_shader *nir)
+{
+ assert(nir->stage == MESA_SHADER_TESS_CTRL);
+
+ nir_foreach_function(func, nir) {
+ if (!func->impl)
+ continue;
+
+ nir_builder b;
+ nir_builder_init(&b, func->impl);
+
+ struct set_entry *entry;
+ set_foreach(func->impl->end_block->predecessors, entry) {
+ nir_block *pred = (nir_block *) entry->key;
+ emit_quads_workaround(&b, pred);
+ }
+
+ nir_metadata_preserve(func->impl, 0);
+ }
+}
* The amount of batch space it takes to emit an MI_REPORT_PERF_COUNT snapshot,
* including the required PIPE_CONTROL flushes.
*
- * Sandybridge is the worst case scenario: brw_emit_mi_flush
- * expands to three PIPE_CONTROLs which are 4 DWords each. We have to flush
- * before and after MI_REPORT_PERF_COUNT, so multiply by two. Finally, add
- * the 3 DWords for MI_REPORT_PERF_COUNT itself.
+ * Sandybridge is the worst case scenario: brw_emit_mi_flush expands to four
+ * PIPE_CONTROLs which are 5 DWords each. We have to flush before and after
+ * MI_REPORT_PERF_COUNT, so multiply by two. Finally, add the 3 DWords for
+ * MI_REPORT_PERF_COUNT itself.
*/
-#define MI_REPORT_PERF_COUNT_BATCH_DWORDS (2 * (3 * 4) + 3)
+#define MI_REPORT_PERF_COUNT_BATCH_DWORDS (2 * (4 * 5) + 3)
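That works out to 2 * (4 * 5) + 3 = 43 DWords, i.e. 172 bytes of batch space.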
/**
* Emit an MI_REPORT_PERF_COUNT command packet.
void
brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
{
+ if (brw->gen >= 6 &&
+ (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
+ (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
+ /* A pipe control command with flush and invalidate bits set
+ * simultaneously is an inherently racy operation on Gen6+ if the
+ * contents of the flushed caches were intended to become visible from
+ * any of the invalidated caches. Split it into two PIPE_CONTROLs; the
+ * first one should stall the pipeline to make sure that the flushed R/W
+ * caches are coherent with memory once the specified R/O caches are
+ * invalidated. On pre-Gen6 hardware the (implicit) R/O cache
+ * invalidation seems to happen at the bottom of the pipeline together
+ * with any write cache flush, so this shouldn't be a concern.
+ */
+ brw_emit_pipe_control_flush(brw, (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) |
+ PIPE_CONTROL_CS_STALL);
+ flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
+ }
+
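To make the split concrete, here is a sketch of an affected call. The flag combination is illustrative, not taken from this series; the flush/invalidate groupings are the PIPE_CONTROL_CACHE_*_BITS macros added to the headers further down:

/* A combined write-cache flush + read-cache invalidate.  On Gen6+ this
 * call is now emitted as two PIPE_CONTROLs: a CS-stalled render-target
 * flush, followed by the texture-cache invalidate.
 */
brw_emit_pipe_control_flush(brw,
                            PIPE_CONTROL_RENDER_TARGET_FLUSH |
                            PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);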
if (brw->gen >= 8) {
if (brw->gen == 8)
gen8_add_cs_stall_workaround_bits(&flags);
+ if (brw->gen == 9 &&
+ (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
+ /* Hardware workaround: SKL
+ *
+ * Emit Pipe Control with all bits set to zero before emitting
+ * a Pipe Control with VF Cache Invalidate set.
+ */
+ brw_emit_pipe_control_flush(brw, 0);
+ }
+
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
OUT_BATCH(flags);
} else {
int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH;
if (brw->gen >= 6) {
- if (brw->gen == 9) {
- /* Hardware workaround: SKL
- *
- * Emit Pipe Control with all bits set to zero before emitting
- * a Pipe Control with VF Cache Invalidate set.
- */
- brw_emit_pipe_control_flush(brw, 0);
- }
-
flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_VF_CACHE_INVALIDATE |
key->patch_outputs_written);
found |= key_debug(brw, "TES primitive mode", old_key->tes_primitive_mode,
key->tes_primitive_mode);
+ found |= key_debug(brw, "quads and equal_spacing workaround",
+ old_key->quads_workaround, key->quads_workaround);
found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
if (!found) {
* based on the domain the DS is expecting to tessellate.
*/
key.tes_primitive_mode = tep->program.PrimitiveMode;
+ key.quads_workaround = brw->gen < 9 &&
+ tep->program.PrimitiveMode == GL_QUADS &&
+ tep->program.Spacing == GL_EQUAL;
if (tcp) {
key.program_string_id = tcp->id;
struct gl_tess_ctrl_program *tcp = (struct gl_tess_ctrl_program *)prog;
struct brw_tess_ctrl_program *btcp = brw_tess_ctrl_program(tcp);
+ const struct gl_shader *tes =
+ shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
memset(&key, 0, sizeof(key));
if (brw->gen < 8)
key.input_vertices = shader_prog->TessCtrl.VerticesOut;
- key.tes_primitive_mode =
- shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL] ?
- shader_prog->TessEval.PrimitiveMode : GL_TRIANGLES;
+ if (tes) {
+ key.tes_primitive_mode = shader_prog->TessEval.PrimitiveMode;
+ key.quads_workaround = brw->gen < 9 &&
+ shader_prog->TessEval.PrimitiveMode == GL_QUADS &&
+ shader_prog->TessEval.Spacing == GL_EQUAL;
+ } else {
+ key.tes_primitive_mode = GL_TRIANGLES;
+ }
key.outputs_written = prog->OutputsWritten;
key.patch_outputs_written = prog->PatchOutputsWritten;
{
bool progress = false;
- int last_reg = -1, last_reg_offset = -1;
- enum brw_reg_file last_reg_file = BAD_FILE;
+ foreach_block(block, cfg) {
+ int last_reg = -1, last_reg_offset = -1;
+ enum brw_reg_file last_reg_file = BAD_FILE;
+
+ uint8_t imm[4] = { 0 };
+ int inst_count = 0;
+ vec4_instruction *imm_inst[4];
+ unsigned writemask = 0;
+ enum brw_reg_type dest_type = BRW_REGISTER_TYPE_F;
+
+ foreach_inst_in_block_safe(vec4_instruction, inst, block) {
+ int vf = -1;
+ enum brw_reg_type need_type;
+
+ /* Look for unconditional MOVs from an immediate with a partial
+ * writemask. Skip type-conversion MOVs other than integer 0,
+ * where the type doesn't matter. See if the immediate can be
+ * represented as a VF.
+ */
+ if (inst->opcode == BRW_OPCODE_MOV &&
+ inst->src[0].file == IMM &&
+ inst->predicate == BRW_PREDICATE_NONE &&
+ inst->dst.writemask != WRITEMASK_XYZW &&
+ (inst->src[0].type == inst->dst.type || inst->src[0].d == 0)) {
+
+ vf = brw_float_to_vf(inst->src[0].d);
+ need_type = BRW_REGISTER_TYPE_D;
+
+ if (vf == -1) {
+ vf = brw_float_to_vf(inst->src[0].f);
+ need_type = BRW_REGISTER_TYPE_F;
+ }
+ } else {
+ last_reg = -1;
+ }
- uint8_t imm[4] = { 0 };
- int inst_count = 0;
- vec4_instruction *imm_inst[4];
- unsigned writemask = 0;
- enum brw_reg_type dest_type = BRW_REGISTER_TYPE_F;
+ /* If this wasn't a MOV, or the destination register doesn't match,
+ * or we have to switch destination types, then this breaks our
+ * sequence. Combine anything we've accumulated so far.
+ */
+ if (last_reg != inst->dst.nr ||
+ last_reg_offset != inst->dst.reg_offset ||
+ last_reg_file != inst->dst.file ||
+ (vf > 0 && dest_type != need_type)) {
+
+ if (inst_count > 1) {
+ unsigned vf;
+ memcpy(&vf, imm, sizeof(vf));
+ vec4_instruction *mov = MOV(imm_inst[0]->dst, brw_imm_vf(vf));
+ mov->dst.type = dest_type;
+ mov->dst.writemask = writemask;
+ inst->insert_before(block, mov);
+
+ for (int i = 0; i < inst_count; i++) {
+ imm_inst[i]->remove(block);
+ }
- foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
- int vf = -1;
- enum brw_reg_type need_type;
+ progress = true;
+ }
- /* Look for unconditional MOVs from an immediate with a partial
- * writemask. Skip type-conversion MOVs other than integer 0,
- * where the type doesn't matter. See if the immediate can be
- * represented as a VF.
- */
- if (inst->opcode == BRW_OPCODE_MOV &&
- inst->src[0].file == IMM &&
- inst->predicate == BRW_PREDICATE_NONE &&
- inst->dst.writemask != WRITEMASK_XYZW &&
- (inst->src[0].type == inst->dst.type || inst->src[0].d == 0)) {
-
- vf = brw_float_to_vf(inst->src[0].d);
- need_type = BRW_REGISTER_TYPE_D;
-
- if (vf == -1) {
- vf = brw_float_to_vf(inst->src[0].f);
- need_type = BRW_REGISTER_TYPE_F;
- }
- } else {
- last_reg = -1;
- }
+ inst_count = 0;
+ last_reg = -1;
+ writemask = 0;
+ dest_type = BRW_REGISTER_TYPE_F;
- /* If this wasn't a MOV, or the destination register doesn't match,
- * or we have to switch destination types, then this breaks our
- * sequence. Combine anything we've accumulated so far.
- */
- if (last_reg != inst->dst.nr ||
- last_reg_offset != inst->dst.reg_offset ||
- last_reg_file != inst->dst.file ||
- (vf > 0 && dest_type != need_type)) {
-
- if (inst_count > 1) {
- unsigned vf;
- memcpy(&vf, imm, sizeof(vf));
- vec4_instruction *mov = MOV(imm_inst[0]->dst, brw_imm_vf(vf));
- mov->dst.type = dest_type;
- mov->dst.writemask = writemask;
- inst->insert_before(block, mov);
-
- for (int i = 0; i < inst_count; i++) {
- imm_inst[i]->remove(block);
+ for (int i = 0; i < 4; i++) {
+ imm[i] = 0;
}
-
- progress = true;
}
- inst_count = 0;
- last_reg = -1;
- writemask = 0;
- dest_type = BRW_REGISTER_TYPE_F;
-
- for (int i = 0; i < 4; i++) {
- imm[i] = 0;
+ /* Record this instruction's value (if it was representable). */
+ if (vf != -1) {
+ if ((inst->dst.writemask & WRITEMASK_X) != 0)
+ imm[0] = vf;
+ if ((inst->dst.writemask & WRITEMASK_Y) != 0)
+ imm[1] = vf;
+ if ((inst->dst.writemask & WRITEMASK_Z) != 0)
+ imm[2] = vf;
+ if ((inst->dst.writemask & WRITEMASK_W) != 0)
+ imm[3] = vf;
+
+ writemask |= inst->dst.writemask;
+ imm_inst[inst_count++] = inst;
+
+ last_reg = inst->dst.nr;
+ last_reg_offset = inst->dst.reg_offset;
+ last_reg_file = inst->dst.file;
+ if (vf > 0)
+ dest_type = need_type;
}
}
-
- /* Record this instruction's value (if it was representable). */
- if (vf != -1) {
- if ((inst->dst.writemask & WRITEMASK_X) != 0)
- imm[0] = vf;
- if ((inst->dst.writemask & WRITEMASK_Y) != 0)
- imm[1] = vf;
- if ((inst->dst.writemask & WRITEMASK_Z) != 0)
- imm[2] = vf;
- if ((inst->dst.writemask & WRITEMASK_W) != 0)
- imm[3] = vf;
-
- writemask |= inst->dst.writemask;
- imm_inst[inst_count++] = inst;
-
- last_reg = inst->dst.nr;
- last_reg_offset = inst->dst.reg_offset;
- last_reg_file = inst->dst.file;
- if (vf > 0)
- dest_type = need_type;
- }
}
if (progress)
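The net effect of the restructuring above is that opt_vector_float's accumulation state (last_reg, imm[], writemask, dest_type, imm_inst[]) is now reset at the start of every basic block, so a partially collected vector immediate is never combined with MOVs from a different block; previously a single running sequence was tracked across the whole CFG.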
/* Can't coalesce this GRF if someone else was going to
* read it later.
*/
- if (var_range_end(var_from_reg(alloc, inst->src[0]), 4) > ip)
+ if (var_range_end(var_from_reg(alloc, dst_reg(inst->src[0])), 4) > ip)
continue;
/* We need to check interference with the final destination between this
* more -- a sure sign they'll fail operands_match().
*/
if (src->file == VGRF) {
- if (var_range_end(var_from_reg(alloc, *src), 4) < ip) {
+ if (var_range_end(var_from_reg(alloc, dst_reg(*src)), 4) < ip) {
entry->remove();
ralloc_free(entry);
break;
nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map);
brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map);
+ if (key->quads_workaround)
+ brw_nir_apply_tcs_quads_workaround(nir);
+
nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
if (is_scalar)
* - Gen4-5 record ending occlusion query values (4 * 4 = 16 bytes)
* - Disabling OA counters on Gen6+ (3 DWords = 12 bytes)
* - Ending MI_REPORT_PERF_COUNT on Gen5+, plus associated PIPE_CONTROLs:
- * - Two sets of PIPE_CONTROLs, which become 3 PIPE_CONTROLs each on SNB,
- * which are 5 DWords each ==> 2 * 3 * 5 * 4 = 120 bytes
+ * - Two sets of PIPE_CONTROLs, which become 4 PIPE_CONTROLs each on SNB,
+ * which are 5 DWords each ==> 2 * 4 * 5 * 4 = 160 bytes
* - 3 DWords for MI_REPORT_PERF_COUNT itself on Gen6+. ==> 12 bytes.
* On Ironlake, it's 6 DWords, but we have some slack due to the lack of
* Sandybridge PIPE_CONTROL madness.
- * - CC_STATE workaround on HSW (12 * 4 = 48 bytes)
- * - 5 dwords for initial mi_flush
+ * - CC_STATE workaround on HSW (17 * 4 = 68 bytes)
+ * - 10 dwords for initial mi_flush
* - 2 dwords for CC state setup
* - 5 dwords for the required pipe control at the end
* - Restoring L3 configuration: (24 dwords = 96 bytes)
* - 7 dwords for L3 configuration set-up.
* - 5 dwords for L3 atomic set-up (on HSW).
*/
-#define BATCH_RESERVED 248
+#define BATCH_RESERVED 308
struct intel_batchbuffer;
if (!irb->mt)
return;
+ /* Adjust the miptree's upper-left coordinate.
+ *
+ * FIXME: Adjusting the miptree's layout outside of
+ * intel_miptree_create_layout() is fragile. Plumb the adjustment through
+ * intel_miptree_create_layout() and brw_tex_layout().
+ */
+ irb->mt->level[0].level_x = image->tile_x;
+ irb->mt->level[0].level_y = image->tile_y;
+ irb->mt->level[0].slice[0].x_offset = image->tile_x;
+ irb->mt->level[0].slice[0].y_offset = image->tile_y;
+ irb->mt->total_width += image->tile_x;
+ irb->mt->total_height += image->tile_y;
+
rb->InternalFormat = image->internal_format;
rb->Width = image->width;
rb->Height = image->height;
_mesa_get_format_name(format),
first_level, last_level, depth0, mt);
- if (target == GL_TEXTURE_1D_ARRAY) {
- /* For a 1D Array texture the OpenGL API will treat the height0
- * parameter as the number of array slices. For Intel hardware, we treat
- * the 1D array as a 2D Array with a height of 1.
- *
- * So, when we first come through this path to create a 1D Array
- * texture, height0 stores the number of slices, and depth0 is 1. In
- * this case, we want to swap height0 and depth0.
- *
- * Since some miptrees will be created based on the base miptree, we may
- * come through this path and see height0 as 1 and depth0 being the
- * number of slices. In this case we don't need to do the swap.
- */
- assert(height0 == 1 || depth0 == 1);
- if (height0 > 1) {
- depth0 = height0;
- height0 = 1;
- }
- }
+ if (target == GL_TEXTURE_1D_ARRAY)
+ assert(height0 == 1);
mt->target = target;
mt->format = format;
* as a 2D Array with a height of 1. So, here we want to swap image
* height and depth.
*/
+ assert(image->Depth == 1);
*width = image->Width;
*height = 1;
*depth = image->Height;
hz_height = DIV_ROUND_UP(hz_height, 2);
} else {
const unsigned hz_qpitch = h0 + h1 + (12 * vertical_align);
- if (mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
- mt->target == GL_TEXTURE_CUBE_MAP) {
- /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth * 6/2) /8 ) * 8 */
- hz_height = DIV_ROUND_UP(hz_qpitch * Z0 * 6, 2 * 8) * 8;
- } else {
- /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth/2) /8 ) * 8 */
- hz_height = DIV_ROUND_UP(hz_qpitch * Z0, 2 * 8) * 8;
- }
+ /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth/2) /8 ) * 8 */
+ hz_height = DIV_ROUND_UP(hz_qpitch * Z0, 2 * 8) * 8;
}
unsigned long pitch;
} else {
/* HZ_Height (rows) = ceiling( (HZ_QPitch/2)/8) *8 * Z_Depth */
hz_height = DIV_ROUND_UP(buf->qpitch, 2 * 8) * 8 * Z0;
- if (mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
- mt->target == GL_TEXTURE_CUBE_MAP) {
- /* HZ_Height (rows) = ceiling( (HZ_QPitch/2)/8) *8 * 6 * Z_Depth
- *
- * We can can just take our hz_height calculation from above, and
- * multiply by 6 for the cube map and cube map array types.
- */
- hz_height *= 6;
- }
}
unsigned long pitch;
if (ctx->_ImageTransferState)
return false;
- /* This renderbuffer can come from a texture. In this case, we impose
- * some of the same restrictions we have for textures and adjust for
- * miplevels.
- */
- if (rb->TexImage) {
- if (rb->TexImage->TexObject->Target != GL_TEXTURE_2D &&
- rb->TexImage->TexObject->Target != GL_TEXTURE_RECTANGLE)
- return false;
-
- int level = rb->TexImage->Level + rb->TexImage->TexObject->MinLevel;
-
- /* Adjust x and y offset based on miplevel */
- xoffset += irb->mt->level[level].level_x;
- yoffset += irb->mt->level[level].level_y;
- }
-
/* It is possible that the renderbuffer (or underlying texture) is
* multisampled. Since ReadPixels from a multisampled buffer requires a
* multisample resolve, we can't handle this here
return false;
}
+ xoffset += irb->mt->level[irb->mt_level].slice[irb->mt_layer].x_offset;
+ yoffset += irb->mt->level[irb->mt_level].slice[irb->mt_layer].y_offset;
+
dst_pitch = _mesa_image_row_stride(pack, width, format, type);
/* For a window-system renderbuffer, the buffer is actually flipped
xoffset * cpp, (xoffset + width) * cpp,
yoffset, yoffset + height,
pixels - (ptrdiff_t) yoffset * dst_pitch - (ptrdiff_t) xoffset * cpp,
- bo->virtual,
+ bo->virtual + irb->mt->offset,
dst_pitch, irb->mt->pitch,
brw->has_swizzling,
irb->mt->tiling,
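These two adjustments are what make the deleted texture-specific fixup above unnecessary: the per-slice x/y offsets account for whatever miplevel and layer the renderbuffer wraps, and adding irb->mt->offset to the CPU map handles miptrees that do not start at the head of the buffer object.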
#define PIPE_CONTROL_PPGTT_WRITE (0 << 2)
#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)
+#define PIPE_CONTROL_CACHE_FLUSH_BITS \
+ (PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \
+ PIPE_CONTROL_RENDER_TARGET_FLUSH)
+
+#define PIPE_CONTROL_CACHE_INVALIDATE_BITS \
+ (PIPE_CONTROL_STATE_CACHE_INVALIDATE | PIPE_CONTROL_CONST_CACHE_INVALIDATE | \
+ PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
+ PIPE_CONTROL_INSTRUCTION_INVALIDATE)
+
/** @} */
#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22))
/** The fence waits for completion of this batch. */
drm_intel_bo *batch_bo;
+ mtx_t mutex;
bool signalled;
};
}
static bool
-brw_fence_has_completed(struct brw_fence *fence)
+brw_fence_has_completed_locked(struct brw_fence *fence)
{
if (fence->signalled)
return true;
return false;
}
-/**
- * Return true if the function successfully signals or has already signalled.
- * (This matches the behavior expected from __DRI2fence::client_wait_sync).
- */
static bool
-brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
- uint64_t timeout)
+brw_fence_has_completed(struct brw_fence *fence)
+{
+ bool ret;
+
+ mtx_lock(&fence->mutex);
+ ret = brw_fence_has_completed_locked(fence);
+ mtx_unlock(&fence->mutex);
+
+ return ret;
+}
+
+static bool
+brw_fence_client_wait_locked(struct brw_context *brw, struct brw_fence *fence,
+ uint64_t timeout)
{
if (fence->signalled)
return true;
return true;
}
+/**
+ * Return true if the function successfully signals or has already signalled.
+ * (This matches the behavior expected from __DRI2fence::client_wait_sync).
+ */
+static bool
+brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
+ uint64_t timeout)
+{
+ bool ret;
+
+ mtx_lock(&fence->mutex);
+ ret = brw_fence_client_wait_locked(brw, fence, timeout);
+ mtx_unlock(&fence->mutex);
+
+ return ret;
+}
+
static void
brw_fence_server_wait(struct brw_context *brw, struct brw_fence *fence)
{
if (!fence)
return NULL;
+ mtx_init(&fence->mutex, mtx_plain);
fence->brw = brw;
brw_fence_insert(brw, fence);
{
struct brw_fence *fence = driver_fence;
+ /* We might be called here with a NULL fence as a result of WaitSyncKHR
+ * on an EGL_KHR_reusable_sync fence. Nothing to do here in that case.
+ */
+ if (!fence)
+ return;
+
brw_fence_server_wait(fence->brw, fence);
}
!intel_miptree_match_image(intel_texobj->mt, first_image) ||
intel_texobj->mt->last_level != levels - 1) {
intel_miptree_release(&intel_texobj->mt);
+
+ intel_get_image_dims(first_image, &width, &height, &depth);
intel_texobj->mt = intel_miptree_create(brw, texobj->Target,
first_image->TexFormat,
0, levels - 1,
PUSH_DATA (push, in >> 32);
BEGIN_NV04(push, NV20_3D(RC_ENABLE), 1);
- PUSH_DATA (push, n);
+ PUSH_DATA (push, MAX2(1, n));
}
xrb->map_mode = mode;
xrb->map_x = x;
- xrb->map_y = y;
+ xrb->map_y = rb->Height - y - h;
xrb->map_w = w;
xrb->map_h = h;
stride = w * cpp;
xrb->Base.Buffer = malloc(h * stride);
- sPriv->swrast_loader->getImage(dPriv, x, rb->Height - y - h, w, h,
+ sPriv->swrast_loader->getImage(dPriv, x, xrb->map_y, w, h,
(char *) xrb->Base.Buffer,
dPriv->loaderPrivate);
GLuint file:4;
GLint idx:9; /* relative addressing may be negative */
/* sizeof(idx) should == sizeof(prog_src_reg::Index) */
- GLuint abs:1;
GLuint negate:1;
GLuint swz:12;
- GLuint pad:5;
+ GLuint pad:6;
};
0,
0,
0,
- 0,
0
};
struct ureg reg;
reg.file = file;
reg.idx = idx;
- reg.abs = 0;
reg.negate = 0;
reg.swz = SWIZZLE_NOOP;
reg.pad = 0;
}
-
-static struct ureg absolute( struct ureg reg )
-{
- reg.abs = 1;
- reg.negate = 0;
- return reg;
-}
-
-
static struct ureg negate( struct ureg reg )
{
reg.negate ^= 1;
emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
- emit_op2(p, OPCODE_POW, spot, 0, absolute(spot), swizzle1(attenuation, W));
+ emit_op1(p, OPCODE_ABS, spot, 0, spot);
+ emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
release_temp(p, spot);
* not specified with an unsized internal format from table 8.3 or a
* sized internal format that is both color-renderable and
* texture-filterable according to table 8.10."
+ *
+ * GL_EXT_texture_format_BGRA8888 adds a GL_BGRA_EXT unsized internal
+ * format, and includes it in a very similar-looking table. So we
+ * include it here as well.
*/
return internalformat == GL_RGBA || internalformat == GL_RGB ||
internalformat == GL_LUMINANCE_ALPHA ||
internalformat == GL_LUMINANCE || internalformat == GL_ALPHA ||
+ internalformat == GL_BGRA_EXT ||
(_mesa_is_es3_color_renderable(internalformat) &&
_mesa_is_es3_texture_filterable(internalformat));
}
return;
}
+ if (srcImage->Width == 0 || srcImage->Height == 0) {
+ _mesa_unlock_texture(ctx, texObj);
+ return;
+ }
+
if (target == GL_TEXTURE_CUBE_MAP) {
GLuint face;
for (face = 0; face < 6; face++) {
static const int extra_ARB_gpu_shader5_or_OES_sample_variables[] = {
EXT(ARB_gpu_shader5),
EXT(OES_sample_variables),
+ EXTRA_END
};
EXTRA_EXT(ARB_texture_cube_map);
}
/**
+ * Test if the given format is an ETC2 format.
+ */
+GLboolean
+_mesa_is_etc2_format(GLenum internalFormat)
+{
+ switch (internalFormat) {
+ case GL_COMPRESSED_RGB8_ETC2:
+ case GL_COMPRESSED_SRGB8_ETC2:
+ case GL_COMPRESSED_RGBA8_ETC2_EAC:
+ case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
+ case GL_COMPRESSED_R11_EAC:
+ case GL_COMPRESSED_RG11_EAC:
+ case GL_COMPRESSED_SIGNED_R11_EAC:
+ case GL_COMPRESSED_SIGNED_RG11_EAC:
+ case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+ case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/**
* Test if the given format is an integer (non-normalized) format.
*/
GLboolean
_mesa_is_astc_format(GLenum internalFormat);
extern GLboolean
+_mesa_is_etc2_format(GLenum internalFormat);
+
+extern GLboolean
_mesa_is_type_unsigned(GLenum type);
extern GLboolean
static bool
validate_io(struct gl_shader_program *producer,
- struct gl_shader_program *consumer)
+ struct gl_shader_program *consumer,
+ gl_shader_stage producer_stage,
+ gl_shader_stage consumer_stage)
{
if (producer == consumer)
return true;
+ const bool nonarray_stage_to_array_stage =
+ producer_stage != MESA_SHADER_TESS_CTRL &&
+ (consumer_stage == MESA_SHADER_GEOMETRY ||
+ consumer_stage == MESA_SHADER_TESS_CTRL ||
+ consumer_stage == MESA_SHADER_TESS_EVAL);
+
bool valid = true;
+ void *name_buffer = NULL;
+ size_t name_buffer_size = 0;
+
gl_shader_variable const **outputs =
(gl_shader_variable const **) calloc(producer->NumProgramResourceList,
sizeof(gl_shader_variable *));
}
}
} else {
+ char *consumer_name = consumer_var->name;
+
+ if (nonarray_stage_to_array_stage &&
+ consumer_var->interface_type != NULL &&
+ consumer_var->interface_type->is_array() &&
+ !is_gl_identifier(consumer_var->name)) {
+ const size_t name_len = strlen(consumer_var->name);
+
+ if (name_len >= name_buffer_size) {
+ free(name_buffer);
+
+ name_buffer_size = name_len + 1;
+ name_buffer = malloc(name_buffer_size);
+ if (name_buffer == NULL) {
+ valid = false;
+ goto out;
+ }
+ }
+
+ consumer_name = (char *) name_buffer;
+
+ char *s = strchr(consumer_var->name, '[');
+ if (s == NULL) {
+ valid = false;
+ goto out;
+ }
+
+ char *t = strchr(s, ']');
+ if (t == NULL) {
+ valid = false;
+ goto out;
+ }
+
+ assert(t[1] == '.' || t[1] == '[');
+
+ const ptrdiff_t base_name_len = s - consumer_var->name;
+
+ memcpy(consumer_name, consumer_var->name, base_name_len);
+ strcpy(consumer_name + base_name_len, t + 1);
+ }
+
for (unsigned j = 0; j < num_outputs; j++) {
const gl_shader_variable *const var = outputs[j];
if (!var->explicit_location &&
- strcmp(consumer_var->name, var->name) == 0) {
+ strcmp(consumer_name, var->name) == 0) {
producer_var = var;
match_index = j;
break;
* Note that location mismatches are detected by the loops above that
* find the producer variable that goes with the consumer variable.
*/
- if (producer_var->type != consumer_var->type ||
- producer_var->interpolation != consumer_var->interpolation ||
- producer_var->precision != consumer_var->precision) {
+ if (nonarray_stage_to_array_stage) {
+ if (!consumer_var->type->is_array() ||
+ consumer_var->type->fields.array != producer_var->type) {
+ valid = false;
+ goto out;
+ }
+
+ if (consumer_var->interface_type != NULL) {
+ if (!consumer_var->interface_type->is_array() ||
+ consumer_var->interface_type->fields.array != producer_var->interface_type) {
+ valid = false;
+ goto out;
+ }
+ } else if (producer_var->interface_type != NULL) {
+ valid = false;
+ goto out;
+ }
+ } else {
+ if (producer_var->type != consumer_var->type) {
+ valid = false;
+ goto out;
+ }
+
+ if (producer_var->interface_type != consumer_var->interface_type) {
+ valid = false;
+ goto out;
+ }
+ }
+
+ if (producer_var->interpolation != consumer_var->interpolation) {
valid = false;
goto out;
}
- if (producer_var->outermost_struct_type != consumer_var->outermost_struct_type) {
+ if (producer_var->precision != consumer_var->precision) {
valid = false;
goto out;
}
- if (producer_var->interface_type != consumer_var->interface_type) {
+ if (producer_var->outermost_struct_type != consumer_var->outermost_struct_type) {
valid = false;
goto out;
}
}
out:
+ free(name_buffer);
free(outputs);
return valid && num_outputs == 0;
}
if (shProg[idx]->_LinkedShaders[idx]->Stage == MESA_SHADER_COMPUTE)
break;
- if (!validate_io(shProg[prev], shProg[idx]))
+ if (!validate_io(shProg[prev], shProg[idx],
+ shProg[prev]->_LinkedShaders[prev]->Stage,
+ shProg[idx]->_LinkedShaders[idx]->Stage))
return false;
prev = idx;
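In the non-array-to-array case handled above (the producer is any stage but tessellation control, the consumer geometry or tessellation), a consumer input is an instance array of the producer's output, so its resource name carries an index: a name recorded as blk[0].v is rewritten to blk.v before being matched against the producer's outputs, and the type check then requires the consumer's type (and its interface type, if present) to be an array whose element matches the producer's.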
*/
if (format == rgba_format) {
rgba = dest;
- } else if (rgba == NULL) { /* Allocate the RGBA buffer only once */
+ } else {
need_convert = true;
- rgba = malloc(height * rgba_stride);
- if (!rgba) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage()");
- ctx->Driver.UnmapTextureImage(ctx, texImage, img);
- return;
+ if (rgba == NULL) { /* Allocate the RGBA buffer only once */
+ rgba = malloc(height * rgba_stride);
+ if (!rgba) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage()");
+ ctx->Driver.UnmapTextureImage(ctx, texImage, img);
+ return;
+ }
}
}
_mesa_format_no_online_compression(const struct gl_context *ctx, GLenum format)
{
return _mesa_is_astc_format(format) ||
+ _mesa_is_etc2_format(format) ||
compressedteximage_only_format(ctx, format);
}
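With _mesa_is_etc2_format() added to the check, ETC2/EAC joins ASTC in refusing online compression: these internal formats only accept pre-compressed uploads. This matches the state-tracker hunk further down, which keeps reporting an ETC2 mesa format even when the driver lacks native support and the decompression fallback is used.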
att->Texture == texObj &&
att->TextureLevel == level &&
att->CubeMapFace == face) {
- _mesa_update_texture_renderbuffer(ctx, ctx->DrawBuffer, att);
+ _mesa_update_texture_renderbuffer(ctx, fb, att);
assert(att->Renderbuffer->TexImage);
/* Mark fb status as indeterminate to force re-validation */
fb->_Status = 0;
+
+ /* Make sure that the revalidation actually happens if this is
+ * being done to currently-bound buffers.
+ */
+ if (fb == ctx->DrawBuffer || fb == ctx->ReadBuffer)
+ ctx->NewState |= _NEW_BUFFERS;
}
}
}
return;
}
- _mesa_init_teximage_fields(ctx, texImage,
- 0, 0, 0, 0, /* w, h, d, border */
- GL_NONE, MESA_FORMAT_NONE);
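+   /* Unlike re-initializing the fields by hand, this also lets the
+    * driver free its storage for the image.
+    */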
+ _mesa_clear_texture_image(ctx, texImage);
}
}
}
#include "main/enums.h"
#include "main/formats.h"
#include "main/glformats.h"
+#include "main/texcompress.h"
#include "main/texgetimage.h"
#include "main/teximage.h"
#include "main/texstore.h"
}
if (pFormat == PIPE_FORMAT_NONE) {
+      /* Lie about supporting ETC1/ETC2 natively when the driver cannot,
+       * since we decode those formats in software instead.
+       */
+ mFormat = _mesa_glenum_to_compressed_format(internalFormat);
+ if ((mFormat == MESA_FORMAT_ETC1_RGB8 && !st->has_etc1) ||
+ (_mesa_is_format_etc2(mFormat) && !st->has_etc2))
+ return mFormat;
+
/* no luck at all */
return MESA_FORMAT_NONE;
}
uval[0].u = *(uint32_t *)&val;
uval[1].u = *(((uint32_t *)&val) + 1);
src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle);
-
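+   /* The double occupies two 32-bit channels; an XYXY swizzle makes both
+    * halves of the register read the same 64-bit value.
+    */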
+ src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
return src;
}
emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
break;
case ir_unop_bitcast_f2i:
- result_src = op[0];
- result_src.type = GLSL_TYPE_INT;
- break;
case ir_unop_bitcast_f2u:
- result_src = op[0];
- result_src.type = GLSL_TYPE_UINT;
+ /* Make sure we don't propagate the negate modifier to integer opcodes. */
+ if (op[0].negate)
+ emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
+ else
+ result_src = op[0];
+ result_src.type = ir->operation == ir_unop_bitcast_f2i ? GLSL_TYPE_INT :
+ GLSL_TYPE_UINT;
break;
case ir_unop_bitcast_i2f:
case ir_unop_bitcast_u2f:
inputSemanticName[i], inputSemanticIndex[i],
interpMode[i], 0, interpLocation[i],
array_id, array_size);
- i += array_size - 1;
+
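+         /* Skip the remaining slots that belong to this input array; the
+          * array need not occupy exactly array_size consecutive slots.
+          */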
+ GLuint base_attr = inputSlotToAttr[i];
+ while (i + 1 < numInputs &&
+ inputSlotToAttr[i + 1] < base_attr + array_size)
+ ++i;
}
else {
t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
struct pipe_video_buffer *buffer;
struct pipe_sampler_view **samplers;
+ struct pipe_resource *res = NULL;
getProcAddr = (void *)ctx->vdpGetProcAddress;
if (getProcAddr(device, VDP_FUNC_ID_VIDEO_SURFACE_GALLIUM, (void**)&f))
if (!sv)
return NULL;
- return sv->texture;
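+   /* Hand back a counted reference rather than a borrowed pointer, so the
+    * resource stays valid even if the surface is destroyed.
+    */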
+ pipe_resource_reference(&res, sv->texture);
+ return res;
}
static struct pipe_resource *
{
int (*getProcAddr)(uint32_t device, uint32_t id, void **ptr);
uint32_t device = (uintptr_t)ctx->vdpDevice;
+ struct pipe_resource *res = NULL;
VdpOutputSurfaceGallium *f;
getProcAddr = (void *)ctx->vdpGetProcAddress;
if (getProcAddr(device, VDP_FUNC_ID_OUTPUT_SURFACE_GALLIUM, (void**)&f))
return NULL;
- return f((uintptr_t)vdpSurface);
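+   /* As above, return a counted reference instead of a borrowed pointer. */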
+ pipe_resource_reference(&res, f((uintptr_t)vdpSurface));
+ return res;
}
static struct pipe_resource *
/* do we have different screen objects ? */
if (res->screen != st->pipe->screen) {
_mesa_error(ctx, GL_INVALID_OPERATION, "VDPAUMapSurfacesNV");
+ pipe_resource_reference(&res, NULL);
return;
}
stObj->surface_format = res->format;
_mesa_dirty_texobj(ctx, texObj);
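+   /* Drop the reference taken when the surface was looked up. */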
+ pipe_resource_reference(&res, NULL);
}
static void
prim[0].basevertex = basevertex;
prim[0].num_instances = numInstances;
prim[0].base_instance = baseInstance;
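+   /* Only a single draw is issued here, so its draw index is 0. */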
+ prim[0].draw_id = 0;
/* Need to give special consideration to rendering a range of
* indices starting somewhere above zero. Typically the
* then emitting an indexed prim at runtime.
*/
static void GLAPIENTRY
-_save_OBE_DrawElements(GLenum mode, GLsizei count, GLenum type,
- const GLvoid * indices)
+_save_OBE_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
+ const GLvoid * indices, GLint basevertex)
{
GET_CURRENT_CONTEXT(ctx);
struct vbo_save_context *save = &vbo_context(ctx)->save;
switch (type) {
case GL_UNSIGNED_BYTE:
for (i = 0; i < count; i++)
- CALL_ArrayElement(GET_DISPATCH(), (((GLubyte *) indices)[i]));
+ CALL_ArrayElement(GET_DISPATCH(), (basevertex + ((GLubyte *) indices)[i]));
break;
case GL_UNSIGNED_SHORT:
for (i = 0; i < count; i++)
- CALL_ArrayElement(GET_DISPATCH(), (((GLushort *) indices)[i]));
+ CALL_ArrayElement(GET_DISPATCH(), (basevertex + ((GLushort *) indices)[i]));
break;
case GL_UNSIGNED_INT:
for (i = 0; i < count; i++)
- CALL_ArrayElement(GET_DISPATCH(), (((GLuint *) indices)[i]));
+ CALL_ArrayElement(GET_DISPATCH(), (basevertex + ((GLuint *) indices)[i]));
break;
default:
_mesa_error(ctx, GL_INVALID_ENUM, "glDrawElements(type)");
_ae_unmap_vbos(ctx);
}
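+/* DrawElements is just DrawElementsBaseVertex with a basevertex of zero. */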
+static void GLAPIENTRY
+_save_OBE_DrawElements(GLenum mode, GLsizei count, GLenum type,
+ const GLvoid * indices)
+{
+ _save_OBE_DrawElementsBaseVertex(mode, count, type, indices, 0);
+}
+
static void GLAPIENTRY
_save_OBE_DrawRangeElements(GLenum mode, GLuint start, GLuint end,
{
SET_DrawArrays(exec, _save_OBE_DrawArrays);
SET_DrawElements(exec, _save_OBE_DrawElements);
+ SET_DrawElementsBaseVertex(exec, _save_OBE_DrawElementsBaseVertex);
SET_DrawRangeElements(exec, _save_OBE_DrawRangeElements);
SET_MultiDrawElementsEXT(exec, _save_OBE_MultiDrawElements);
SET_MultiDrawElementsBaseVertex(exec, _save_OBE_MultiDrawElementsBaseVertex);
static GLuint
elt(struct copy_context *copy, GLuint elt_idx)
{
- GLuint elt = copy->srcelt[elt_idx];
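+   /* Apply the primitive's basevertex so split copies fetch the same
+    * vertices as the original draw.
+    */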
+ GLuint elt = copy->srcelt[elt_idx] + copy->prim->basevertex;
GLuint slot = elt & (ELT_TABLE_SIZE-1);
/* printf("elt %d\n", elt); */