OSDN Git Service

Merge remote-tracking branch 'mesa/18.3' into oreo-x86 android-x86-8.1-r2
author Chih-Wei Huang <cwhuang@linux.org.tw>
Thu, 25 Apr 2019 03:36:11 +0000 (11:36 +0800)
committer Chih-Wei Huang <cwhuang@linux.org.tw>
Thu, 25 Apr 2019 03:36:11 +0000 (11:36 +0800)
95 files changed:
VERSION
bin/.cherry-ignore
bin/install_megadrivers.py
docs/relnotes/18.3.5.html [new file with mode: 0644]
docs/relnotes/18.3.6.html [new file with mode: 0644]
meson.build
scons/custom.py
scons/gallium.py
src/amd/common/ac_llvm_build.c
src/amd/common/ac_llvm_build.h
src/amd/common/ac_nir_to_llvm.c
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_descriptor_set.c
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_formats.c
src/amd/vulkan/radv_meta_clear.c
src/amd/vulkan/radv_nir_to_llvm.c
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_private.h
src/amd/vulkan/radv_shader.h
src/compiler/glsl/ast_to_hir.cpp
src/compiler/glsl/ir.cpp
src/compiler/glsl/ir.h
src/compiler/glsl/ir_print_visitor.cpp
src/compiler/glsl/ir_reader.cpp
src/compiler/glsl/link_uniforms.cpp
src/compiler/glsl/link_varyings.cpp
src/compiler/glsl/linker.cpp
src/compiler/glsl/linker.h
src/compiler/glsl/list.h
src/compiler/glsl/lower_vector_derefs.cpp
src/compiler/glsl/serialize.cpp
src/compiler/glsl_types.cpp
src/compiler/glsl_types.h
src/compiler/nir/nir_gather_xfb_info.c
src/compiler/nir/nir_opt_copy_prop_vars.c
src/compiler/nir_types.cpp
src/compiler/nir_types.h
src/compiler/spirv/spirv_to_nir.c
src/compiler/spirv/vtn_variables.c
src/egl/drivers/dri2/egl_dri2.c
src/egl/drivers/dri2/platform_wayland.c
src/egl/main/egldevice.c
src/egl/main/egldevice.h
src/egl/meson.build
src/gallium/auxiliary/tgsi/tgsi_scan.h
src/gallium/drivers/radeon/radeon_vcn_dec.c
src/gallium/drivers/radeonsi/driinfo_radeonsi.h
src/gallium/drivers/radeonsi/si_compute_blit.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_state_viewport.c
src/gallium/drivers/softpipe/sp_tile_cache.c
src/gallium/drivers/swr/swr_screen.cpp
src/gallium/state_trackers/nine/nine_pipe.h
src/gallium/state_trackers/nine/swapchain9.c
src/gallium/targets/d3dadapter9/meson.build
src/gallium/targets/dri/meson.build
src/gallium/targets/omx/meson.build
src/gallium/targets/va/meson.build
src/gallium/targets/vdpau/meson.build
src/gallium/targets/xa/meson.build
src/gallium/targets/xvmc/meson.build
src/glx/dri3_glx.c
src/glx/dri3_priv.h
src/glx/drisw_glx.c
src/intel/Android.vulkan.mk
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_fs_generator.cpp
src/intel/compiler/brw_fs_nir.cpp
src/intel/dev/gen_device_info.c
src/intel/genxml/gen10.xml
src/intel/genxml/gen11.xml
src/intel/genxml/gen7.xml
src/intel/genxml/gen75.xml
src/intel/genxml/gen8.xml
src/intel/genxml/gen9.xml
src/intel/vulkan/anv_descriptor_set.c
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
src/intel/vulkan/anv_pass.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/genX_pipeline.c
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_draw_upload.c
src/mesa/drivers/dri/i965/genX_state_upload.c
src/mesa/drivers/dri/i965/intel_extensions.c
src/mesa/drivers/dri/meson.build
src/mesa/main/errors.c
src/mesa/main/get.c
src/mesa/program/prog_parameter.c
src/mesa/program/prog_parameter.h
src/mesa/state_tracker/st_glsl_to_nir.cpp
src/util/00-mesa-defaults.conf
src/util/xmlpool/t_options.h
src/vulkan/util/meson.build

diff --git a/VERSION b/VERSION
index 0548779..d63efa7 100644 (file)
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-18.3.4
+18.3.6
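diff --git a/bin/.cherry-ignore b/bin/.cherry-ignore
index d69c2ca..1041997 100644 (file)
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore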
@@ -36,3 +36,19 @@ cbd1ad6165f0aea7fb7c6fd1b36ad5317dd65cb7 st/mesa: require RGBA2, RGB4, and RGBA4
 
 # stable The commit addresses functionality not present in branch
 1b8983c25be19073c02fe9630e949be55f8280fa radv: fix using LOAD_CONTEXT_REG with old GFX ME firmwares on GFX8
+
+# stable Explicit 19.0 only nominations, require work which did not land in branch
+8ab95b849e66f3221d80a67eef2ec6e3730901a8 anv: Refactor descriptor pushing a bit
+5c30fffeec1732c21d600c036f95f8cdb1bb5487 anv: Take references to push descriptor set layouts
+
+# sha    Commit references invalid sha - a offender did not land in branch.
+85ee157283c667372baf7c03259cba08853f0067 gitlab-ci: autotools needs to be told which llvm version to use
+
+# revert The commit was reverted shortly after it was applied on master
+47fc359822494935852de1e70e4d840b2fe6a25c anv: release memory allocated by glsl types during spirv_to_nir
+
+# stable Explicitly 19.0 nomination, seemingly a performance patch
+5ef2b8f1f2ebcdb4ffe5c98b3f4f48e584cb4b22 nir: Add a pass for lowering IO back to vector when possible
+
+# fixes Temporary block perf. fix depending on previous opt. pass
+6d5d89d25a0a4299dbfcbfeca71b6c7e65ef3d45 intel/nir: Vectorize all IO
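diff --git a/bin/install_megadrivers.py b/bin/install_megadrivers.py
index d29b191..c9ab99a 100644 (file)
--- a/bin/install_megadrivers.py
+++ b/bin/install_megadrivers.py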
@@ -35,7 +35,11 @@ def main():
     args = parser.parse_args()
 
     if os.path.isabs(args.libdir):
-        to = os.path.join(os.environ.get('DESTDIR', '/'), args.libdir[1:])
+        destdir = os.environ.get('DESTDIR')
+        if destdir:
+            to = os.path.join(destdir, args.libdir[1:])
+        else:
+            to = args.libdir
     else:
         to = os.path.join(os.environ['MESON_INSTALL_DESTDIR_PREFIX'], args.libdir)
 
@@ -45,7 +49,6 @@ def main():
         if os.path.lexists(to):
             os.unlink(to)
         os.makedirs(to)
-    shutil.copy(args.megadriver, master)
 
     for driver in args.drivers:
         abs_driver = os.path.join(to, driver)
diff --git a/docs/relnotes/18.3.5.html b/docs/relnotes/18.3.5.html
new file mode 100644 (file)
index 0000000..ce862f4
--- /dev/null
+++ b/docs/relnotes/18.3.5.html
@@ -0,0 +1,271 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.3.5 Release Notes / March 18, 2019</h1>
+
+<p>
+Mesa 18.3.5 is a bug fix release which fixes bugs found since the 18.3.4 release.
+</p>
+<p>
+Mesa 18.3.5 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+5f40a336cb2af9b1d66fa243bb03c2c8a3f9b3f067aab6aaaad4316d1bc0e58b  mesa-18.3.5.tar.gz
+4027aea82cc63240b3fcf60eec9eea882955f098c989b29357b01d1695747953  mesa-18.3.5.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104297">Bug 104297</a> - [i965] Downward causes GPU hangs and misrendering on Haswell</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104602">Bug 104602</a> - [apitrace] Graphical artifacts in Civilization VI on RX Vega</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107052">Bug 107052</a> - [Regression][bisected]. Crookz - The Big Heist Demo can't be launched despite the &quot;true&quot; flag in &quot;drirc&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107563">Bug 107563</a> - [RADV] Broken rendering in Unity demos</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108457">Bug 108457</a> - [OpenGL CTS] KHR-GL46.tessellation_shader.single.xfb_captures_data_from_correct_stage fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108999">Bug 108999</a> - Calculating the scissors fields when the y is flipped (0 on top) can generate negative numbers that will cause assertion failure later on.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109328">Bug 109328</a> - [BSW BXT GLK] dEQP-VK.subgroups.arithmetic.subgroup regressions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109443">Bug 109443</a> - Build failure with MSVC when using Scons &gt;= 3.0.2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109451">Bug 109451</a> - [IVB,SNB] LINE_STRIPs following a TRIANGLE_FAN fail to use primitive restart</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109594">Bug 109594</a> - totem assert failure: totem: src/intel/genxml/gen9_pack.h:72: __gen_uint: La declaración `v &lt;= max' no se cumple.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109597">Bug 109597</a> - wreckfest issues with transparent objects &amp; skybox</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109601">Bug 109601</a> - [Regression] RuneLite GPU rendering broken on 18.3.x</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109698">Bug 109698</a> - dri.pc contents invalid when built with meson</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109735">Bug 109735</a> - [Regression] broken font with mesa_vulkan_overlay</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alok Hota (1):</p>
+<ul>
+  <li>swr/rast: bypass size limit for non-sampled textures</li>
+</ul>
+
+<p>Andrii Simiklit (1):</p>
+<ul>
+  <li>i965: re-emit index buffer state on a reset option change.</li>
+</ul>
+
+<p>Axel Davy (2):</p>
+<ul>
+  <li>st/nine: Ignore window size if error</li>
+  <li>st/nine: Ignore multisample quality level if no ms</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (4):</p>
+<ul>
+  <li>radv: Sync ETC2 whitelisted devices.</li>
+  <li>radv: Fix float16 interpolation set up.</li>
+  <li>radv: Allow interpolation on non-float types.</li>
+  <li>radv: Interpolate less aggressively.</li>
+</ul>
+
+<p>Carlos Garnacho (1):</p>
+<ul>
+  <li>wayland/egl: Ensure EGL surface is resized on DRI update_buffers()</li>
+</ul>
+
+<p>Danylo Piliaiev (1):</p>
+<ul>
+  <li>glsl/linker: Fix unmatched TCS outputs being reduced to local variable</li>
+</ul>
+
+<p>David Shao (1):</p>
+<ul>
+  <li>meson: ensure that xmlpool_options.h is generated for gallium targets that need it</li>
+</ul>
+
+<p>Eleni Maria Stea (1):</p>
+<ul>
+  <li>i965: fixed clamping in set_scissor_bits when the y is flipped</li>
+</ul>
+
+<p>Emil Velikov (7):</p>
+<ul>
+  <li>docs: add sha256 checksums for 18.3.4</li>
+  <li>meson: egl: correctly manage loader/xmlconfig</li>
+  <li>cherry-ignore: add 19.0 only anv/push buffer nominations</li>
+  <li>cherry-ignore: add gitlab-ci fixup commit</li>
+  <li>cherry-ignore: ignore glsl_types memory cleanup patch</li>
+  <li>cherry-ignore: add explicit 19.0 performance optimisations</li>
+  <li>Update version to 18.3.5</li>
+</ul>
+
+<p>Eric Engestrom (1):</p>
+<ul>
+  <li>egl: fix libdrm-less builds</li>
+</ul>
+
+<p>Francisco Jerez (1):</p>
+<ul>
+  <li>intel/fs: Implement extended strides greater than 4 for IR source regions.</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>intel/fs: nir_op_extract_i8 extracts a byte, not a word</li>
+  <li>intel/fs: Fix extract_u8 of an odd byte from a 64-bit integer</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>glsl: fix recording of variables for XFB in TCS shaders</li>
+</ul>
+
+<p>Jason Ekstrand (10):</p>
+<ul>
+  <li>intel/fs: Bail in optimize_extract_to_float if we have modifiers</li>
+  <li>compiler/types: Add a contains_64bit helper</li>
+  <li>nir/xfb: Properly align 64-bit values</li>
+  <li>nir/xfb: Work in terms of components rather than slots</li>
+  <li>nir/xfb: Handle compact arrays in gather_xfb_info</li>
+  <li>anv: Count surfaces for non-YCbCr images in GetDescriptorSetLayoutSupport</li>
+  <li>spirv: OpImageQueryLod requires a sampler</li>
+  <li>spirv: Pull offset/stride from the pointer for OpArrayLength</li>
+  <li>glsl/list: Add a list variant of insert_after</li>
+  <li>glsl/lower_vector_derefs: Don't use a temporary for TCS outputs</li>
+</ul>
+
+<p>Jose Maria Casanova Crespo (1):</p>
+<ul>
+  <li>glsl: TCS outputs can not be transform feedback candidates on GLES</li>
+</ul>
+
+<p>José Fonseca (1):</p>
+<ul>
+  <li>scons: Workaround failures with MSVC when using SCons 3.0.[2-4].</li>
+</ul>
+
+<p>Juan A. Suarez Romero (3):</p>
+<ul>
+  <li>genxml: add missing field values for 3DSTATE_SF</li>
+  <li>anv: advertise 8 subpixel precision bits</li>
+  <li>anv: destroy descriptor sets when pool gets reset</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>intel/fs: Fix opt_peephole_csel to not throw away saturates.</li>
+</ul>
+
+<p>Kevin Strasser (1):</p>
+<ul>
+  <li>egl/dri: Avoid out of bounds array access</li>
+</ul>
+
+<p>Lionel Landwerlin (1):</p>
+<ul>
+  <li>intel: fix urb size for CFL GT1</li>
+</ul>
+
+<p>Marek Olšák (5):</p>
+<ul>
+  <li>radeonsi: add driconf option radeonsi_enable_nir</li>
+  <li>radeonsi: always enable NIR for Civilization 6 to fix corruption</li>
+  <li>driconf: add Civ6Sub executable for Civilization 6</li>
+  <li>tgsi: don't set tgsi_info::uses_bindless_images for constbufs and hw atomics</li>
+  <li>radeonsi: compile clear and copy buffer compute shaders on demand</li>
+</ul>
+
+<p>Mauro Rossi (2):</p>
+<ul>
+  <li>android: anv: fix generated files depedencies (v2)</li>
+  <li>android: anv: fix libexpat shared dependency</li>
+</ul>
+
+<p>Ray Zhang (1):</p>
+<ul>
+  <li>glx: fix shared memory leak in X11</li>
+</ul>
+
+<p>Rhys Perry (2):</p>
+<ul>
+  <li>radv: bitcast 16-bit outputs to integers</li>
+  <li>radv: ensure export arguments are always float</li>
+</ul>
+
+<p>Samuel Pitoiset (8):</p>
+<ul>
+  <li>radv: write the alpha channel of MRT0 when alpha coverage is enabled</li>
+  <li>radv: fix writing the alpha channel of MRT0 when alpha coverage is enabled</li>
+  <li>radv: fix clearing attachments in secondary command buffers</li>
+  <li>radv: fix out-of-bounds access when copying descriptors BO list</li>
+  <li>radv: don't copy buffer descriptors list for samplers</li>
+  <li>radv: properly align the fence and EOP bug VA on GFX9</li>
+  <li>radv: fix pointSizeRange limits</li>
+  <li>radv: always initialize HTILE when the src layout is UNDEFINED</li>
+</ul>
+
+<p>Sergii Romantsov (2):</p>
+<ul>
+  <li>dri: meson: do not prefix user provided dri-drivers-path</li>
+  <li>d3d: meson: do not prefix user provided d3d-drivers-path</li>
+</ul>
+
+<p>Tapani Pälli (3):</p>
+<ul>
+  <li>nir: initialize value in copy_prop_vars_block</li>
+  <li>anv: retain the is_array state in create_plane_tex_instr_implicit</li>
+  <li>anv: destroy descriptor sets when pool gets destroyed</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>glsl: fix shader cache for packed param list</li>
+</ul>
+
+<p>Yevhenii Kolesnikov (1):</p>
+<ul>
+  <li>i965: Fix allow_higher_compat_version workaround limited by OpenGL 3.0</li>
+</ul>
+
+<p>pal1000 (1):</p>
+<ul>
+  <li>scons: Compatibility with Scons development version string</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff --git a/docs/relnotes/18.3.6.html b/docs/relnotes/18.3.6.html
new file mode 100644 (file)
index 0000000..09f037b
--- /dev/null
+++ b/docs/relnotes/18.3.6.html
@@ -0,0 +1,169 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.3.6 Release Notes / April 5, 2019</h1>
+
+<p>
+Mesa 18.3.6 is a bug fix release which fixes bugs found since the 18.3.5 release.
+</p>
+<p>
+Mesa 18.3.6 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+4619d92afadf7072f7956599a2ccd0934fc45b4ddbc2eb865bdcb50ddf963f87  mesa-18.3.6.tar.gz
+aaf17638dcf5a90b93b6389e152fdc9ef147768b09598f24d2c5cf482fcfc705  mesa-18.3.6.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100316">Bug 100316</a> - Linking GLSL 1.30 shaders with invariant and deprecated variables triggers an 'mismatching invariant qualifiers' error</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108766">Bug 108766</a> - Mesa built with meson has RPATH entries</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109648">Bug 109648</a> - AMD Raven hang during va-api decoding</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109980">Bug 109980</a> - [i915 CI][HSW] spec&#64;arb_fragment_shader_interlock&#64;arb_fragment_shader_interlock-image-load-store - fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110211">Bug 110211</a> - If DESTDIR is set to an empty string, the dri drivers are not installed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110221">Bug 110221</a> - build error with meson</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110259">Bug 110259</a> - radv: Sampling depth-stencil image in GENERAL layout returns nothing but zero (regression, bisected)</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andres Gomez (4):</p>
+<ul>
+  <li>glsl: correctly validate component layout qualifier for dvec{3,4}</li>
+  <li>glsl/linker: don't fail non static used inputs without matching outputs</li>
+  <li>glsl/linker: simplify xfb_offset vs xfb_stride overflow check</li>
+  <li>Revert "glsl: relax input-&gt;output validation for SSO programs"</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (2):</p>
+<ul>
+  <li>radv: Use correct image view comparison for fast clears.</li>
+  <li>ac/nir: Return frag_coord as integer.</li>
+</ul>
+
+<p>Danylo Piliaiev (1):</p>
+<ul>
+  <li>glsl: Cross validate variable's invariance by explicit invariance only</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>softpipe: fix texture view crashes</li>
+</ul>
+
+<p>Dylan Baker (1):</p>
+<ul>
+  <li>bin/install_megadrivers.py: Correctly handle DESTDIR=''</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 18.3.5</li>
+  <li>Update version to 18.3.6</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>dri3: Return the current swap interval from glXGetSwapIntervalMESA().</li>
+</ul>
+
+<p>Eric Engestrom (1):</p>
+<ul>
+  <li>meson: strip rpath from megadrivers</li>
+</ul>
+
+<p>Jason Ekstrand (2):</p>
+<ul>
+  <li>anv/pass: Flag the need for a RT flush for resolve attachments</li>
+  <li>Revert "anv/radv: release memory allocated by glsl types during spirv_to_nir"</li>
+</ul>
+
+<p>Józef Kucia (2):</p>
+<ul>
+  <li>mesa: Fix GL_NUM_DEVICE_UUIDS_EXT</li>
+  <li>radv: Fix driverUUID</li>
+</ul>
+
+<p>Leo Liu (2):</p>
+<ul>
+  <li>radeon/vcn: add H.264 constrained baseline support</li>
+  <li>radeon/vcn/vp9: search the render target from the whole list</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>radeonsi: fix assertion failure by using the correct type</li>
+</ul>
+
+<p>Mark Janes (1):</p>
+<ul>
+  <li>mesa: properly report the length of truncated log messages</li>
+</ul>
+
+<p>Plamena Manolova (1):</p>
+<ul>
+  <li>i965: Disable ARB_fragment_shader_interlock for platforms prior to GEN9</li>
+</ul>
+
+<p>Samuel Pitoiset (2):</p>
+<ul>
+  <li>radv: fix binding transform feedback buffers</li>
+  <li>radv: do not always initialize HTILE in compressed state</li>
+</ul>
+
+<p>Tapani Pälli (1):</p>
+<ul>
+  <li>anv/radv: release memory allocated by glsl types during spirv_to_nir</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>st/glsl_to_nir: fix incorrect arrary access</li>
+</ul>
+
+<p>Tobias Klausmann (1):</p>
+<ul>
+  <li>vulkan/util: meson build - add wayland client include</li>
+</ul>
+
+</div>
+</body>
+</html>
+
+
diff --git a/meson.build b/meson.build
index f11c556..64bd109 100644 (file)
--- a/meson.build
+++ b/meson.build
@@ -64,11 +64,11 @@ endif
 
 dri_drivers_path = get_option('dri-drivers-path')
 if dri_drivers_path == ''
-  dri_drivers_path = join_paths(get_option('libdir'), 'dri')
+  dri_drivers_path = join_paths(get_option('prefix'), get_option('libdir'), 'dri')
 endif
 dri_search_path = get_option('dri-search-path')
 if dri_search_path == ''
-  dri_search_path = join_paths(get_option('prefix'), dri_drivers_path)
+  dri_search_path = dri_drivers_path
 endif
 
 with_gles1 = get_option('gles1')
@@ -615,7 +615,7 @@ with_gallium_xa = _xa != 'false'
 
 d3d_drivers_path = get_option('d3d-drivers-path')
 if d3d_drivers_path == ''
-  d3d_drivers_path = join_paths(get_option('libdir'), 'd3d')
+  d3d_drivers_path = join_paths(get_option('prefix'), get_option('libdir'), 'd3d')
 endif
 
 with_gallium_st_nine =  get_option('gallium-nine')
diff --git a/scons/custom.py b/scons/custom.py
index 09946fa..8028990 100644 (file)
--- a/scons/custom.py
+++ b/scons/custom.py
@@ -48,7 +48,12 @@ import source_list
 # a path directly. We want to support both, so we need to detect the SCons version,
 # for which no API is provided by SCons 8-P
 
-scons_version = tuple(map(int, SCons.__version__.split('.')))
+# Scons version string has consistently been in this format:
+# MajorVersion.MinorVersion.Patch[.alpha/beta.yyyymmdd]
+# so this formula should cover all versions regardless of type
+# stable, alpha or beta.
+# For simplicity alpha and beta flags are removed.
+scons_version = tuple(map(int, SCons.__version__.split('.')[:3]))
 
 def quietCommandLines(env):
     # Quiet command lines
diff --git a/scons/gallium.py b/scons/gallium.py
index 963834a..61bbeb2 100755 (executable)
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -308,7 +308,20 @@ def generate(env):
     if env.GetOption('num_jobs') <= 1:
         env.SetOption('num_jobs', num_jobs())
 
-    env.Decider('MD5-timestamp')
+    # Speed up dependency checking.  See
+    # - https://github.com/SCons/scons/wiki/GoFastButton
+    # - https://bugs.freedesktop.org/show_bug.cgi?id=109443
+
+    # Scons version string has consistently been in this format:
+    # MajorVersion.MinorVersion.Patch[.alpha/beta.yyyymmdd]
+    # so this formula should cover all versions regardless of type
+    # stable, alpha or beta.
+    # For simplicity alpha and beta flags are removed.
+
+    scons_version = distutils.version.StrictVersion('.'.join(SCons.__version__.split('.')[:3]))
+    if scons_version < distutils.version.StrictVersion('3.0.2') or \
+       scons_version > distutils.version.StrictVersion('3.0.4'):
+        env.Decider('MD5-timestamp')
     env.SetOption('max_drift', 60)
 
     # C preprocessor options
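diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 8953da7..69480cb 100644 (file)
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c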
@@ -897,6 +897,37 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
 }
 
 LLVMValueRef
+ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
+                      LLVMValueRef llvm_chan,
+                      LLVMValueRef attr_number,
+                      LLVMValueRef params,
+                      LLVMValueRef i,
+                      LLVMValueRef j)
+{
+       LLVMValueRef args[6];
+       LLVMValueRef p1;
+
+       args[0] = i;
+       args[1] = llvm_chan;
+       args[2] = attr_number;
+       args[3] = ctx->i1false;
+       args[4] = params;
+
+       p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16",
+                               ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
+
+       args[0] = p1;
+       args[1] = j;
+       args[2] = llvm_chan;
+       args[3] = attr_number;
+       args[4] = ctx->i1false;
+       args[5] = params;
+
+       return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16",
+                                 ctx->f16, args, 6, AC_FUNC_ATTR_READNONE);
+}
+
+LLVMValueRef
 ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
                       LLVMValueRef parameter,
                       LLVMValueRef llvm_chan,
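diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 1275e4f..4df8a17 100644 (file)
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h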
@@ -218,6 +218,14 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
                   LLVMValueRef j);
 
 LLVMValueRef
+ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
+                      LLVMValueRef llvm_chan,
+                      LLVMValueRef attr_number,
+                      LLVMValueRef params,
+                      LLVMValueRef i,
+                      LLVMValueRef j);
+
+LLVMValueRef
 ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
                       LLVMValueRef parameter,
                       LLVMValueRef llvm_chan,
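diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 593ca71..4a8bad6 100644 (file)
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c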
@@ -3032,7 +3032,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
                        ctx->abi->frag_pos[2],
                        ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ctx->abi->frag_pos[3])
                };
-               result = ac_build_gather_values(&ctx->ac, values, 4);
+               result = ac_to_integer(&ctx->ac,
+                                      ac_build_gather_values(&ctx->ac, values, 4));
                break;
        }
        case nir_intrinsic_load_front_face:
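diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 4ebb01c..24954a9 100644 (file)
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c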
@@ -338,13 +338,13 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
                unsigned eop_bug_offset;
                void *fence_ptr;
 
-               radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0,
+               radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 8,
                                             &cmd_buffer->gfx9_fence_offset,
                                             &fence_ptr);
                cmd_buffer->gfx9_fence_bo = cmd_buffer->upload.upload_bo;
 
                /* Allocate a buffer for the EOP bug on GFX9. */
-               radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0,
+               radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 8,
                                             &eop_bug_offset, &fence_ptr);
                cmd_buffer->gfx9_eop_bug_va =
                        radv_buffer_get_va(cmd_buffer->upload.upload_bo);
@@ -414,6 +414,8 @@ radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
                             unsigned *out_offset,
                             void **ptr)
 {
+       assert(util_is_power_of_two_nonzero(alignment));
+
        uint64_t offset = align(cmd_buffer->upload.offset, alignment);
        if (offset + size > cmd_buffer->upload.size) {
                if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size))
@@ -4243,10 +4245,15 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffe
        if (!radv_image_has_htile(image))
                return;
 
-       if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
-                  radv_layout_has_htile(image, dst_layout, dst_queue_mask)) {
-               /* TODO: merge with the clear if applicable */
-               radv_initialize_htile(cmd_buffer, image, range, 0);
+       if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
+               uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
+
+               if (radv_layout_is_htile_compressed(image, dst_layout,
+                                                   dst_queue_mask)) {
+                       clear_value = 0;
+               }
+
+               radv_initialize_htile(cmd_buffer, image, range, clear_value);
        } else if (!radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) &&
                   radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) {
                uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
@@ -4709,7 +4716,7 @@ void radv_CmdBindTransformFeedbackBuffersEXT(
                enabled_mask |= 1 << idx;
        }
 
-       cmd_buffer->state.streamout.enabled_mask = enabled_mask;
+       cmd_buffer->state.streamout.enabled_mask |= enabled_mask;
 
        cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
 }
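diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c
index 6ee949c..7f7d682 100644 (file)
--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c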
@@ -965,9 +965,11 @@ void radv_update_descriptor_sets(
                        }
                        src_ptr += src_binding_layout->size / 4;
                        dst_ptr += dst_binding_layout->size / 4;
-                       dst_buffer_list[j] = src_buffer_list[j];
-                       ++src_buffer_list;
-                       ++dst_buffer_list;
+
+                       if (src_binding_layout->type != VK_DESCRIPTOR_TYPE_SAMPLER) {
+                               /* Sampler descriptors don't have a buffer list. */
+                               dst_buffer_list[j] = src_buffer_list[j];
+                       }
                }
        }
 }
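diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index a3b1884..8da7551 100644 (file)
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c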
@@ -329,7 +329,7 @@ radv_physical_device_init(struct radv_physical_device *device,
            device->rad_info.chip_class > GFX9)
                fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
 
-       radv_get_driver_uuid(&device->device_uuid);
+       radv_get_driver_uuid(&device->driver_uuid);
        radv_get_device_uuid(&device->rad_info, &device->device_uuid);
 
        if (device->rad_info.family == CHIP_STONEY ||
@@ -726,8 +726,7 @@ void radv_GetPhysicalDeviceFeatures(
                .alphaToOne                               = true,
                .multiViewport                            = true,
                .samplerAnisotropy                        = true,
-               .textureCompressionETC2                   = pdevice->rad_info.chip_class >= GFX9 ||
-                                                           pdevice->rad_info.family == CHIP_STONEY,
+               .textureCompressionETC2                   = radv_device_supports_etc(pdevice),
                .textureCompressionASTC_LDR               = false,
                .textureCompressionBC                     = true,
                .occlusionQueryPrecise                    = true,
@@ -794,7 +793,7 @@ void radv_GetPhysicalDeviceFeatures2(
                        features->storageBuffer16BitAccess = enabled;
                        features->uniformAndStorageBuffer16BitAccess = enabled;
                        features->storagePushConstant16 = enabled;
-                       features->storageInputOutput16 = enabled;
+                       features->storageInputOutput16 = enabled && HAVE_LLVM >= 0x900;
                        break;
                }
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
@@ -978,7 +977,7 @@ void radv_GetPhysicalDeviceProperties(
                .maxCullDistances                         = 8,
                .maxCombinedClipAndCullDistances          = 8,
                .discreteQueuePriorities                  = 2,
-               .pointSizeRange                           = { 0.125, 255.875 },
+               .pointSizeRange                           = { 0.0, 8192.0 },
                .lineWidthRange                           = { 0.0, 7.9921875 },
                .pointSizeGranularity                     = (1.0 / 8.0),
                .lineWidthGranularity                     = (1.0 / 128.0),
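diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c
index 59bc46d..5d727d0 100644 (file)
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c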
@@ -595,6 +595,14 @@ static bool radv_is_filter_minmax_format_supported(VkFormat format)
        }
 }
 
+bool
+radv_device_supports_etc(struct radv_physical_device *physical_device)
+{
+       return physical_device->rad_info.family == CHIP_VEGA10 ||
+              physical_device->rad_info.family == CHIP_RAVEN ||
+              physical_device->rad_info.family == CHIP_STONEY;
+}
+
 static void
 radv_physical_device_get_format_properties(struct radv_physical_device *physical_device,
                                           VkFormat format,
@@ -612,9 +620,7 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
        }
 
        if (desc->layout == VK_FORMAT_LAYOUT_ETC &&
-           physical_device->rad_info.family != CHIP_VEGA10 &&
-           physical_device->rad_info.family != CHIP_RAVEN &&
-           physical_device->rad_info.family != CHIP_STONEY) {
+           !radv_device_supports_etc(physical_device)) {
                out_properties->linearTilingFeatures = linear;
                out_properties->optimalTilingFeatures = tiled;
                out_properties->bufferFeatures = buffer;
index c8a7f80..c05da7a 100644 (file)
@@ -352,14 +352,29 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
        const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
        const uint32_t subpass_att = clear_att->colorAttachment;
        const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
-       const struct radv_image_view *iview = fb->attachments[pass_att].attachment;
-       const uint32_t samples = iview->image->info.samples;
-       const uint32_t samples_log2 = ffs(samples) - 1;
-       unsigned fs_key = radv_format_meta_fs_key(iview->vk_format);
+       const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
+       uint32_t samples, samples_log2;
+       VkFormat format;
+       unsigned fs_key;
        VkClearColorValue clear_value = clear_att->clearValue.color;
        VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
        VkPipeline pipeline;
 
+       /* When a framebuffer is bound to the current command buffer, get the
+        * number of samples from it. Otherwise, get the number of samples from
+        * the render pass because it's likely a secondary command buffer.
+        */
+       if (iview) {
+               samples = iview->image->info.samples;
+               format = iview->vk_format;
+       } else {
+               samples = cmd_buffer->state.pass->attachments[pass_att].samples;
+               format = cmd_buffer->state.pass->attachments[pass_att].format;
+       }
+
+       samples_log2 = ffs(samples) - 1;
+       fs_key = radv_format_meta_fs_key(format);
+
        if (fs_key == -1) {
                radv_finishme("color clears incomplete");
                return;
@@ -599,6 +614,9 @@ static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
                                      const VkClearRect *clear_rect,
                                      VkClearDepthStencilValue clear_value)
 {
+       if (!iview)
+               return false;
+
        uint32_t queue_mask = radv_image_queue_family_mask(iview->image,
                                                           cmd_buffer->queue_family_index,
                                                           cmd_buffer->queue_family_index);
@@ -615,7 +633,7 @@ static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
            iview->base_mip == 0 &&
            iview->base_layer == 0 &&
            radv_layout_is_htile_compressed(iview->image, layout, queue_mask) &&
-           !radv_image_extent_compare(iview->image, &iview->extent))
+           radv_image_extent_compare(iview->image, &iview->extent))
                return true;
        return false;
 }
@@ -686,11 +704,22 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
        const uint32_t pass_att = subpass->depth_stencil_attachment.attachment;
        VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
        VkImageAspectFlags aspects = clear_att->aspectMask;
-       const struct radv_image_view *iview = fb->attachments[pass_att].attachment;
-       const uint32_t samples = iview->image->info.samples;
-       const uint32_t samples_log2 = ffs(samples) - 1;
+       const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
+       uint32_t samples, samples_log2;
        VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
 
+       /* When a framebuffer is bound to the current command buffer, get the
+        * number of samples from it. Otherwise, get the number of samples from
+        * the render pass because it's likely a secondary command buffer.
+        */
+       if (iview) {
+               samples = iview->image->info.samples;
+       } else {
+               samples = cmd_buffer->state.pass->attachments[pass_att].samples;
+       }
+
+       samples_log2 = ffs(samples) - 1;
+
        assert(pass_att != VK_ATTACHMENT_UNUSED);
 
        if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
@@ -757,11 +786,14 @@ emit_fast_htile_clear(struct radv_cmd_buffer *cmd_buffer,
        const uint32_t pass_att = subpass->depth_stencil_attachment.attachment;
        VkImageLayout image_layout = subpass->depth_stencil_attachment.layout;
        const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
-       const struct radv_image_view *iview = fb->attachments[pass_att].attachment;
+       const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
        VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
        VkImageAspectFlags aspects = clear_att->aspectMask;
        uint32_t clear_word, flush_bits;
 
+       if (!iview)
+               return false;
+
        if (!radv_image_has_htile(iview->image))
                return false;
 
@@ -1059,12 +1091,15 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
        const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
        VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout;
        const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
-       const struct radv_image_view *iview = fb->attachments[pass_att].attachment;
+       const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
        VkClearColorValue clear_value = clear_att->clearValue.color;
        uint32_t clear_color[2], flush_bits = 0;
        uint32_t cmask_clear_value;
        bool ret;
 
+       if (!iview)
+               return false;
+
        if (!radv_image_has_cmask(iview->image) && !radv_image_has_dcc(iview->image))
                return false;
 
index 58da610..cda2a1e 100644 (file)
@@ -94,6 +94,7 @@ struct radv_shader_context {
        gl_shader_stage stage;
 
        LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
+       uint64_t float16_shaded_mask;
 
        uint64_t input_mask;
        uint64_t output_mask;
@@ -2097,6 +2098,7 @@ static void interp_fs_input(struct radv_shader_context *ctx,
                            unsigned attr,
                            LLVMValueRef interp_param,
                            LLVMValueRef prim_mask,
+                           bool float16,
                            LLVMValueRef result[4])
 {
        LLVMValueRef attr_number;
@@ -2129,7 +2131,12 @@ static void interp_fs_input(struct radv_shader_context *ctx,
        for (chan = 0; chan < 4; chan++) {
                LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
 
-               if (interp) {
+               if (interp && float16) {
+                       result[chan] = ac_build_fs_interp_f16(&ctx->ac,
+                                                             llvm_chan,
+                                                             attr_number,
+                                                             prim_mask, i, j);
+               } else if (interp) {
                        result[chan] = ac_build_fs_interp(&ctx->ac,
                                                          llvm_chan,
                                                          attr_number,
@@ -2141,7 +2148,30 @@ static void interp_fs_input(struct radv_shader_context *ctx,
                                                              attr_number,
                                                              prim_mask);
                        result[chan] = LLVMBuildBitCast(ctx->ac.builder, result[chan], ctx->ac.i32, "");
-                       result[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, result[chan], LLVMTypeOf(interp_param), "");
+                       result[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, result[chan], float16 ? ctx->ac.i16 : ctx->ac.i32, "");
+               }
+       }
+}
+
+static void mark_16bit_fs_input(struct radv_shader_context *ctx,
+                                const struct glsl_type *type,
+                                int location)
+{
+       if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) {
+               unsigned attrib_count = glsl_count_attribute_slots(type, false);
+               if (glsl_type_is_16bit(type)) {
+                       ctx->float16_shaded_mask |= ((1ull << attrib_count) - 1) << location;
+               }
+       } else if (glsl_type_is_array(type)) {
+               unsigned stride = glsl_count_attribute_slots(glsl_get_array_element(type), false);
+               for (unsigned i = 0; i < glsl_get_length(type); ++i) {
+                       mark_16bit_fs_input(ctx, glsl_get_array_element(type), location + i * stride);
+               }
+       } else {
+               assert(glsl_type_is_struct(type));
+               for (unsigned i = 0; i < glsl_get_length(type); i++) {
+                       mark_16bit_fs_input(ctx, glsl_get_struct_field(type, i), location);
+                       location += glsl_count_attribute_slots(glsl_get_struct_field(type, i), false);
                }
        }
 }
@@ -2156,9 +2186,15 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
        uint64_t mask;
 
        variable->data.driver_location = idx * 4;
+
+       if (!variable->data.compact)
+               mark_16bit_fs_input(ctx, variable->type, idx);
+
        mask = ((1ull << attrib_count) - 1) << variable->data.location;
 
-       if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
+       if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT ||
+           glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT16 ||
+           glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_STRUCT) {
                unsigned interp_type;
                if (variable->data.sample)
                        interp_type = INTERP_SAMPLE;
@@ -2169,10 +2205,8 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
 
                interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type);
        }
-       bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type));
-       LLVMTypeRef type = is_16bit ? ctx->ac.i16 : ctx->ac.i32;
        if (interp == NULL)
-               interp = LLVMGetUndef(type);
+               interp = LLVMGetUndef(ctx->ac.i32);
 
        for (unsigned i = 0; i < attrib_count; ++i)
                ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
@@ -2246,11 +2280,14 @@ handle_fs_inputs(struct radv_shader_context *ctx,
                if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
                    i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
                        interp_param = *inputs;
-                       interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask,
+                       bool float16 = (ctx->float16_shaded_mask >> i) & 1;
+                       interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask, float16,
                                        inputs);
 
                        if (LLVMIsUndef(interp_param))
                                ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
+                       if (float16)
+                               ctx->shader_info->fs.float16_shaded_mask |= 1u << index;
                        if (i >= VARYING_SLOT_VAR0)
                                ctx->abi.fs_input_attr_indices[i - VARYING_SLOT_VAR0] = index;
                        ++index;
@@ -2262,7 +2299,7 @@ handle_fs_inputs(struct radv_shader_context *ctx,
 
                                interp_param = *inputs;
                                interp_fs_input(ctx, index, interp_param,
-                                               ctx->abi.prim_mask, inputs);
+                                               ctx->abi.prim_mask, false, inputs);
                                ++index;
                        }
                } else if (i == VARYING_SLOT_POS) {
@@ -2411,7 +2448,7 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
                        if (is_16bit) {
                                for (unsigned chan = 0; chan < 4; chan++)
                                        values[chan] = LLVMBuildZExt(ctx->ac.builder,
-                                                                     values[chan],
+                                                                     ac_to_integer(&ctx->ac, values[chan]),
                                                                      ctx->ac.i32, "");
                        }
                        break;
@@ -2422,7 +2459,7 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
                        if (is_16bit) {
                                for (unsigned chan = 0; chan < 4; chan++)
                                        values[chan] = LLVMBuildSExt(ctx->ac.builder,
-                                                                     values[chan],
+                                                                     ac_to_integer(&ctx->ac, values[chan]),
                                                                      ctx->ac.i32, "");
                        }
                        break;
@@ -2475,12 +2512,8 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
        } else
                memcpy(&args->out[0], values, sizeof(values[0]) * 4);
 
-       for (unsigned i = 0; i < 4; ++i) {
-               if (!(args->enabled_channels & (1 << i)))
-                       continue;
-
+       for (unsigned i = 0; i < 4; ++i)
                args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
-       }
 }
 
 static void
index 48d67f3..86d78e8 100644 (file)
@@ -524,6 +524,14 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
                col_format |= cf << (4 * i);
        }
 
+       if (!col_format && blend->need_src_alpha & (1 << 0)) {
+               /* When a subpass doesn't have any color attachments, write the
+                * alpha channel of MRT0 when alpha coverage is enabled because
+                * the depth attachment needs it.
+                */
+               col_format |= V_028714_SPI_SHADER_32_ABGR;
+       }
+
        /* If the i-th target format is set, all previous target formats must
         * be non-zero to avoid hangs.
         */
@@ -688,6 +696,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 
        if (vkms && vkms->alphaToCoverageEnable) {
                blend.db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1);
+               blend.need_src_alpha |= 0x1;
        }
 
        blend.cb_target_mask = 0;
@@ -3066,13 +3075,17 @@ radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *cs,
        radv_pipeline_generate_hw_vs(cs, pipeline, pipeline->gs_copy_shader);
 }
 
-static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade)
+static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade, bool float16)
 {
        uint32_t ps_input_cntl;
        if (offset <= AC_EXP_PARAM_OFFSET_31) {
                ps_input_cntl = S_028644_OFFSET(offset);
                if (flat_shade)
                        ps_input_cntl |= S_028644_FLAT_SHADE(1);
+               if (float16) {
+                       ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) |
+                                        S_028644_ATTR0_VALID(1);
+               }
        } else {
                /* The input is a DEFAULT_VAL constant. */
                assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
@@ -3097,7 +3110,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs,
        if (ps->info.info.ps.prim_id_input) {
                unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
                if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
-                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
                        ++ps_offset;
                }
        }
@@ -3107,9 +3120,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs,
            ps->info.info.needs_multiview_view_index) {
                unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
                if (vs_offset != AC_EXP_PARAM_UNDEFINED)
-                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
                else
-                       ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true);
+                       ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false);
                ++ps_offset;
        }
 
@@ -3125,14 +3138,14 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs,
 
                vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
                if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
-                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
+                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
                        ++ps_offset;
                }
 
                vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
                if (vs_offset != AC_EXP_PARAM_UNDEFINED &&
                    ps->info.info.ps.num_input_clips_culls > 4) {
-                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
+                       ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
                        ++ps_offset;
                }
        }
@@ -3140,6 +3153,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs,
        for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
                unsigned vs_offset;
                bool flat_shade;
+               bool float16;
                if (!(ps->info.fs.input_mask & (1u << i)))
                        continue;
 
@@ -3151,8 +3165,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs,
                }
 
                flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
+               float16 = !!(ps->info.fs.float16_shaded_mask & (1u << ps_offset));
 
-               ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade);
+               ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, float16);
                ++ps_offset;
        }
 
index 585702a..c0861ea 100644 (file)
@@ -1447,6 +1447,7 @@ bool radv_format_pack_clear_color(VkFormat format,
 bool radv_is_colorbuffer_format_supported(VkFormat format, bool *blendable);
 bool radv_dcc_formats_compatible(VkFormat format1,
                                  VkFormat format2);
+bool radv_device_supports_etc(struct radv_physical_device *physical_device);
 
 struct radv_fmask_info {
        uint64_t offset;
index a1d38b3..263c0e0 100644 (file)
@@ -258,6 +258,7 @@ struct radv_shader_variant_info {
                        unsigned num_interp;
                        uint32_t input_mask;
                        uint32_t flat_shaded_mask;
+                       uint32_t float16_shaded_mask;
                        bool can_discard;
                        bool early_fragment_test;
                } fs;
index f4bd8c1..edbda4f 100644 (file)
@@ -3699,6 +3699,10 @@ apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
                                 "cannot be applied to a matrix, a structure, "
                                 "a block, or an array containing any of "
                                 "these.");
+            } else if (components > 4 && type->is_64bit()) {
+               _mesa_glsl_error(loc, state, "component layout qualifier "
+                                "cannot be applied to dvec%u.",
+                                components / 2);
             } else if (qual_component != 0 &&
                 (qual_component + components - 1) > 3) {
                _mesa_glsl_error(loc, state, "component overflow (%u > 3)",
@@ -3941,7 +3945,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
                           "`invariant' after being used",
                           var->name);
       } else {
-         var->data.invariant = 1;
+         var->data.explicit_invariant = true;
+         var->data.invariant = true;
       }
    }
 
@@ -4149,8 +4154,10 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
       }
    }
 
-   if (state->all_invariant && var->data.mode == ir_var_shader_out)
+   if (state->all_invariant && var->data.mode == ir_var_shader_out) {
+      var->data.explicit_invariant = true;
       var->data.invariant = true;
+   }
 
    var->data.interpolation =
       interpret_interpolation_qualifier(qual, var->type,
@@ -4863,6 +4870,7 @@ ast_declarator_list::hir(exec_list *instructions,
                             "`invariant' after being used",
                             earlier->name);
          } else {
+            earlier->data.explicit_invariant = true;
             earlier->data.invariant = true;
          }
       }
index 1d1a56a..f5aa1be 100644 (file)
@@ -1734,6 +1734,7 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name,
    this->data.centroid = false;
    this->data.sample = false;
    this->data.patch = false;
+   this->data.explicit_invariant = false;
    this->data.invariant = false;
    this->data.how_declared = ir_var_declared_normally;
    this->data.mode = mode;
index d05d199..a9f68b1 100644 (file)
@@ -657,6 +657,19 @@ public:
       unsigned centroid:1;
       unsigned sample:1;
       unsigned patch:1;
+      /**
+       * Was an 'invariant' qualifier explicitly set in the shader?
+       *
+       * This is used to cross validate qualifiers.
+       */
+      unsigned explicit_invariant:1;
+      /**
+       * Is the variable invariant?
+       *
+       * It can happen either by having the 'invariant' qualifier
+       * explicitly set in the shader or by being used in calculations
+       * of other invariant variables.
+       */
       unsigned invariant:1;
       unsigned precise:1;
 
index 1626657..4eb1586 100644 (file)
@@ -200,6 +200,7 @@ void ir_print_visitor::visit(ir_variable *ir)
    const char *const samp = (ir->data.sample) ? "sample " : "";
    const char *const patc = (ir->data.patch) ? "patch " : "";
    const char *const inv = (ir->data.invariant) ? "invariant " : "";
+   const char *const explicit_inv = (ir->data.explicit_invariant) ? "explicit_invariant " : "";
    const char *const prec = (ir->data.precise) ? "precise " : "";
    const char *const bindless = (ir->data.bindless) ? "bindless " : "";
    const char *const bound = (ir->data.bound) ? "bound " : "";
@@ -216,11 +217,11 @@ void ir_print_visitor::visit(ir_variable *ir)
    const char *const interp[] = { "", "smooth", "flat", "noperspective" };
    STATIC_ASSERT(ARRAY_SIZE(interp) == INTERP_MODE_COUNT);
 
-   fprintf(f, "(%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s) ",
+   fprintf(f, "(%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s) ",
            binding, loc, component, cent, bindless, bound,
            image_format, memory_read_only, memory_write_only,
            memory_coherent, memory_volatile, memory_restrict,
-           samp, patc, inv, prec, mode[ir->data.mode],
+           samp, patc, inv, explicit_inv, prec, mode[ir->data.mode],
            stream,
            interp[ir->data.interpolation]);
 
index b87933b..d4f0e58 100644 (file)
@@ -419,8 +419,10 @@ ir_reader::read_declaration(s_expression *expr)
          var->data.sample = 1;
       } else if (strcmp(qualifier->value(), "patch") == 0) {
          var->data.patch = 1;
+      } else if (strcmp(qualifier->value(), "explicit_invariant") == 0) {
+         var->data.explicit_invariant = true;
       } else if (strcmp(qualifier->value(), "invariant") == 0) {
-        var->data.invariant = 1;
+         var->data.invariant = true;
       } else if (strcmp(qualifier->value(), "uniform") == 0) {
         var->data.mode = ir_var_uniform;
       } else if (strcmp(qualifier->value(), "shader_storage") == 0) {
index 63e688b..13fc603 100644 (file)
@@ -63,6 +63,15 @@ program_resource_visitor::process(const glsl_type *type, const char *name,
 void
 program_resource_visitor::process(ir_variable *var, bool use_std430_as_default)
 {
+   const glsl_type *t =
+      var->data.from_named_ifc_block ? var->get_interface_type() : var->type;
+   process(var, t, use_std430_as_default);
+}
+
+void
+program_resource_visitor::process(ir_variable *var, const glsl_type *var_type,
+                                  bool use_std430_as_default)
+{
    unsigned record_array_count = 1;
    const bool row_major =
       var->data.matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;
@@ -72,8 +81,7 @@ program_resource_visitor::process(ir_variable *var, bool use_std430_as_default)
          get_internal_ifc_packing(use_std430_as_default) :
       var->type->get_internal_ifc_packing(use_std430_as_default);
 
-   const glsl_type *t =
-      var->data.from_named_ifc_block ? var->get_interface_type() : var->type;
+   const glsl_type *t = var_type;
    const glsl_type *t_without_array = t->without_array();
 
    /* false is always passed for the row_major parameter to the other
index 3969c01..54ad3d6 100644 (file)
@@ -309,16 +309,16 @@ cross_validate_types_and_qualifiers(struct gl_context *ctx,
     *    "The invariance of varyings that are declared in both the vertex
     *     and fragment shaders must match."
     */
-   if (input->data.invariant != output->data.invariant &&
+   if (input->data.explicit_invariant != output->data.explicit_invariant &&
        prog->data->Version < (prog->IsES ? 300 : 430)) {
       linker_error(prog,
                    "%s shader output `%s' %s invariant qualifier, "
                    "but %s shader input %s invariant qualifier\n",
                    _mesa_shader_stage_to_string(producer_stage),
                    output->name,
-                   (output->data.invariant) ? "has" : "lacks",
+                   (output->data.explicit_invariant) ? "has" : "lacks",
                    _mesa_shader_stage_to_string(consumer_stage),
-                   (input->data.invariant) ? "has" : "lacks");
+                   (input->data.explicit_invariant) ? "has" : "lacks");
       return;
    }
 
@@ -773,8 +773,20 @@ cross_validate_outputs_to_inputs(struct gl_context *ctx,
 
                output = explicit_locations[idx][input->data.location_frac].var;
 
-               if (output == NULL ||
-                   input->data.location != output->data.location) {
+               if (output == NULL) {
+                  /* A linker failure should only happen when there is no
+                   * output declaration and there is Static Use of the
+                   * declared input.
+                   */
+                  if (input->data.used) {
+                     linker_error(prog,
+                                  "%s shader input `%s' with explicit location "
+                                  "has no matching output\n",
+                                  _mesa_shader_stage_to_string(consumer->Stage),
+                                  input->name);
+                     break;
+                  }
+               } else if (input->data.location != output->data.location) {
                   linker_error(prog,
                                "%s shader input `%s' with explicit location "
                                "has no matching output\n",
@@ -804,7 +816,7 @@ cross_validate_outputs_to_inputs(struct gl_context *ctx,
              */
             assert(!input->data.assigned);
             if (input->data.used && !input->get_interface_type() &&
-                !input->data.explicit_location && !prog->SeparateShader)
+                !input->data.explicit_location)
                linker_error(prog,
                             "%s shader input `%s' "
                             "has no matching output in the previous stage\n",
@@ -1166,8 +1178,7 @@ tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog,
          return false;
       }
 
-      if ((this->offset / 4) / info->Buffers[buffer].Stride !=
-          (xfb_offset - 1) / info->Buffers[buffer].Stride) {
+      if (xfb_offset > info->Buffers[buffer].Stride) {
          linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
                       "buffer (%d)", xfb_offset * 4,
                       info->Buffers[buffer].Stride * 4, buffer);
@@ -2124,9 +2135,11 @@ class tfeedback_candidate_generator : public program_resource_visitor
 {
 public:
    tfeedback_candidate_generator(void *mem_ctx,
-                                 hash_table *tfeedback_candidates)
+                                 hash_table *tfeedback_candidates,
+                                 gl_shader_stage stage)
       : mem_ctx(mem_ctx),
         tfeedback_candidates(tfeedback_candidates),
+        stage(stage),
         toplevel_var(NULL),
         varying_floats(0)
    {
@@ -2136,10 +2149,17 @@ public:
    {
       /* All named varying interface blocks should be flattened by now */
       assert(!var->is_interface_instance());
+      assert(var->data.mode == ir_var_shader_out);
 
       this->toplevel_var = var;
       this->varying_floats = 0;
-      program_resource_visitor::process(var, false);
+      const glsl_type *t =
+         var->data.from_named_ifc_block ? var->get_interface_type() : var->type;
+      if (!var->data.patch && stage == MESA_SHADER_TESS_CTRL) {
+         assert(t->is_array());
+         t = t->fields.array;
+      }
+      program_resource_visitor::process(var, t, false);
    }
 
 private:
@@ -2173,6 +2193,8 @@ private:
     */
    hash_table * const tfeedback_candidates;
 
+   gl_shader_stage stage;
+
    /**
     * Pointer to the toplevel variable that is being traversed.
     */
@@ -2503,8 +2525,28 @@ assign_varying_locations(struct gl_context *ctx,
                  producer->Stage == MESA_SHADER_GEOMETRY));
 
          if (num_tfeedback_decls > 0) {
-            tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates);
-            g.process(output_var);
+            tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates, producer->Stage);
+            /* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1
+             * ("Vertex Shader Variables / Output Variables")
+             *
+             * "Each program object can specify a set of output variables from
+             * one shader to be recorded in transform feedback mode (see
+             * section 13.3). The variables that can be recorded are those
+             * emitted by the first active shader, in order, from the
+             * following list:
+             *
+             *  * geometry shader
+             *  * tessellation evaluation shader
+             *  * tessellation control shader
+             *  * vertex shader"
+             *
+             * But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader
+             * Variables / Output Variables") tessellation control shader is
+             * not included in the stages list.
+             */
+            if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) {
+               g.process(output_var);
+            }
          }
 
          ir_variable *const input_var =
index 17fe0a5..f01d2e0 100644 (file)
@@ -1090,7 +1090,7 @@ cross_validate_globals(struct gl_context *ctx, struct gl_shader_program *prog,
             }
          }
 
-         if (existing->data.invariant != var->data.invariant) {
+         if (existing->data.explicit_invariant != var->data.explicit_invariant) {
             linker_error(prog, "declarations for %s `%s' have "
                          "mismatching invariant qualifiers\n",
                          mode_string(var), var->name);
@@ -3181,6 +3181,12 @@ match_explicit_outputs_to_inputs(gl_linked_shader *producer,
          const unsigned idx = var->data.location - VARYING_SLOT_VAR0;
          if (explicit_locations[idx][var->data.location_frac] == NULL)
             explicit_locations[idx][var->data.location_frac] = var;
+
+         /* Always match TCS outputs. They are shared by all invocations
+          * within a patch and can be used as shared memory.
+          */
+         if (producer->Stage == MESA_SHADER_TESS_CTRL)
+            var->data.is_unmatched_generic_inout = 0;
       }
    }
 
index f6fb003..be92dbf 100644 (file)
@@ -135,6 +135,26 @@ public:
    void process(ir_variable *var, bool use_std430_as_default);
 
    /**
+    * Begin processing a variable
+    *
+    * Classes that overload this function should call \c ::process from the
+    * base class to start the recursive processing of the variable.
+    *
+    * \param var  The variable that is to be processed
+    * \param var_type The glsl_type reference of the variable
+    *
+    * Calls \c ::visit_field for each leaf of the variable.
+    *
+    * \warning
+    * When processing a uniform block, this entry should only be used in cases
+    * where the row / column ordering of matrices in the block does not
+    * matter.  For example, enumerating the names of members of the block, but
+    * not for determining the offsets of members.
+    */
+   void process(ir_variable *var, const glsl_type *var_type,
+                bool use_std430_as_default);
+
+   /**
     * Begin processing a variable of a structured type.
     *
     * This flavor of \c process should be used to handle structured types
index 59ed766..979f6fc 100644 (file)
@@ -81,6 +81,12 @@ struct exec_node {
     * Insert a node in the list after the current node
     */
    void insert_after(exec_node *after);
+
+   /**
+    * Insert another list in the list after the current node
+    */
+   void insert_after(struct exec_list *after);
+
    /**
     * Insert a node in the list before the current node
     */
@@ -508,6 +514,21 @@ exec_list_append(struct exec_list *list, struct exec_list *source)
 }
 
 static inline void
+exec_node_insert_list_after(struct exec_node *n, struct exec_list *after)
+{
+   if (exec_list_is_empty(after))
+      return;
+
+   after->tail_sentinel.prev->next = n->next;
+   after->head_sentinel.next->prev = n;
+
+   n->next->prev = after->tail_sentinel.prev;
+   n->next = after->head_sentinel.next;
+
+   exec_list_make_empty(after);
+}
+
+static inline void
 exec_list_prepend(struct exec_list *list, struct exec_list *source)
 {
    exec_list_append(source, list);
@@ -635,6 +656,11 @@ inline void exec_list::append_list(exec_list *source)
    exec_list_append(this, source);
 }
 
+inline void exec_node::insert_after(exec_list *after)
+{
+   exec_node_insert_list_after(this, after);
+}
+
 inline void exec_list::prepend_list(exec_list *source)
 {
    exec_list_prepend(this, source);
index 6cd9a2d..2aae30d 100644 (file)
@@ -32,8 +32,9 @@ namespace {
 
 class vector_deref_visitor : public ir_rvalue_enter_visitor {
 public:
-   vector_deref_visitor()
-      : progress(false)
+   vector_deref_visitor(void *mem_ctx, gl_shader_stage shader_stage)
+      : progress(false), shader_stage(shader_stage),
+        factory(&factory_instructions, mem_ctx)
    {
    }
 
@@ -45,6 +46,9 @@ public:
    virtual ir_visitor_status visit_enter(ir_assignment *ir);
 
    bool progress;
+   gl_shader_stage shader_stage;
+   exec_list factory_instructions;
+   ir_factory factory;
 };
 
 } /* anonymous namespace */
@@ -65,13 +69,63 @@ vector_deref_visitor::visit_enter(ir_assignment *ir)
    ir_constant *old_index_constant =
       deref->array_index->constant_expression_value(mem_ctx);
    if (!old_index_constant) {
-      ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
-                                           new_lhs->type,
-                                           new_lhs->clone(mem_ctx, NULL),
-                                           ir->rhs,
-                                           deref->array_index);
-      ir->write_mask = (1 << new_lhs->type->vector_elements) - 1;
-      ir->set_lhs(new_lhs);
+      if (shader_stage == MESA_SHADER_TESS_CTRL &&
+          deref->variable_referenced()->data.mode == ir_var_shader_out) {
+         /* Tessellation control shader outputs act as if they have memory
+          * backing them and if we have writes from multiple threads
+          * targeting the same vec4 (this can happen for patch outputs), the
+          * load-vec-store pattern of ir_triop_vector_insert doesn't work.
+          * Instead, we have to lower to a series of conditional write-masked
+          * assignments.
+          */
+         ir_variable *const src_temp =
+            factory.make_temp(ir->rhs->type, "scalar_tmp");
+
+         /* The newly created variable declaration goes before the assignment
+          * because we're going to set it as the new LHS.
+          */
+         ir->insert_before(factory.instructions);
+         ir->set_lhs(new(mem_ctx) ir_dereference_variable(src_temp));
+
+         ir_variable *const arr_index =
+            factory.make_temp(deref->array_index->type, "index_tmp");
+         factory.emit(assign(arr_index, deref->array_index));
+
+         for (unsigned i = 0; i < new_lhs->type->vector_elements; i++) {
+            ir_constant *const cmp_index =
+               ir_constant::zero(factory.mem_ctx, deref->array_index->type);
+            cmp_index->value.u[0] = i;
+
+            ir_rvalue *const lhs_clone = new_lhs->clone(factory.mem_ctx, NULL);
+            ir_dereference_variable *const src_temp_deref =
+               new(mem_ctx) ir_dereference_variable(src_temp);
+
+            if (new_lhs->ir_type != ir_type_swizzle) {
+               assert(lhs_clone->as_dereference());
+               ir_assignment *cond_assign =
+                  new(mem_ctx) ir_assignment(lhs_clone->as_dereference(),
+                                             src_temp_deref,
+                                             equal(arr_index, cmp_index),
+                                             WRITEMASK_X << i);
+               factory.emit(cond_assign);
+            } else {
+               ir_assignment *cond_assign =
+                  new(mem_ctx) ir_assignment(swizzle(lhs_clone, i, 1),
+                                             src_temp_deref,
+                                             equal(arr_index, cmp_index));
+               factory.emit(cond_assign);
+            }
+         }
+         ir->insert_after(factory.instructions);
+      } else {
+         ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
+                                              new_lhs->type,
+                                              new_lhs->clone(mem_ctx, NULL),
+                                              ir->rhs,
+                                              deref->array_index);
+         ir->write_mask = (1 << new_lhs->type->vector_elements) - 1;
+         ir->set_lhs(new_lhs);
+      }
    } else if (new_lhs->ir_type != ir_type_swizzle) {
       ir->set_lhs(new_lhs);
       ir->write_mask = 1 << old_index_constant->get_uint_component(0);
@@ -105,7 +159,7 @@ vector_deref_visitor::handle_rvalue(ir_rvalue **rv)
 bool
 lower_vector_derefs(gl_linked_shader *shader)
 {
-   vector_deref_visitor v;
+   vector_deref_visitor v(shader->ir, shader->Stage);
 
    visit_list_elements(&v, shader->ir);
 
index fdd99ec..ad258f8 100644 (file)
@@ -996,15 +996,14 @@ write_shader_parameters(struct blob *metadata,
                         struct gl_program_parameter_list *params)
 {
    blob_write_uint32(metadata, params->NumParameters);
-   blob_write_uint32(metadata, params->NumParameterValues);
    uint32_t i = 0;
 
    while (i < params->NumParameters) {
       struct gl_program_parameter *param = &params->Parameters[i];
-
       blob_write_uint32(metadata, param->Type);
       blob_write_string(metadata, param->Name);
       blob_write_uint32(metadata, param->Size);
+      blob_write_uint32(metadata, param->Padded);
       blob_write_uint32(metadata, param->DataType);
       blob_write_bytes(metadata, param->StateIndexes,
                        sizeof(param->StateIndexes));
@@ -1015,9 +1014,6 @@ write_shader_parameters(struct blob *metadata,
    blob_write_bytes(metadata, params->ParameterValues,
                     sizeof(gl_constant_value) * params->NumParameterValues);
 
-   blob_write_bytes(metadata, params->ParameterValueOffset,
-                    sizeof(uint32_t) * params->NumParameters);
-
    blob_write_uint32(metadata, params->StateFlags);
 }
 
@@ -1028,28 +1024,25 @@ read_shader_parameters(struct blob_reader *metadata,
    gl_state_index16 state_indexes[STATE_LENGTH];
    uint32_t i = 0;
    uint32_t num_parameters = blob_read_uint32(metadata);
-   uint32_t num_parameters_values = blob_read_uint32(metadata);
 
    _mesa_reserve_parameter_storage(params, num_parameters);
    while (i < num_parameters) {
       gl_register_file type = (gl_register_file) blob_read_uint32(metadata);
       const char *name = blob_read_string(metadata);
       unsigned size = blob_read_uint32(metadata);
+      bool padded = blob_read_uint32(metadata);
       unsigned data_type = blob_read_uint32(metadata);
       blob_copy_bytes(metadata, (uint8_t *) state_indexes,
                       sizeof(state_indexes));
 
       _mesa_add_parameter(params, type, name, size, data_type,
-                          NULL, state_indexes, false);
+                          NULL, state_indexes, padded);
 
       i++;
    }
 
    blob_copy_bytes(metadata, (uint8_t *) params->ParameterValues,
-                   sizeof(gl_constant_value) * num_parameters_values);
-
-   blob_copy_bytes(metadata, (uint8_t *) params->ParameterValueOffset,
-                   sizeof(uint32_t) * num_parameters);
+                   sizeof(gl_constant_value) * params->NumParameterValues);
 
    params->StateFlags = blob_read_uint32(metadata);
 }
index 70bce6a..c54512b 100644 (file)
@@ -258,6 +258,22 @@ glsl_type::contains_double() const
 }
 
 bool
+glsl_type::contains_64bit() const
+{
+   if (this->is_array()) {
+      return this->fields.array->contains_64bit();
+   } else if (this->is_record() || this->is_interface()) {
+      for (unsigned int i = 0; i < this->length; i++) {
+         if (this->fields.structure[i].type->contains_64bit())
+            return true;
+      }
+      return false;
+   } else {
+      return this->is_64bit();
+   }
+}
+
+bool
 glsl_type::contains_opaque() const {
    switch (base_type) {
    case GLSL_TYPE_SAMPLER:
index d32b580..1732d36 100644 (file)
@@ -527,6 +527,12 @@ public:
    bool contains_double() const;
 
    /**
+    * Query whether or not type is a 64-bit type, or for struct, interface and
+    * array types, contains a 64-bit type.
+    */
+   bool contains_64bit() const;
+
+   /**
     * Query whether or not a type is a float type
     */
    bool is_float() const
index 7e441ad..a5258f7 100644 (file)
@@ -32,7 +32,11 @@ add_var_xfb_outputs(nir_xfb_info *xfb,
                     unsigned *offset,
                     const struct glsl_type *type)
 {
-   if (glsl_type_is_array(type) || glsl_type_is_matrix(type)) {
+   /* If this type contains a 64-bit value, align to 8 bytes */
+   if (glsl_type_contains_64bit(type))
+      *offset = ALIGN_POT(*offset, 8);
+
+   if (glsl_type_is_array_or_matrix(type) && !var->data.compact) {
       unsigned length = glsl_get_length(type);
       const struct glsl_type *child_type = glsl_get_array_element(type);
       for (unsigned i = 0; i < length; i++)
@@ -57,32 +61,43 @@ add_var_xfb_outputs(nir_xfb_info *xfb,
       assert(var->data.stream < NIR_MAX_XFB_STREAMS);
       xfb->streams_written |= (1 << var->data.stream);
 
-      unsigned comp_slots = glsl_get_component_slots(type);
-      unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4);
-      assert(attrib_slots == glsl_count_attribute_slots(type, false));
-
-      /* Ensure that we don't have, for instance, a dvec2 with a location_frac
-       * of 2 which would make it crass a location boundary even though it
-       * fits in a single slot.  However, you can have a dvec3 which crosses
-       * the slot boundary with a location_frac of 2.
-       */
-      assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) == attrib_slots);
+      unsigned comp_slots;
+      if (var->data.compact) {
+         /* This only happens for clip/cull which are float arrays */
+         assert(glsl_without_array(type) == glsl_float_type());
+         assert(var->data.location == VARYING_SLOT_CLIP_DIST0 ||
+                var->data.location == VARYING_SLOT_CLIP_DIST1);
+         comp_slots = glsl_get_length(type);
+      } else {
+         comp_slots = glsl_get_component_slots(type);
+
+         unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4);
+         assert(attrib_slots == glsl_count_attribute_slots(type, false));
+
+         /* Ensure that we don't have, for instance, a dvec2 with a
+          * location_frac of 2 which would make it cross a location boundary
+          * even though it fits in a single slot.  However, you can have a
+          * dvec3 which crosses the slot boundary with a location_frac of 2.
+          */
+         assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) ==
+                attrib_slots);
+      }
 
       assert(var->data.location_frac + comp_slots <= 8);
       uint8_t comp_mask = ((1 << comp_slots) - 1) << var->data.location_frac;
 
-      assert(attrib_slots <= 2);
-      for (unsigned s = 0; s < attrib_slots; s++) {
+      while (comp_mask) {
          nir_xfb_output_info *output = &xfb->outputs[xfb->output_count++];
 
          output->buffer = var->data.xfb_buffer;
-         output->offset = *offset + s * 16;
+         output->offset = *offset;
          output->location = *location;
-         output->component_mask = (comp_mask >> (s * 4)) & 0xf;
+         output->component_mask = comp_mask & 0xf;
 
+         *offset += util_bitcount(output->component_mask) * 4;
          (*location)++;
+         comp_mask >>= 4;
       }
-      *offset += comp_slots * 4;
    }
 }
 
index 422662e..c6c0b82 100644 (file)
@@ -643,7 +643,7 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
 
          struct copy_entry *src_entry =
             lookup_entry_for_deref(copies, src, nir_derefs_a_contains_b_bit);
-         struct value value;
+         struct value value = {0};
          if (try_load_from_entry(state, src_entry, b, intrin, src, &value)) {
             if (value.is_ssa) {
                /* lookup_load has already ensured that we get a single SSA
index 3cd61f6..7af75dc 100644 (file)
@@ -307,6 +307,12 @@ glsl_type_is_integer(const struct glsl_type *type)
    return type->is_integer();
 }
 
+bool
+glsl_type_contains_64bit(const struct glsl_type *type)
+{
+   return type->contains_64bit();
+}
+
 const glsl_type *
 glsl_void_type(void)
 {
index 70d593b..1ab82e0 100644 (file)
@@ -143,6 +143,7 @@ bool glsl_type_is_dual_slot(const struct glsl_type *type);
 bool glsl_type_is_numeric(const struct glsl_type *type);
 bool glsl_type_is_boolean(const struct glsl_type *type);
 bool glsl_type_is_integer(const struct glsl_type *type);
+bool glsl_type_contains_64bit(const struct glsl_type *type);
 bool glsl_sampler_type_is_shadow(const struct glsl_type *type);
 bool glsl_sampler_type_is_array(const struct glsl_type *type);
 bool glsl_contains_atomic(const struct glsl_type *type);
index 16d9c92..3579182 100644 (file)
@@ -2021,6 +2021,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
    case nir_texop_txl:
    case nir_texop_txd:
    case nir_texop_tg4:
+   case nir_texop_lod:
       /* These operations require a sampler */
       p->src = nir_src_for_ssa(&sampler->dest.ssa);
       p->src_type = nir_tex_src_sampler_deref;
@@ -2029,7 +2030,6 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
    case nir_texop_txf:
    case nir_texop_txf_ms:
    case nir_texop_txs:
-   case nir_texop_lod:
    case nir_texop_query_levels:
    case nir_texop_texture_samples:
    case nir_texop_samples_identical:
index 0eb9f26..09a0981 100644 (file)
@@ -2039,9 +2039,17 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
    case SpvOpArrayLength: {
       struct vtn_pointer *ptr =
          vtn_value(b, w[3], vtn_value_type_pointer)->pointer;
+      const uint32_t field = w[4];
 
-      const uint32_t offset = ptr->var->type->offsets[w[4]];
-      const uint32_t stride = ptr->var->type->members[w[4]]->stride;
+      vtn_fail_if(ptr->type->base_type != vtn_base_type_struct,
+                  "OpArrayLength must take a pointer to a structure type");
+      vtn_fail_if(field != ptr->type->length - 1 ||
+                  ptr->type->members[field]->base_type != vtn_base_type_array,
+                  "OpArrayLength must reference the last memeber of the "
+                  "structure and that must be an array");
+
+      const uint32_t offset = ptr->type->offsets[field];
+      const uint32_t stride = ptr->type->members[field]->stride;
 
       if (!ptr->block_index) {
          struct vtn_access_chain chain = {
index 5253a8a..6108d97 100644 (file)
@@ -198,8 +198,10 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
    bind_to_texture_rgb = 0;
    bind_to_texture_rgba = 0;
 
-   for (int i = 0; dri2_dpy->core->indexConfigAttrib(dri_config, i, &attrib,
-                                                     &value); ++i) {
+   for (int i = 0; i < __DRI_ATTRIB_MAX; ++i) {
+      if (!dri2_dpy->core->indexConfigAttrib(dri_config, i, &attrib, &value))
+         break;
+
       switch (attrib) {
       case __DRI_ATTRIB_RENDER_TYPE:
          if (value & __DRI_ATTRIB_RGBA_BIT)
index 89f4cb7..e3ed7fe 100644 (file)
@@ -637,10 +637,8 @@ update_buffers(struct dri2_egl_surface *dri2_surf)
    struct dri2_egl_display *dri2_dpy =
       dri2_egl_display(dri2_surf->base.Resource.Display);
 
-   if (dri2_surf->base.Width != dri2_surf->wl_win->attached_width ||
-       dri2_surf->base.Height != dri2_surf->wl_win->attached_height) {
-
-      dri2_wl_release_buffers(dri2_surf);
+   if (dri2_surf->base.Width != dri2_surf->wl_win->width ||
+       dri2_surf->base.Height != dri2_surf->wl_win->height) {
 
       dri2_surf->base.Width  = dri2_surf->wl_win->width;
       dri2_surf->base.Height = dri2_surf->wl_win->height;
@@ -648,6 +646,11 @@ update_buffers(struct dri2_egl_surface *dri2_surf)
       dri2_surf->dy = dri2_surf->wl_win->dy;
    }
 
+   if (dri2_surf->base.Width != dri2_surf->wl_win->attached_width ||
+       dri2_surf->base.Height != dri2_surf->wl_win->attached_height) {
+      dri2_wl_release_buffers(dri2_surf);
+   }
+
    if (get_back_bo(dri2_surf) < 0) {
       _eglError(EGL_BAD_ALLOC, "failed to allocate color buffer");
       return -1;
index 4878039..c5c9a21 100644 (file)
@@ -202,18 +202,6 @@ _eglDeviceSupports(_EGLDevice *dev, _EGLDeviceExtension ext)
    };
 }
 
-/* Ideally we'll have an extension which passes the render node,
- * instead of the card one + magic.
- *
- * Then we can move this in _eglQueryDeviceStringEXT below. Until then
- * keep it separate.
- */
-const char *
-_eglGetDRMDeviceRenderNode(_EGLDevice *dev)
-{
-   return dev->device->nodes[DRM_NODE_RENDER];
-}
-
 EGLBoolean
 _eglQueryDeviceAttribEXT(_EGLDevice *dev, EGLint attribute,
                          EGLAttrib *value)
index ddcdcd1..acd040c 100644 (file)
@@ -68,9 +68,6 @@ typedef enum _egl_device_extension _EGLDeviceExtension;
 EGLBoolean
 _eglDeviceSupports(_EGLDevice *dev, _EGLDeviceExtension ext);
 
-const char *
-_eglGetDRMDeviceRenderNode(_EGLDevice *dev);
-
 EGLBoolean
 _eglQueryDeviceAttribEXT(_EGLDevice *dev, EGLint attribute,
                          EGLAttrib *value);
index 8c0ffea..51b7a9a 100644 (file)
@@ -96,10 +96,11 @@ if with_dri2
   c_args_for_egl += [
     '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path),
   ]
+  link_for_egl += [libloader, libxmlconfig]
+  incs_for_egl += inc_loader
 
   if with_platform_x11
     files_egl += files('drivers/dri2/platform_x11.c')
-    incs_for_egl += inc_loader
     if with_dri3
       files_egl += files('drivers/dri2/platform_x11_dri3.c')
       link_for_egl += libloader_dri3_helper
@@ -108,13 +109,12 @@ if with_dri2
   endif
   if with_platform_drm
     files_egl += files('drivers/dri2/platform_drm.c')
-    link_for_egl += [libloader, libgbm, libxmlconfig]
-    incs_for_egl += [inc_loader, inc_gbm, include_directories('../gbm/main')]
+    link_for_egl += libgbm
+    incs_for_egl += [inc_gbm, include_directories('../gbm/main')]
     deps_for_egl += dep_libdrm
   endif
   if with_platform_surfaceless
     files_egl += files('drivers/dri2/platform_surfaceless.c')
-    incs_for_egl += [inc_loader]
   endif
   if with_platform_wayland
     deps_for_egl += [dep_wayland_client, dep_wayland_server, dep_wayland_egl_headers]
@@ -130,7 +130,6 @@ if with_dri2
   if with_platform_android
     deps_for_egl += dep_android
     files_egl += files('drivers/dri2/platform_android.c')
-    incs_for_egl += [inc_loader]
   endif
 elif with_platform_haiku
   incs_for_egl += inc_haikugl
@@ -169,7 +168,7 @@ libegl = shared_library(
     '-D_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_@0@'.format(egl_native_platform.to_upper()),
   ],
   include_directories : incs_for_egl,
-  link_with : [link_for_egl, libloader, libxmlconfig, libglapi, libmesa_util],
+  link_with : [link_for_egl, libglapi, libmesa_util],
   link_args : [ld_args_bsymbolic, ld_args_gc_sections],
   dependencies : [deps_for_egl, dep_dl, dep_libdrm, dep_clock, dep_thread],
   install : true,
index 1887ff3..e251e1d 100644 (file)
@@ -213,7 +213,9 @@ tgsi_is_bindless_image_file(unsigned file)
 {
    return file != TGSI_FILE_IMAGE &&
           file != TGSI_FILE_MEMORY &&
-          file != TGSI_FILE_BUFFER;
+          file != TGSI_FILE_BUFFER &&
+          file != TGSI_FILE_CONSTBUF &&
+          file != TGSI_FILE_HW_ATOMIC;
 }
 
 #ifdef __cplusplus
index 75ef4a5..6068e9f 100644 (file)
@@ -64,6 +64,7 @@ static rvcn_dec_message_avc_t get_h264_msg(struct radeon_decoder *dec,
        memset(&result, 0, sizeof(result));
        switch (pic->base.profile) {
        case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
+       case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
                result.profile = RDECODE_H264_PROFILE_BASELINE;
                break;
 
@@ -490,7 +491,7 @@ static rvcn_dec_message_vp9_t get_vp9_msg(struct radeon_decoder *dec,
 
        assert(dec->base.max_references + 1 <= 16);
 
-       for (i = 0 ; i < dec->base.max_references + 1 ; ++i) {
+       for (i = 0 ; i < 16 ; ++i) {
                if (dec->render_pic_list[i] && dec->render_pic_list[i] == target) {
                        result.curr_pic_idx =
                                (uintptr_t)vl_video_buffer_get_associated_data(target, &dec->base);
index 8c5078c..095110a 100644 (file)
@@ -8,4 +8,5 @@ DRI_CONF_SECTION_END
 
 DRI_CONF_SECTION_DEBUG
    DRI_CONF_RADEONSI_CLEAR_DB_CACHE_BEFORE_CLEAR("false")
+   DRI_CONF_RADEONSI_ENABLE_NIR("false")
 DRI_CONF_SECTION_END
index 20e4f59..3cbc440 100644 (file)
@@ -112,12 +112,20 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx,
        sb[0].buffer_offset = dst_offset;
        sb[0].buffer_size = size;
 
+       bool shader_dst_stream_policy = SI_COMPUTE_DST_CACHE_POLICY != L2_LRU;
+
        if (src) {
                sb[1].buffer = src;
                sb[1].buffer_offset = src_offset;
                sb[1].buffer_size = size;
 
                ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, 2, sb);
+
+               if (!sctx->cs_copy_buffer) {
+                       sctx->cs_copy_buffer = si_create_dma_compute_shader(&sctx->b,
+                                                            SI_COMPUTE_COPY_DW_PER_THREAD,
+                                                            shader_dst_stream_policy, true);
+               }
                ctx->bind_compute_state(ctx, sctx->cs_copy_buffer);
        } else {
                assert(clear_value_size >= 4 &&
@@ -128,6 +136,12 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx,
                        sctx->cs_user_data[i] = clear_value[i % (clear_value_size / 4)];
 
                ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, 1, sb);
+
+               if (!sctx->cs_clear_buffer) {
+                       sctx->cs_clear_buffer = si_create_dma_compute_shader(&sctx->b,
+                                                            SI_COMPUTE_CLEAR_DW_PER_THREAD,
+                                                            shader_dst_stream_policy, false);
+               }
                ctx->bind_compute_state(ctx, sctx->cs_clear_buffer);
        }
 
index 490a371..e37a85e 100644 (file)
@@ -508,14 +508,6 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
        if (sscreen->debug_flags & DBG(FORCE_DMA))
                sctx->b.resource_copy_region = sctx->dma_copy;
 
-       bool dst_stream_policy = SI_COMPUTE_DST_CACHE_POLICY != L2_LRU;
-       sctx->cs_clear_buffer = si_create_dma_compute_shader(&sctx->b,
-                                            SI_COMPUTE_CLEAR_DW_PER_THREAD,
-                                            dst_stream_policy, false);
-       sctx->cs_copy_buffer = si_create_dma_compute_shader(&sctx->b,
-                                            SI_COMPUTE_COPY_DW_PER_THREAD,
-                                            dst_stream_policy, true);
-
        sctx->blitter = util_blitter_create(&sctx->b);
        if (sctx->blitter == NULL)
                goto fail;
@@ -873,7 +865,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
                sscreen->debug_flags |= DBG(FS_CORRECT_DERIVS_AFTER_KILL);
        if (driQueryOptionb(config->options, "radeonsi_enable_sisched"))
                sscreen->debug_flags |= DBG(SI_SCHED);
-
+       if (driQueryOptionb(config->options, "radeonsi_enable_nir"))
+               sscreen->debug_flags |= DBG(NIR);
 
        if (sscreen->debug_flags & DBG(INFO))
                ac_print_gpu_info(&sscreen->info);
index 64bb956..a9a1be7 100644 (file)
@@ -186,7 +186,7 @@ static void si_emit_guardband(struct si_context *ctx)
                ctx->chip_class >= VI ? 16 : MAX2(ctx->screen->se_tile_repeat, 16);
 
        /* Indexed by quantization modes */
-       static unsigned max_viewport_size[] = {65535, 16383, 4095};
+       static int max_viewport_size[] = {65535, 16383, 4095};
 
        /* Ensure that the whole viewport stays representable in
         * absolute coordinates.
index 351736e..998939b 100644 (file)
@@ -373,17 +373,18 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc, int layer)
                if (util_format_is_pure_uint(tc->surface->format)) {
                   pipe_put_tile_ui_format(pt, tc->transfer_map[layer],
                                           x, y, TILE_SIZE, TILE_SIZE,
-                                          pt->resource->format,
+                                          tc->surface->format,
                                           (unsigned *) tc->tile->data.colorui128);
                } else if (util_format_is_pure_sint(tc->surface->format)) {
                   pipe_put_tile_i_format(pt, tc->transfer_map[layer],
                                          x, y, TILE_SIZE, TILE_SIZE,
-                                         pt->resource->format,
+                                         tc->surface->format,
                                          (int *) tc->tile->data.colori128);
                } else {
-                  pipe_put_tile_rgba(pt, tc->transfer_map[layer],
-                                     x, y, TILE_SIZE, TILE_SIZE,
-                                     (float *) tc->tile->data.color);
+                  pipe_put_tile_rgba_format(pt, tc->transfer_map[layer],
+                                            x, y, TILE_SIZE, TILE_SIZE,
+                                            tc->surface->format,
+                                            (float *) tc->tile->data.color);
                }
             }
             numCleared++;
index de9008d..c29a90b 100644 (file)
@@ -844,7 +844,9 @@ swr_texture_layout(struct swr_screen *screen,
 
    size_t total_size = (uint64_t)res->swr.depth * res->swr.qpitch *
                                  res->swr.pitch * res->swr.numSamples;
-   if (total_size > SWR_MAX_TEXTURE_SIZE)
+
+   // Let non-sampled textures (e.g. buffer objects) bypass the size limit
+   if (swr_resource_is_texture(&res->base) && total_size > SWR_MAX_TEXTURE_SIZE)
       return false;
 
    if (allocate) {
index 7b68c09..0595da5 100644 (file)
@@ -377,6 +377,10 @@ d3dmultisample_type_check(struct pipe_screen *screen,
     if (levels)
         *levels = 1;
 
+    /* Ignores multisamplequality */
+    if (*multisample == D3DMULTISAMPLE_NONE)
+        return D3D_OK;
+
     if (*multisample == D3DMULTISAMPLE_NONMASKABLE) {
         if (depth_stencil_format(format))
             bind = d3d9_get_pipe_depth_format_bindings(format);
index 0958a90..296a9c0 100644 (file)
@@ -696,9 +696,16 @@ present( struct NineSwapChain9 *This,
     if (This->params.SwapEffect == D3DSWAPEFFECT_DISCARD)
         handle_draw_cursor_and_hud(This, resource);
 
-    ID3DPresent_GetWindowInfo(This->present, hDestWindowOverride, &target_width, &target_height, &target_depth);
+    hr = ID3DPresent_GetWindowInfo(This->present, hDestWindowOverride, &target_width, &target_height, &target_depth);
     (void)target_depth;
 
+    /* Can happen with old Wine (presentation can still succeed),
+     * or at window destruction. */
+    if (FAILED(hr) || target_width == 0 || target_height == 0) {
+        target_width = resource->width0;
+        target_height = resource->height0;
+    }
+
     pipe = NineDevice9_GetPipe(This->base.device);
 
     if (This->present_buffers[0]) {
index bc72b11..b3c31c5 100644 (file)
@@ -68,5 +68,5 @@ pkg.generate(
   description : 'Native D3D driver modules',
   version : '.'.join(nine_version),
   requires_private : 'libdrm >= ' + dep_libdrm.version(),
-  variables : ['moduledir=${prefix}/@0@'.format(d3d_drivers_path)],
+  variables : ['moduledir=@0@'.format(d3d_drivers_path)],
 )
index 51940a2..4079ec3 100644 (file)
@@ -60,6 +60,9 @@ libgallium_dri = shared_library(
     driver_imx, driver_tegra, driver_i915, driver_svga, driver_virgl,
     driver_swr,
   ],
+  # Will be deleted during installation, see install_megadrivers.py
+  install : true,
+  install_dir : dri_drivers_path,
 )
 
 foreach d : [[with_gallium_pl111, 'pl111_dri.so'],
index 6811e6f..7772ae4 100644 (file)
@@ -32,7 +32,7 @@ endif
 
 libomx_gallium = shared_library(
   'omx_mesa',
-  'target.c',
+  ['target.c', xmlpool_options_h],
   c_args : c_vis_args,
   cpp_args : cpp_vis_args,
   link_args : [omx_link_args, ld_args_gc_sections],
index ded689b..827cfb8 100644 (file)
@@ -33,7 +33,7 @@ endif
 
 libva_gallium = shared_library(
   'gallium_drv_video',
-  'target.c',
+  ['target.c', xmlpool_options_h],
   c_args : c_vis_args,
   cpp_args : cpp_vis_args,
   link_args : [va_link_args, ld_args_gc_sections],
@@ -49,6 +49,7 @@ libva_gallium = shared_library(
     dep_libdrm, dep_thread, driver_r600, driver_radeonsi, driver_nouveau,
   ],
   link_depends : va_link_depends,
+  # Will be deleted during installation, see install_megadrivers.py
   install : true,
   install_dir : va_drivers_path,
 )
index 22e3f5f..3017721 100644 (file)
@@ -38,7 +38,7 @@ endif
 
 libvdpau_gallium = shared_library(
   'vdpau_gallium',
-  'target.c',
+  ['target.c', xmlpool_options_h],
   c_args : c_vis_args,
   cpp_args : cpp_vis_args,
   link_args : [vdpau_link_args, ld_args_gc_sections],
@@ -55,6 +55,9 @@ libvdpau_gallium = shared_library(
   ],
   link_depends : vdpau_link_depends,
   soversion : '@0@.@1@.0'.format(VDPAU_MAJOR, VDPAU_MINOR),
+  # Will be deleted during installation, see install_megadrivers.py
+  install : true,
+  install_dir : vdpau_drivers_path,
 )
 foreach d : [[with_gallium_r300, 'r300'],
              [with_gallium_r600, 'r600'],
index 733ef54..582d5ef 100644 (file)
@@ -34,7 +34,7 @@ _xa_version = '.'.join(xa_version)
 
 libxatracker = shared_library(
   'xatracker',
-  'target.c',
+  ['target.c', xmlpool_options_h],
   c_args : c_vis_args,
   cpp_args : cpp_vis_args,
   link_args : [xa_link_args, ld_args_gc_sections],
index 0af5b64..845f3a4 100644 (file)
@@ -33,7 +33,7 @@ endif
 
 libxvmc_gallium = shared_library(
   'XvMCgallium',
-  'target.c',
+  ['target.c', xmlpool_options_h],
   c_args : c_vis_args,
   cpp_args : cpp_vis_args,
   link_args : [xvmc_link_args, ld_args_gc_sections],
@@ -47,6 +47,9 @@ libxvmc_gallium = shared_library(
   ],
   dependencies : [dep_thread, driver_r600, driver_nouveau],
   link_depends : xvmc_link_depends,
+  # Will be deleted during installation, see install_megadrivers.py
+  install : true,
+  install_dir : xvmc_drivers_path,
 )
 
 foreach d : [[with_gallium_r600, 'r600'], [with_gallium_nouveau, 'nouveau']]
index ce60b95..d9417d3 100644 (file)
@@ -642,7 +642,6 @@ dri3_set_swap_interval(__GLXDRIdrawable *pdraw, int interval)
       break;
    }
 
-   priv->swap_interval = interval;
    loader_dri3_set_swap_interval(&priv->loader_drawable, interval);
 
    return 0;
@@ -659,7 +658,7 @@ dri3_get_swap_interval(__GLXDRIdrawable *pdraw)
 
    struct dri3_drawable *priv =  (struct dri3_drawable *) pdraw;
 
-  return priv->swap_interval;
+  return priv->loader_drawable.swap_interval;
 }
 
 static void
index 1d3c03f..32a8d3f 100644 (file)
@@ -117,7 +117,6 @@ struct dri3_context
 struct dri3_drawable {
    __GLXDRIdrawable base;
    struct loader_dri3_drawable loader_drawable;
-   int swap_interval;
 
    /* LIBGL_SHOW_FPS support */
    uint64_t previous_ust;
index fa0f0d7..88e45c4 100644 (file)
@@ -147,6 +147,9 @@ XDestroyDrawable(struct drisw_drawable * pdp, Display * dpy, XID drawable)
    if (pdp->ximage)
       XDestroyImage(pdp->ximage);
 
+   if (pdp->shminfo.shmid > 0)
+      XShmDetach(dpy, &pdp->shminfo);
+
    free(pdp->visinfo);
 
    XFreeGC(dpy, pdp->gc);
index db81fad..dcf653e 100644 (file)
@@ -23,9 +23,10 @@ LOCAL_PATH := $(call my-dir)
 include $(CLEAR_VARS)
 include $(LOCAL_PATH)/Makefile.sources
 
-VK_ENTRYPOINTS_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/vulkan/anv_entrypoints_gen.py
-
-VK_EXTENSIONS_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/vulkan/anv_extensions_gen.py
+ANV_ENTRYPOINTS_GEN_SCRIPT := $(LOCAL_PATH)/vulkan/anv_entrypoints_gen.py
+ANV_EXTENSIONS_GEN_SCRIPT := $(LOCAL_PATH)/vulkan/anv_extensions_gen.py
+ANV_EXTENSIONS_SCRIPT := $(LOCAL_PATH)/vulkan/anv_extensions.py
+VULKAN_API_XML := $(MESA_TOP)/src/vulkan/registry/vk.xml
 
 VULKAN_COMMON_INCLUDES := \
        $(MESA_TOP)/include \
@@ -64,10 +65,13 @@ $(intermediates)/vulkan/dummy.c:
        @echo "Gen Dummy: $(PRIVATE_MODULE) <= $(notdir $(@))"
        $(hide) touch $@
 
-$(intermediates)/vulkan/anv_entrypoints.h: $(intermediates)/vulkan/dummy.c
-       $(VK_ENTRYPOINTS_SCRIPT) \
+$(intermediates)/vulkan/anv_entrypoints.h: $(intermediates)/vulkan/dummy.c \
+                                          $(ANV_ENTRYPOINTS_GEN_SCRIPT) \
+                                          $(ANV_EXTENSIONS_SCRIPT) \
+                                          $(VULKAN_API_XML)
+       $(MESA_PYTHON2) $(ANV_ENTRYPOINTS_GEN_SCRIPT) \
                --outdir $(dir $@) \
-               --xml $(MESA_TOP)/src/vulkan/registry/vk.xml
+               --xml $(VULKAN_API_XML)
 
 LOCAL_EXPORT_C_INCLUDE_DIRS := \
         $(intermediates)
@@ -241,22 +245,28 @@ LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_entrypoints.c
 LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_extensions.c
 LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_extensions.h
 
-$(intermediates)/vulkan/anv_entrypoints.c:
+$(intermediates)/vulkan/anv_entrypoints.c: $(ANV_ENTRYPOINTS_GEN_SCRIPT) \
+                                          $(ANV_EXTENSIONS_SCRIPT) \
+                                          $(VULKAN_API_XML)
        @mkdir -p $(dir $@)
-       $(VK_ENTRYPOINTS_SCRIPT) \
-               --xml $(MESA_TOP)/src/vulkan/registry/vk.xml \
+       $(MESA_PYTHON2) $(ANV_ENTRYPOINTS_GEN_SCRIPT) \
+               --xml $(VULKAN_API_XML) \
                --outdir $(dir $@)
 
-$(intermediates)/vulkan/anv_extensions.c:
+$(intermediates)/vulkan/anv_extensions.c: $(ANV_EXTENSIONS_GEN_SCRIPT) \
+                                         $(ANV_EXTENSIONS_SCRIPT) \
+                                         $(VULKAN_API_XML)
        @mkdir -p $(dir $@)
-       $(VK_EXTENSIONS_SCRIPT) \
-               --xml $(MESA_TOP)/src/vulkan/registry/vk.xml \
+       $(MESA_PYTHON2) $(ANV_EXTENSIONS_GEN_SCRIPT) \
+               --xml $(VULKAN_API_XML) \
                --out-c $@
 
-$(intermediates)/vulkan/anv_extensions.h:
+$(intermediates)/vulkan/anv_extensions.h: $(ANV_EXTENSIONS_GEN_SCRIPT) \
+                                          $(ANV_EXTENSIONS_SCRIPT) \
+                                          $(VULKAN_API_XML)
        @mkdir -p $(dir $@)
-       $(VK_EXTENSIONS_SCRIPT) \
-               --xml $(MESA_TOP)/src/vulkan/registry/vk.xml \
+       $(MESA_PYTHON2) $(ANV_EXTENSIONS_GEN_SCRIPT) \
+               --xml $(VULKAN_API_XML) \
                --out-h $@
 
 LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES)
@@ -308,7 +318,7 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \
        libmesa_intel_compiler \
        libmesa_anv_entrypoints
 
-LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) libz libsync liblog
+LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) libexpat libz libsync liblog
 
 include $(MESA_COMMON_MK)
 include $(BUILD_SHARED_LIBRARY)
index e6e6d71..8a2391c 100644 (file)
@@ -3020,6 +3020,7 @@ fs_visitor::opt_peephole_csel()
 
             if (csel_inst != NULL) {
                progress = true;
+               csel_inst->saturate = inst->saturate;
                inst->remove(block);
             }
 
index 84627e8..f663ff5 100644 (file)
@@ -90,9 +90,16 @@ brw_reg_from_fs_reg(const struct gen_device_info *devinfo, fs_inst *inst,
           *       different execution size when the number of components
           *       written to each destination GRF is not the same.
           */
-         const unsigned width = MIN2(reg_width, phys_width);
-         brw_reg = brw_vecn_reg(width, brw_file_from_reg(reg), reg->nr, 0);
-         brw_reg = stride(brw_reg, width * reg->stride, width, reg->stride);
+         if (reg->stride > 4) {
+            assert(reg != &inst->dst);
+            assert(reg->stride * type_sz(reg->type) <= REG_SIZE);
+            brw_reg = brw_vecn_reg(1, brw_file_from_reg(reg), reg->nr, 0);
+            brw_reg = stride(brw_reg, reg->stride, 1, 0);
+         } else {
+            const unsigned width = MIN2(reg_width, phys_width);
+            brw_reg = brw_vecn_reg(width, brw_file_from_reg(reg), reg->nr, 0);
+            brw_reg = stride(brw_reg, width * reg->stride, width, reg->stride);
+         }
 
          if (devinfo->gen == 7 && !devinfo->is_haswell) {
             /* From the IvyBridge PRM (EU Changes by Processor Generation, page 13):
@@ -2337,6 +2344,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          break;
 
       case SHADER_OPCODE_INTERLOCK:
+         assert(devinfo->gen >= 9);
          /* The interlock is basically a memory fence issued via sendc */
          brw_memory_fence(p, dst, BRW_OPCODE_SENDC);
          break;
index c33394d..5d309a5 100644 (file)
@@ -511,6 +511,15 @@ fs_visitor::optimize_extract_to_float(nir_alu_instr *instr,
        src0->op != nir_op_extract_i8 && src0->op != nir_op_extract_i16)
       return false;
 
+   /* If either opcode has source modifiers, bail.
+    *
+    * TODO: We can potentially handle source modifiers if both of the opcodes
+    * we're combining are signed integers.
+    */
+   if (instr->src[0].abs || instr->src[0].negate ||
+       src0->src[0].abs || src0->src[0].negate)
+      return false;
+
    nir_const_value *element = nir_src_as_const_value(src0->src[1].src);
    assert(element != NULL);
 
@@ -1515,16 +1524,25 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
        *    Use two instructions and a word or DWord intermediate integer type.
        */
       if (nir_dest_bit_size(instr->dest.dest) == 64) {
-         const brw_reg_type type = brw_int_type(2, instr->op == nir_op_extract_i8);
+         const brw_reg_type type = brw_int_type(1, instr->op == nir_op_extract_i8);
 
          if (instr->op == nir_op_extract_i8) {
             /* If we need to sign extend, extract to a word first */
             fs_reg w_temp = bld.vgrf(BRW_REGISTER_TYPE_W);
             bld.MOV(w_temp, subscript(op[0], type, byte->u32[0]));
             bld.MOV(result, w_temp);
+         } else if (byte->u32[0] & 1) {
+            /* Extract the high byte from the word containing the desired byte
+             * offset.
+             */
+            bld.SHR(result,
+                    subscript(op[0], BRW_REGISTER_TYPE_UW, byte->u32[0] / 2),
+                    brw_imm_uw(8));
          } else {
             /* Otherwise use an AND with 0xff and a word type */
-            bld.AND(result, subscript(op[0], type, byte->u32[0] / 2), brw_imm_uw(0xff));
+            bld.AND(result,
+                    subscript(op[0], BRW_REGISTER_TYPE_UW, byte->u32[0] / 2),
+                    brw_imm_uw(0xff));
          }
       } else {
          const brw_reg_type type = brw_int_type(1, instr->op == nir_op_extract_i8);
index 5dbd060..ee1a2e8 100644 (file)
@@ -777,6 +777,7 @@ static const struct gen_device_info gen_device_info_cfl_gt1 = {
    .num_subslices = { 2, },
    .num_eu_per_subslice = 6,
    .l3_banks = 2,
+   .urb.size = 192,
    .simulator_id = 24,
 };
 static const struct gen_device_info gen_device_info_cfl_gt2 = {
index abd5da2..668cab4 100644 (file)
       <value name="AALINEDISTANCE_TRUE" value="1"/>
     </field>
     <field name="Smooth Point Enable" start="109" end="109" type="bool"/>
-    <field name="Vertex Sub Pixel Precision Select" start="108" end="108" type="uint"/>
+    <field name="Vertex Sub Pixel Precision Select" start="108" end="108" type="uint">
+      <value name="8 Bit" value="0"/>
+      <value name="4 Bit" value="1"/>
+    </field>
     <field name="Point Width Source" start="107" end="107" type="uint">
       <value name="Vertex" value="0"/>
       <value name="State" value="1"/>
index c69d7dc..4828c74 100644 (file)
       <value name="AALINEDISTANCE_TRUE" value="1"/>
     </field>
     <field name="Smooth Point Enable" start="109" end="109" type="bool"/>
-    <field name="Vertex Sub Pixel Precision Select" start="108" end="108" type="uint"/>
+    <field name="Vertex Sub Pixel Precision Select" start="108" end="108" type="uint">
+      <value name="8 Bit" value="0"/>
+      <value name="4 Bit" value="1"/>
+    </field>
     <field name="Point Width Source" start="107" end="107" type="uint">
       <value name="Vertex" value="0"/>
       <value name="State" value="1"/>
index 6dde797..39edb7e 100644 (file)
     <field name="AA Line Distance Mode" start="110" end="110" type="uint">
       <value name="AALINEDISTANCE_TRUE" value="1"/>
     </field>
-    <field name="Vertex Sub Pixel Precision Select" start="108" end="108" type="uint"/>
+    <field name="Vertex Sub Pixel Precision Select" start="108" end="108" type="uint">
+      <value name="8 Bit" value="0"/>
+      <value name="4 Bit" value="1"/>
+    </field>
     <field name="Point Width Source" start="107" end="107" type="uint">
       <value name="Vertex" value="0"/>
       <value name="State" value="1"/>
index dfc3d89..60392c7 100644 (file)
     <field name="AA Line Distance Mode" start="110" end="110" type="uint">
       <value name="AALINEDISTANCE_TRUE" value="1"/>
     </field>
-    <field name="Vertex Sub Pixel Precision Select" start="108" end="108" type="uint"/>
+    <field name="Vertex Sub Pixel Precision Select" start="108" end="108" type="uint">
+      <value name="8 Bit" value="0"/>
+      <value name="4 Bit" value="1"/>
+    </field>
     <field name="Point Width Source" start="107" end="107" type="uint">
       <value name="Vertex" value="0"/>
       <value name="State" value="1"/>
index d42c63a..e73fda9 100644 (file)
       <value name="AALINEDISTANCE_TRUE" value="1"/>
     </field>
     <field name="Smooth Point Enable" start="109" end="109" type="bool"/>
-    <field name="Vertex Sub Pixel Precision Select" start="108" end="108" type="uint"/>
+    <field name="Vertex Sub Pixel Precision Select" start="108" end="108" type="uint">
+      <value name="8 Bit" value="0"/>
+      <value name="4 Bit" value="1"/>
+    </field>
     <field name="Point Width Source" start="107" end="107" type="uint">
       <value name="Vertex" value="0"/>
       <value name="State" value="1"/>
index ca26825..3d421a1 100644 (file)
       <value name="AALINEDISTANCE_TRUE" value="1"/>
     </field>
     <field name="Smooth Point Enable" start="109" end="109" type="bool"/>
-    <field name="Vertex Sub Pixel Precision Select" start="108" end="108" type="uint"/>
+    <field name="Vertex Sub Pixel Precision Select" start="108" end="108" type="uint">
+      <value name="8 Bit" value="0"/>
+      <value name="4 Bit" value="1"/>
+    </field>
     <field name="Point Width Source" start="107" end="107" type="uint">
       <value name="Vertex" value="0"/>
       <value name="State" value="1"/>
index bbe7369..43e0f49 100644 (file)
@@ -58,6 +58,9 @@ void anv_GetDescriptorSetLayoutSupport(
                anv_foreach_stage(s, binding->stageFlags)
                   surface_count[s] += sampler->n_planes;
             }
+         } else {
+            anv_foreach_stage(s, binding->stageFlags)
+               surface_count[s] += binding->descriptorCount;
          }
          break;
 
@@ -458,6 +461,8 @@ VkResult anv_CreateDescriptorPool(
                          &device->surface_state_pool, 4096);
    pool->surface_state_free_list = NULL;
 
+   list_inithead(&pool->desc_sets);
+
    *pDescriptorPool = anv_descriptor_pool_to_handle(pool);
 
    return VK_SUCCESS;
@@ -475,6 +480,12 @@ void anv_DestroyDescriptorPool(
       return;
 
    anv_state_stream_finish(&pool->surface_state_stream);
+
+   list_for_each_entry_safe(struct anv_descriptor_set, set,
+                            &pool->desc_sets, pool_link) {
+      anv_descriptor_set_destroy(device, pool, set);
+   }
+
    vk_free2(&device->alloc, pAllocator, pool);
 }
 
@@ -486,6 +497,11 @@ VkResult anv_ResetDescriptorPool(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_descriptor_pool, pool, descriptorPool);
 
+   list_for_each_entry_safe(struct anv_descriptor_set, set,
+                            &pool->desc_sets, pool_link) {
+      anv_descriptor_set_destroy(device, pool, set);
+   }
+
    pool->next = 0;
    pool->free_list = EMPTY;
    anv_state_stream_finish(&pool->surface_state_stream);
@@ -630,6 +646,8 @@ anv_descriptor_set_destroy(struct anv_device *device,
       entry->size = set->size;
       pool->free_list = (char *) entry - pool->data;
    }
+
+   list_del(&set->pool_link);
 }
 
 VkResult anv_AllocateDescriptorSets(
@@ -652,6 +670,8 @@ VkResult anv_AllocateDescriptorSets(
       if (result != VK_SUCCESS)
          break;
 
+      list_addtail(&set->pool_link, &pool->desc_sets);
+
       pDescriptorSets[i] = anv_descriptor_set_to_handle(set);
    }
 
index 071a30e..b96166c 100644 (file)
@@ -1046,7 +1046,7 @@ void anv_GetPhysicalDeviceProperties(
          16 * devinfo->max_cs_threads,
          16 * devinfo->max_cs_threads,
       },
-      .subPixelPrecisionBits                    = 4 /* FIXME */,
+      .subPixelPrecisionBits                    = 8,
       .subTexelPrecisionBits                    = 4 /* FIXME */,
       .mipmapPrecisionBits                      = 4 /* FIXME */,
       .maxDrawIndexedIndexValue                 = UINT32_MAX,
index 71e511f..a15c99a 100644 (file)
@@ -269,6 +269,7 @@ create_plane_tex_instr_implicit(struct ycbcr_state *state,
    tex->texture_index = old_tex->texture_index;
    tex->texture_array_size = old_tex->texture_array_size;
    tex->sampler_index = old_tex->sampler_index;
+   tex->is_array = old_tex->is_array;
 
    nir_ssa_dest_init(&tex->instr, &tex->dest,
                      old_tex->dest.ssa.num_components,
index 72bd992..c4fac75 100644 (file)
@@ -164,12 +164,28 @@ anv_render_pass_compile(struct anv_render_pass *pass)
     * subpasses and checking to see if any of them don't have an external
     * dependency.  Or, we could just be lazy and add a couple extra flushes.
     * We choose to be lazy.
+    *
+    * From the documentation for vkCmdNextSubpass:
+    *
+    *    "Moving to the next subpass automatically performs any multisample
+    *    resolve operations in the subpass being ended. End-of-subpass
+    *    multisample resolves are treated as color attachment writes for the
+    *    purposes of synchronization. This applies to resolve operations for
+    *    both color and depth/stencil attachments. That is, they are
+    *    considered to execute in the
+    *    VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT pipeline stage and
+    *    their writes are synchronized with
+    *    VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT."
+    *
+    * Therefore, the above flags concerning color attachments also apply to
+    * color and depth/stencil resolve attachments.
     */
    if (all_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
       pass->subpass_flushes[0] |=
          ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
    }
-   if (all_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
+   if (all_usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
+                    VK_IMAGE_USAGE_TRANSFER_DST_BIT)) {
       pass->subpass_flushes[pass->subpass_count] |=
          ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
    }
index b705650..11ed6b4 100644 (file)
@@ -1523,6 +1523,10 @@ struct anv_descriptor_set {
    uint32_t size;
    uint32_t buffer_count;
    struct anv_buffer_view *buffer_views;
+
+   /* Link in the descriptor pool's desc_sets list. */
+   struct list_head pool_link;
+
    struct anv_descriptor descriptors[0];
 };
 
@@ -1556,6 +1560,8 @@ struct anv_descriptor_pool {
    struct anv_state_stream surface_state_stream;
    void *surface_state_free_list;
 
+   struct list_head desc_sets;
+
    char data[0];
 };
 
index cc48554..a66f9db 100644 (file)
@@ -465,6 +465,7 @@ emit_rs_state(struct anv_pipeline *pipeline,
    sf.TriangleStripListProvokingVertexSelect = 0;
    sf.LineStripListProvokingVertexSelect = 0;
    sf.TriangleFanProvokingVertexSelect = 1;
+   sf.VertexSubPixelPrecisionSelect = _8Bit;
 
    const struct brw_vue_prog_data *last_vue_prog_data =
       anv_pipeline_get_last_vue_prog_data(pipeline);
@@ -1079,6 +1080,10 @@ emit_3dstate_clip(struct anv_pipeline *pipeline,
       clip.APIMode                  = APIMODE_D3D,
       clip.ViewportXYClipTestEnable = true;
 
+#if GEN_GEN >= 8
+      clip.VertexSubPixelPrecisionSelect = _8Bit;
+#endif
+
       clip.ClipMode = CLIPMODE_NORMAL;
 
       clip.TriangleStripListProvokingVertexSelect = 0;
index 7fd1566..cf6df18 100644 (file)
@@ -1002,6 +1002,9 @@ struct brw_context
 
       /* High bits of the last seen index buffer address (for workarounds). */
       uint16_t last_bo_high_bits;
+
+      /* Used to determine whether the GPU state of primitive restart is up to date */
+      bool enable_cut_index;
    } ib;
 
    /* Active vertex program:
index dc3022b..5562631 100644 (file)
@@ -774,6 +774,14 @@ brw_upload_indices(struct brw_context *brw)
       brw->ib.index_size = index_buffer->index_size;
       brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
    }
+
+   /* We need to re-emit the index buffer state each time the cut index
+    * flag changes.
+    */
+   if (brw->prim_restart.enable_cut_index != brw->ib.enable_cut_index) {
+      brw->ib.enable_cut_index = brw->prim_restart.enable_cut_index;
+      brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER;
+   }
 }
 
 const struct brw_tracked_state brw_indices = {
index ce9a3ad..8e8e1a7 100644 (file)
@@ -965,7 +965,8 @@ genX(emit_index_buffer)(struct brw_context *brw)
 
    brw_batch_emit(brw, GENX(3DSTATE_INDEX_BUFFER), ib) {
 #if GEN_GEN < 8 && !GEN_IS_HASWELL
-      ib.CutIndexEnable = brw->prim_restart.enable_cut_index;
+      assert(brw->ib.enable_cut_index == brw->prim_restart.enable_cut_index);
+      ib.CutIndexEnable = brw->ib.enable_cut_index;
 #endif
       ib.IndexFormat = brw_get_index_type(index_buffer->index_size);
 
@@ -2408,7 +2409,7 @@ set_scissor_bits(const struct gl_context *ctx, int i,
 
    bbox[0] = MAX2(ctx->ViewportArray[i].X, 0);
    bbox[1] = MIN2(bbox[0] + ctx->ViewportArray[i].Width, fb_width);
-   bbox[2] = MAX2(ctx->ViewportArray[i].Y, 0);
+   bbox[2] = CLAMP(ctx->ViewportArray[i].Y, 0, fb_height);
    bbox[3] = MIN2(bbox[2] + ctx->ViewportArray[i].Height, fb_height);
    _mesa_intersect_scissor_bounding_box(ctx, i, bbox);
 
index 0cfe2ac..c893235 100644 (file)
@@ -180,14 +180,16 @@ intelInitExtensions(struct gl_context *ctx)
       ctx->Extensions.ARB_conditional_render_inverted = true;
       ctx->Extensions.ARB_cull_distance = true;
       ctx->Extensions.ARB_draw_buffers_blend = true;
-      if (ctx->API != API_OPENGL_COMPAT)
+      if (ctx->API != API_OPENGL_COMPAT ||
+          ctx->Const.AllowHigherCompatVersion)
          ctx->Extensions.ARB_enhanced_layouts = true;
       ctx->Extensions.ARB_ES3_compatibility = true;
       ctx->Extensions.ARB_fragment_layer_viewport = true;
       ctx->Extensions.ARB_pipeline_statistics_query = true;
       ctx->Extensions.ARB_sample_shading = true;
       ctx->Extensions.ARB_shading_language_420pack = true;
-      if (ctx->API != API_OPENGL_COMPAT) {
+      if (ctx->API != API_OPENGL_COMPAT ||
+          ctx->Const.AllowHigherCompatVersion) {
          ctx->Extensions.ARB_texture_buffer_object = true;
          ctx->Extensions.ARB_texture_buffer_object_rgb32 = true;
          ctx->Extensions.ARB_texture_buffer_range = true;
@@ -197,7 +199,8 @@ intelInitExtensions(struct gl_context *ctx)
       ctx->Extensions.ARB_texture_multisample = true;
       ctx->Extensions.ARB_uniform_buffer_object = true;
 
-      if (ctx->API != API_OPENGL_COMPAT)
+      if (ctx->API != API_OPENGL_COMPAT ||
+          ctx->Const.AllowHigherCompatVersion)
          ctx->Extensions.AMD_vertex_shader_layer = true;
       ctx->Extensions.EXT_framebuffer_multisample = true;
       ctx->Extensions.EXT_framebuffer_multisample_blit_scaled = true;
@@ -226,7 +229,8 @@ intelInitExtensions(struct gl_context *ctx)
       ctx->Extensions.ARB_conservative_depth = true;
       ctx->Extensions.ARB_derivative_control = true;
       ctx->Extensions.ARB_framebuffer_no_attachments = true;
-      if (ctx->API != API_OPENGL_COMPAT) {
+      if (ctx->API != API_OPENGL_COMPAT ||
+          ctx->Const.AllowHigherCompatVersion) {
          ctx->Extensions.ARB_gpu_shader5 = true;
          ctx->Extensions.ARB_gpu_shader_fp64 = devinfo->has_64bit_types;
       }
@@ -237,7 +241,8 @@ intelInitExtensions(struct gl_context *ctx)
       ctx->Extensions.ARB_shader_image_size = true;
       ctx->Extensions.ARB_shader_precision = true;
       ctx->Extensions.ARB_shader_texture_image_samples = true;
-      if (ctx->API != API_OPENGL_COMPAT)
+      if (ctx->API != API_OPENGL_COMPAT ||
+          ctx->Const.AllowHigherCompatVersion)
          ctx->Extensions.ARB_tessellation_shader = true;
       ctx->Extensions.ARB_texture_compression_bptc = true;
       ctx->Extensions.ARB_texture_view = true;
@@ -246,7 +251,6 @@ intelInitExtensions(struct gl_context *ctx)
       ctx->Extensions.EXT_shader_samples_identical = true;
       ctx->Extensions.OES_primitive_bounding_box = true;
       ctx->Extensions.OES_texture_buffer = true;
-      ctx->Extensions.ARB_fragment_shader_interlock = true;
 
       if (can_do_pipelined_register_writes(brw->screen)) {
          ctx->Extensions.ARB_draw_indirect = true;
@@ -311,6 +315,30 @@ intelInitExtensions(struct gl_context *ctx)
       ctx->Extensions.KHR_blend_equation_advanced_coherent = true;
       ctx->Extensions.KHR_texture_compression_astc_ldr = true;
       ctx->Extensions.KHR_texture_compression_astc_sliced_3d = true;
+
+      /*
+       * From the Skylake PRM Vol. 7 (Memory Fence Message, page 221):
+       *  "A memory fence message issued by a thread causes further messages
+       *   issued by the thread to be blocked until all previous data port
+       *   messages have completed, or the results can be globally observed from
+       *   the point of view of other threads in the system."
+       *
+       * From the Haswell PRM Vol. 7 (Memory Fence, page 256):
+       *  "A memory fence message issued by a thread causes further messages
+       *   issued by the thread to be blocked until all previous messages issued
+       *   by the thread to that data port (data cache or render cache) have
+       *   been globally observed from the point of view of other threads in the
+       *   system."
+       *
+       * Summarized: For ARB_fragment_shader_interlock to work, we need to
+       * ensure memory access ordering for all messages to the dataport from
+       * all threads. Memory fence messages prior to SKL only provide memory
+       * access ordering for messages from the same thread, so we can only
+       * support the feature from Gen9 onwards.
+       *
+       */
+
+      ctx->Extensions.ARB_fragment_shader_interlock = true;
    }
 
    if (gen_device_info_is_9lp(devinfo))
@@ -319,7 +347,8 @@ intelInitExtensions(struct gl_context *ctx)
    if (devinfo->gen >= 6)
       ctx->Extensions.INTEL_performance_query = true;
 
-   if (ctx->API != API_OPENGL_COMPAT)
+   if (ctx->API != API_OPENGL_COMPAT ||
+       ctx->Const.AllowHigherCompatVersion)
       ctx->Extensions.ARB_base_instance = true;
    if (ctx->API != API_OPENGL_CORE)
       ctx->Extensions.ARB_color_buffer_float = true;
index d98c823..a0f47bb 100644 (file)
@@ -54,6 +54,9 @@ if dri_drivers != []
       dep_selinux, dep_libdrm, dep_expat, dep_m, dep_thread, dep_dl, idep_nir,
     ],
     link_args : [ld_args_build_id, ld_args_bsymbolic, ld_args_gc_sections],
+    # Will be deleted during installation, see install_megadrivers.py
+    install : true,
+    install_dir : dri_drivers_path,
   )
 
   meson.add_install_script(
@@ -78,7 +81,7 @@ if with_dri
     filebase : 'dri',
     description : 'Direct Rendering Infrastructure',
     version : meson.project_version(),
-    variables : ['dridriverdir=${prefix}/' + dri_drivers_path],
+    variables : ['dridriverdir=' + dri_drivers_path],
     requires_private : dri_req_private,
   )
 endif
index 1cd0caf..4f1fb0d 100644 (file)
@@ -239,6 +239,9 @@ _mesa_gl_vdebug(struct gl_context *ctx,
    _mesa_debug_get_id(id);
 
    len = _mesa_vsnprintf(s, MAX_DEBUG_MESSAGE_LENGTH, fmtString, args);
+   if (len >= MAX_DEBUG_MESSAGE_LENGTH)
+      /* message was truncated */
+      len = MAX_DEBUG_MESSAGE_LENGTH - 1;
 
    _mesa_log_msg(ctx, source, type, *id, severity, len, s);
 }
index 1b1679e..4623dc4 100644 (file)
@@ -870,6 +870,9 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
       break;
 
    /* GL_EXT_external_objects */
+   case GL_NUM_DEVICE_UUIDS_EXT:
+      v->value_int = 1;
+      break;
    case GL_DRIVER_UUID_EXT:
       _mesa_get_driver_uuid(ctx, v->value_int_4);
       break;
index 2bc1b6d..4073030 100644 (file)
@@ -271,6 +271,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
    p->Name = strdup(name ? name : "");
    p->Type = type;
    p->Size = size;
+   p->Padded = pad_and_align;
    p->DataType = datatype;
 
    paramList->ParameterValueOffset[oldNum] = oldValNum;
index cc551c1..d3d5961 100644 (file)
@@ -104,6 +104,12 @@ struct gl_program_parameter
     * A sequence of STATE_* tokens and integers to identify GL state.
     */
    gl_state_index16 StateIndexes[STATE_LENGTH];
+
+   /**
+    * We need to keep track of whether the param is padded for use in the
+    * shader cache.
+    */
+   bool Padded;
 };
 
 
index 581a863..3738847 100644 (file)
@@ -290,13 +290,16 @@ st_nir_assign_uniform_locations(struct gl_context *ctx,
       } else {
          loc = st_nir_lookup_parameter_index(prog->Parameters, uniform->name);
 
-         if (ctx->Const.PackedDriverUniformStorage) {
+         /* We need to check that loc is not -1 here before accessing the
+          * array. It can be negative for example when we have a struct that
+          * only contains opaque types.
+          */
+         if (loc >= 0 && ctx->Const.PackedDriverUniformStorage) {
             loc = prog->Parameters->ParameterValueOffset[loc];
          }
       }
 
       uniform->data.driver_location = loc;
-
       max = MAX2(max, loc + type_size(uniform->type));
    }
    *size = max;
index a937c46..830927a 100644 (file)
@@ -227,6 +227,9 @@ TODO: document the other workarounds.
         <application name="Civilization 6" executable="Civ6">
             <option name="mesa_glthread" value="true"/>
         </application>
+        <application name="Civilization 6" executable="Civ6Sub">
+            <option name="mesa_glthread" value="true"/>
+        </application>
 
         <application name="Dreamfall Chapters" executable="Dreamfall Chapters">
             <option name="mesa_glthread" value="true"/>
@@ -334,5 +337,11 @@ TODO: document the other workarounds.
         <application name="No Mans Sky" executable="NMS.exe">
             <option name="radeonsi_zerovram" value="true" />
         </application>
+        <application name="Civilization 6" executable="Civ6">
+            <option name="radeonsi_enable_nir" value="true"/>
+        </application>
+        <application name="Civilization 6" executable="Civ6Sub">
+            <option name="radeonsi_enable_nir" value="true"/>
+        </application>
     </device>
 </driconf>
index e0a30f5..2fa7c42 100644 (file)
@@ -342,3 +342,8 @@ DRI_CONF_OPT_END
 DRI_CONF_OPT_BEGIN_B(radeonsi_zerovram, def) \
         DRI_CONF_DESC(en,"Zero all vram allocations") \
 DRI_CONF_OPT_END
+
+#define DRI_CONF_RADEONSI_ENABLE_NIR(def) \
+DRI_CONF_OPT_BEGIN_B(radeonsi_enable_nir, def) \
+        DRI_CONF_DESC(en,gettext("Enable NIR")) \
+DRI_CONF_OPT_END
index 15e4ff4..241fa44 100644 (file)
@@ -36,10 +36,17 @@ vk_enum_to_str = custom_target(
   ],
 )
 
+vulkan_util_deps = []
+
+if with_platform_wayland
+  vulkan_util_deps += dep_wayland_client
+endif
+
 libvulkan_util = static_library(
   'vulkan_util',
   [files_vulkan_util, vk_enum_to_str],
   include_directories : [inc_common, inc_vulkan],
+  dependencies : vulkan_util_deps,
   c_args : [c_vis_args],
   build_by_default : false,
 )