OSDN Git Service

Merge remote-tracking branch 'mesa/12.0' into marshmallow-x86 android-x86-6.0-r1
authorChih-Wei Huang <cwhuang@linux.org.tw>
Tue, 6 Sep 2016 03:18:33 +0000 (11:18 +0800)
committerChih-Wei Huang <cwhuang@linux.org.tw>
Tue, 6 Sep 2016 03:18:33 +0000 (11:18 +0800)
Conflicts:
src/egl/drivers/dri2/platform_android.c
src/mesa/drivers/dri/i965/intel_syncobj.c

167 files changed:
VERSION
appveyor.yml
bin/.cherry-ignore
configure.ac
docs/relnotes/12.0.2.html [new file with mode: 0644]
src/compiler/glsl/glcpp/glcpp-parse.y
src/compiler/glsl/glcpp/glcpp.h
src/compiler/glsl/glcpp/tests/120-undef-builtin.c
src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected
src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c [new file with mode: 0644]
src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c.expected [new file with mode: 0644]
src/compiler/glsl/glsl_lexer.ll
src/compiler/glsl/glsl_parser.yy
src/compiler/glsl/ir.h
src/compiler/glsl/ir_set_program_inouts.cpp
src/compiler/glsl/link_varyings.cpp
src/compiler/glsl/linker.cpp
src/compiler/glsl/lower_packed_varyings.cpp
src/compiler/glsl/opt_conditional_discard.cpp
src/compiler/glsl_types.cpp
src/compiler/nir/nir.c
src/compiler/nir/nir.h
src/compiler/nir/nir_builder.h
src/compiler/nir/nir_inline_functions.c
src/compiler/nir/nir_intrinsics.h
src/compiler/nir/nir_lower_vars_to_ssa.c
src/compiler/nir/nir_phi_builder.h
src/compiler/spirv/vtn_variables.c
src/egl/drivers/dri2/egl_dri2.c
src/egl/drivers/dri2/egl_dri2.h
src/egl/drivers/dri2/platform_android.c
src/egl/drivers/dri2/platform_drm.c
src/egl/drivers/dri2/platform_surfaceless.c
src/egl/drivers/dri2/platform_wayland.c
src/egl/drivers/dri2/platform_x11.c
src/egl/wayland/wayland-egl/wayland-egl-priv.h
src/egl/wayland/wayland-egl/wayland-egl.c
src/gallium/auxiliary/Makefile.sources
src/gallium/auxiliary/util/u_format_r11g11b10f.h
src/gallium/auxiliary/util/u_math.h
src/gallium/auxiliary/util/u_viewport.h [new file with mode: 0644]
src/gallium/auxiliary/vl/vl_winsys_dri3.c
src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
src/gallium/drivers/freedreno/a4xx/fd4_emit.c
src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
src/gallium/drivers/nouveau/nv30/nv30_state.c
src/gallium/drivers/nouveau/nv50/nv50_formats.c
src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
src/gallium/drivers/nouveau/nvc0/nvc0_context.c
src/gallium/drivers/nouveau/nvc0/nvc0_context.h
src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
src/gallium/drivers/nouveau/nvc0/nve4_compute.c
src/gallium/drivers/radeon/cayman_msaa.c
src/gallium/drivers/radeonsi/cik_sdma.c
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_hw_context.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_draw.c
src/gallium/drivers/radeonsi/si_state_shaders.c
src/gallium/drivers/radeonsi/sid.h
src/gallium/drivers/svga/svga_shader.c
src/gallium/drivers/swr/Makefile.am
src/gallium/drivers/vc4/vc4_bufmgr.c
src/gallium/drivers/vc4/vc4_bufmgr.h
src/gallium/drivers/vc4/vc4_context.h
src/gallium/drivers/vc4/vc4_emit.c
src/gallium/drivers/vc4/vc4_opt_vpm.c
src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_resource.c
src/gallium/drivers/vc4/vc4_screen.c
src/gallium/drivers/vc4/vc4_screen.h
src/gallium/include/pipe/p_state.h
src/gallium/state_trackers/omx/vid_enc.c
src/gallium/targets/dri/Makefile.am
src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
src/gallium/winsys/svga/drm/vmw_screen.c
src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c
src/gbm/main/gbm.c
src/glx/glx_error.c
src/glx/glx_pbuffer.c
src/glx/glxcmds.c
src/glx/glxglvnd.c
src/glx/query_renderer.c
src/glx/tests/fake_glx_screen.cpp
src/intel/genxml/gen6.xml
src/intel/genxml/gen7.xml
src/intel/genxml/gen75.xml
src/intel/genxml/gen8.xml
src/intel/genxml/gen9.xml
src/intel/isl/Makefile.am
src/intel/isl/isl.c
src/intel/isl/isl.h
src/intel/isl/isl_surface_state.c
src/intel/vulkan/Makefile.am
src/intel/vulkan/anv_cmd_buffer.c
src/intel/vulkan/anv_descriptor_set.c
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_entrypoints_gen.py
src/intel/vulkan/anv_image.c
src/intel/vulkan/anv_meta_blit.c
src/intel/vulkan/anv_meta_blit2d.c
src/intel/vulkan/anv_meta_clear.c
src/intel/vulkan/anv_pipeline.c
src/intel/vulkan/anv_pipeline_cache.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/gen7_pipeline.c
src/intel/vulkan/gen8_pipeline.c
src/intel/vulkan/genX_cmd_buffer.c
src/intel/vulkan/genX_l3.c
src/intel/vulkan/genX_pipeline.c
src/intel/vulkan/genX_pipeline_util.h
src/loader/loader.c
src/loader/loader_dri3_helper.c
src/mapi/entry_x86-64_tls.h
src/mapi/entry_x86_tls.h
src/mapi/entry_x86_tsd.h
src/mesa/Makefile.sources
src/mesa/SConscript
src/mesa/drivers/dri/i915/intel_context.c
src/mesa/drivers/dri/i965/Makefile.sources
src/mesa/drivers/dri/i965/brw_compiler.h
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_nir.cpp
src/mesa/drivers/dri/i965/brw_nir.h
src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c [new file with mode: 0644]
src/mesa/drivers/dri/i965/brw_performance_monitor.c
src/mesa/drivers/dri/i965/brw_pipe_control.c
src/mesa/drivers/dri/i965/brw_tcs.c
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
src/mesa/drivers/dri/i965/intel_batchbuffer.h
src/mesa/drivers/dri/i965/intel_fbo.c
src/mesa/drivers/dri/i965/intel_mipmap_tree.c
src/mesa/drivers/dri/i965/intel_pixel_read.c
src/mesa/drivers/dri/i965/intel_reg.h
src/mesa/drivers/dri/i965/intel_syncobj.c
src/mesa/drivers/dri/i965/intel_tex.c
src/mesa/drivers/dri/nouveau/nv20_state_frag.c
src/mesa/drivers/dri/swrast/swrast.c
src/mesa/main/ffvertex_prog.c
src/mesa/main/genmipmap.c
src/mesa/main/get.c
src/mesa/main/glformats.c
src/mesa/main/glformats.h
src/mesa/main/shader_query.cpp
src/mesa/main/texgetimage.c
src/mesa/main/teximage.c
src/mesa/main/texstorage.c
src/mesa/state_tracker/st_format.c
src/mesa/state_tracker/st_glsl_to_tgsi.cpp
src/mesa/state_tracker/st_vdpau.c
src/mesa/vbo/vbo_exec_array.c
src/mesa/vbo/vbo_save_api.c
src/mesa/vbo/vbo_split_copy.c

diff --git a/VERSION b/VERSION
index b700dc1..f36e00a 100644 (file)
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-12.0.1
+12.0.2
index 2e9b9d6..c2efa7e 100644 (file)
@@ -37,6 +37,8 @@ cache:
 - win_flex_bison-2.4.5.zip
 - llvm-3.3.1-msvc2013-mtd.7z
 
+os: Visual Studio 2013
+
 environment:
   WINFLEXBISON_ARCHIVE: win_flex_bison-2.4.5.zip
   LLVM_ARCHIVE: llvm-3.3.1-msvc2013-mtd.7z
@@ -47,6 +49,8 @@ install:
 - python -m pip --version
 # Install Mako
 - python -m pip install --egg Mako
+# Install pywin32 extensions, needed by SCons
+- python -m pip install pypiwin32
 # Install SCons
 - python -m pip install --egg scons==2.4.1
 - scons --version
index 5b0b89c..9cf8d06 100644 (file)
@@ -1,2 +1,5 @@
 # The offending commit that this patch (part) reverts isn't in 12.0
 be32a2132785fbc119f17e62070e007ee7d17af7 i965/compiler: Bring back the INTEL_PRECISE_TRIG environment variable
+
+# The patch depends on the batch_cache work at least.
+89f00f749fda4c1beca38f362c7f86bdc6e32785 a4xx: make sure to actually clamp depth as requested
index 535a2e3..3f5f1a9 100644 (file)
@@ -225,6 +225,7 @@ AX_GCC_FUNC_ATTRIBUTE([packed])
 AX_GCC_FUNC_ATTRIBUTE([pure])
 AX_GCC_FUNC_ATTRIBUTE([returns_nonnull])
 AX_GCC_FUNC_ATTRIBUTE([unused])
+AX_GCC_FUNC_ATTRIBUTE([visibility])
 AX_GCC_FUNC_ATTRIBUTE([warn_unused_result])
 AX_GCC_FUNC_ATTRIBUTE([weak])
 
@@ -783,6 +784,7 @@ if test "x$enable_asm" = xyes; then
     esac
 fi
 
+AC_HEADER_MAJOR
 AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"])
 AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
 AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])
@@ -1639,7 +1641,7 @@ esac
 
 AC_ARG_WITH([vulkan-icddir],
     [AS_HELP_STRING([--with-vulkan-icddir=DIR],
-        [directory for the Vulkan driver icd files @<:@${sysconfdir}/vulkan/icd.d@:>@])],
+        [directory for the Vulkan driver icd files @<:@${datarootdir}/vulkan/icd.d@:>@])],
     [VULKAN_ICD_INSTALL_DIR="$withval"],
     [VULKAN_ICD_INSTALL_DIR='${datarootdir}/vulkan/icd.d'])
 AC_SUBST([VULKAN_ICD_INSTALL_DIR])
@@ -2331,6 +2333,45 @@ swr_llvm_check() {
     fi
 }
 
+swr_require_cxx_feature_flags() {
+    feature_name="$1"
+    preprocessor_test="$2"
+    option_list="$3"
+    output_var="$4"
+
+    AC_MSG_CHECKING([whether $CXX supports $feature_name])
+    AC_LANG_PUSH([C++])
+    save_CXXFLAGS="$CXXFLAGS"
+    save_IFS="$IFS"
+    IFS=","
+    found=0
+    for opts in $option_list
+    do
+        unset IFS
+        CXXFLAGS="$opts $save_CXXFLAGS"
+        AC_COMPILE_IFELSE(
+            [AC_LANG_PROGRAM(
+                [   #if !($preprocessor_test)
+                    #error
+                    #endif
+                ])],
+            [found=1; break],
+            [])
+        IFS=","
+    done
+    IFS="$save_IFS"
+    CXXFLAGS="$save_CXXFLAGS"
+    AC_LANG_POP([C++])
+    if test $found -eq 1; then
+        AC_MSG_RESULT([$opts])
+        eval "$output_var=\$opts"
+        return 0
+    fi
+    AC_MSG_RESULT([no])
+    AC_MSG_ERROR([swr requires $feature_name support])
+    return 1
+}
+
 dnl Duplicates in GALLIUM_DRIVERS_DIRS are removed by sorting it after this block
 if test -n "$with_gallium_drivers"; then
     gallium_drivers=`IFS=', '; echo $with_gallium_drivers`
@@ -2400,29 +2441,20 @@ if test -n "$with_gallium_drivers"; then
         xswr)
             swr_llvm_check "swr"
 
-            AC_MSG_CHECKING([whether $CXX supports c++11/AVX/AVX2])
-            AVX_CXXFLAGS="-march=core-avx-i"
-            AVX2_CXXFLAGS="-march=core-avx2"
-
-            AC_LANG_PUSH([C++])
-            save_CXXFLAGS="$CXXFLAGS"
-            CXXFLAGS="-std=c++11 $CXXFLAGS"
-            AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],[],
-                              [AC_MSG_ERROR([c++11 compiler support not detected])])
-            CXXFLAGS="$save_CXXFLAGS"
-
-            save_CXXFLAGS="$CXXFLAGS"
-            CXXFLAGS="$AVX_CXXFLAGS $CXXFLAGS"
-            AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],[],
-                              [AC_MSG_ERROR([AVX compiler support not detected])])
-            CXXFLAGS="$save_CXXFLAGS"
-
-            save_CFLAGS="$CXXFLAGS"
-            CXXFLAGS="$AVX2_CXXFLAGS $CXXFLAGS"
-            AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],[],
-                              [AC_MSG_ERROR([AVX2 compiler support not detected])])
-            CXXFLAGS="$save_CXXFLAGS"
-            AC_LANG_POP([C++])
+            swr_require_cxx_feature_flags "C++11" "__cplusplus >= 201103L" \
+                ",-std=c++11" \
+                SWR_CXX11_CXXFLAGS
+            AC_SUBST([SWR_CXX11_CXXFLAGS])
+
+            swr_require_cxx_feature_flags "AVX" "defined(__AVX__)" \
+                ",-mavx,-march=core-avx" \
+                SWR_AVX_CXXFLAGS
+            AC_SUBST([SWR_AVX_CXXFLAGS])
+
+            swr_require_cxx_feature_flags "AVX2" "defined(__AVX2__)" \
+                ",-mavx2 -mfma -mbmi2 -mf16c,-march=core-avx2" \
+                SWR_AVX2_CXXFLAGS
+            AC_SUBST([SWR_AVX2_CXXFLAGS])
 
             HAVE_GALLIUM_SWR=yes
             ;;
diff --git a/docs/relnotes/12.0.2.html b/docs/relnotes/12.0.2.html
new file mode 100644 (file)
index 0000000..385ef08
--- /dev/null
@@ -0,0 +1,403 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 12.0.2 Release Notes / September 2, 2016</h1>
+
+<p>
+Mesa 12.0.2 is a bug fix release which fixes bugs found since the 12.0.1 release.
+</p>
+<p>
+Mesa 12.0.2 implements the OpenGL 4.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.3.  OpenGL
+4.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+a08565ab1273751ebe2ffa928cbf785056594c803077c9719d0763da780f2918  mesa-12.0.2.tar.gz
+d957a5cc371dcd7ff2aa0d87492f263aece46f79352f4520039b58b1f32552cb  mesa-12.0.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=69622">Bug 69622</a> - eglTerminate then eglMakeCurrent crahes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89599">Bug 89599</a> - symbol 'x86_64_entry_start' is already defined when building with LLVM/clang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91342">Bug 91342</a> - Very dark textures on some objects in indoors environments in Postal 2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92306">Bug 92306</a> - GL Excess demo renders incorrectly on nv43</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94148">Bug 94148</a> - Framebuffer considered invalid when a draw call is done before glCheckFramebufferStatus</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96274">Bug 96274</a> - [NVC0] Failure when compiling compute shader: Assertion `bb-&gt;getFirst()-&gt;serial &lt;= bb-&gt;getExit()-&gt;serial' failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96358">Bug 96358</a> - SSO: wrong interface validation between GS and VS (regresion due to latest gles 3.1)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96381">Bug 96381</a> - Texture artifacts with immutable texture storage and mipmaps</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96762">Bug 96762</a> - [radeonsi,apitrace] Firewatch: nothing rendered in scrollable (text) areas</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96835">Bug 96835</a> - &quot;gallium: Force blend color to 16-byte alignment&quot; crash with &quot;-march=native -O3&quot; causes some 32bit games to crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96850">Bug 96850</a> - Crucible tests fail for 32bit mesa</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96908">Bug 96908</a> - [radeonsi] MSAA causes graphical artifacts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96911">Bug 96911</a> - webgl2 conformance2/textures/misc/tex-mipmap-levels.html crashes 12.1 Intel driver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96971">Bug 96971</a> - invariant qualifier is not valid for shader inputs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97039">Bug 97039</a> - The Talos Principle and Serious Sam 3 GPU faults</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97207">Bug 97207</a> - [IVY BRIDGE] Fragment shader discard writing to depth</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97214">Bug 97214</a> - X not running with error &quot;Failed to make EGL context current&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97225">Bug 97225</a> - [i965 on HD4600 Haswell] xcom switch to ingame cinematics cause segmentation fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97231">Bug 97231</a> - GL_DEPTH_CLAMP doesn't clamp to the far plane</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97307">Bug 97307</a> - glsl/glcpp/tests/glcpp-test regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97331">Bug 97331</a> - glDrawElementsBaseVertex doesn't work in display list on i915</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97351">Bug 97351</a> - DrawElementsBaseVertex with VBO ignores base vertex on Intel GMA 9xx in some cases</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97426">Bug 97426</a> - glScissor gives vertically inverted result</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97476">Bug 97476</a> - Shader binaries should not be stored in the PipelineCache</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97567">Bug 97567</a> - [SNB, ILK] ctl, piglit regressions in mesa 12.0.2rc1</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andreas Boll (1):</p>
+<ul>
+  <li>configure.ac: Use ${datarootdir} for --with-vulkan-icddir help string too</li>
+</ul>
+
+<p>Bernard Kilarski (1):</p>
+<ul>
+  <li>glx: fix error code when there is no context bound</li>
+</ul>
+
+<p>Brian Paul (4):</p>
+<ul>
+  <li>svga: handle mismatched number of samplers, sampler views</li>
+  <li>mesa: use _mesa_clear_texture_image() in clear_texture_fields()</li>
+  <li>swrast: fix incorrectly positioned putImage() in swrast driver</li>
+  <li>mesa: fix format conversion bug in get_tex_rgba_uncompressed()</li>
+</ul>
+
+<p>Chad Versace (2):</p>
+<ul>
+  <li>i965: Fix miptree layout for EGLImage-based renderbuffers</li>
+  <li>i965: Respect miptree offsets in intel_readpixels_tiled_memcpy()</li>
+</ul>
+
+<p>Christian König (1):</p>
+<ul>
+  <li>st/mesa: fix reference counting bug in st_vdpau</li>
+</ul>
+
+<p>Chuck Atkins (1):</p>
+<ul>
+  <li>swr: Refactor checks for compiler feature flags</li>
+</ul>
+
+<p>Daniel Scharrer (1):</p>
+<ul>
+  <li>mesa: Fix fixed function spot lighting on newer hardware (again)</li>
+</ul>
+
+<p>Dave Airlie (2):</p>
+<ul>
+  <li>anv: fix writemask on blit fragment shader.</li>
+  <li>st/glsl_to_tgsi: fix st_src_reg_for_double constant.</li>
+</ul>
+
+<p>Emil Velikov (15):</p>
+<ul>
+  <li>docs: add sha256 checksums for 12.0.1</li>
+  <li>mesa: automake: list builddir before srcdir</li>
+  <li>mesa: scons: list builddir before srcdir</li>
+  <li>i965: store reference to the context within struct brw_fence (v2)</li>
+  <li>anv: remove internal 'validate' layer</li>
+  <li>anv: automake: use VISIBILITY_CFLAGS to restrict symbol visibility</li>
+  <li>anv: automake: build with -Bsymbolic</li>
+  <li>anv: do not export the Vulkan API</li>
+  <li>anv: remove dummy VK_DEBUG_MARKER_EXT entry points</li>
+  <li>isl: automake: use VISIBILITY_CFLAGS to restrict symbol visibility</li>
+  <li>cherry-ignore: temporary(?) drop "a4xx: make sure to actually clamp depth"</li>
+  <li>i915: Check return value of screen-&gt;image.loader-&gt;getBuffers</li>
+  <li>Revert "i965/miptree: Set logical_depth0 == 6 for cube maps"</li>
+  <li>glx/glvnd: list the strcmp arguments in correct order</li>
+  <li>Update version to 12.0.2</li>
+</ul>
+
+<p>Eric Anholt (4):</p>
+<ul>
+  <li>vc4: Close our screen's fd on screen close.</li>
+  <li>vc4: Disable early Z with computed depth.</li>
+  <li>vc4: Fix a leak of the src[] array of VPM reads in optimization.</li>
+  <li>vc4: Fix leak of the bo_handles table.</li>
+</ul>
+
+<p>Francisco Jerez (3):</p>
+<ul>
+  <li>i965: Emit SKL VF cache invalidation W/A from brw_emit_pipe_control_flush.</li>
+  <li>i965: Make room in the batch epilogue for three more pipe controls.</li>
+  <li>i965: Fix remaining flush vs invalidate race conditions in brw_emit_pipe_control_flush.</li>
+</ul>
+
+<p>Haixia Shi (1):</p>
+<ul>
+  <li>platform_android: prevent deadlock in droid_swap_buffers</li>
+</ul>
+
+<p>Ian Romanick (5):</p>
+<ul>
+  <li>mesa: Strip arrayness from interface block names in some IO validation</li>
+  <li>glsl: Pack integer and double varyings as flat even if interpolation mode is none</li>
+  <li>glcpp: Track the actual version instead of just the version_resolved flag</li>
+  <li>glcpp: Only disallow #undef of pre-defined macros on GLSL ES &gt;= 3.00 shaders</li>
+  <li>glsl: Mark cube map array sampler types as reserved in GLSL ES 3.10</li>
+</ul>
+
+<p>Ilia Mirkin (16):</p>
+<ul>
+  <li>mesa: etc2 online compression is unsupported, don't attempt it</li>
+  <li>st/mesa: return appropriate mesa format for ETC texture formats</li>
+  <li>mesa: set _NEW_BUFFERS when updating texture bound to current buffers</li>
+  <li>nv50,nvc0: srgb rendering is only available for rgba/bgra</li>
+  <li>vbo: allow DrawElementsBaseVertex in display lists</li>
+  <li>gallium/util: add helper to compute zmin/zmax for a viewport state</li>
+  <li>nv50,nvc0: fix depth range when halfz is enabled</li>
+  <li>nv50/ir: fix bb positions after exit instructions</li>
+  <li>vbo: add basevertex when looking up elements for vbo splitting</li>
+  <li>a4xx: only disable depth clipping, not all clipping, when requested</li>
+  <li>nv50/ir: make sure cfg iterator always hits all blocks</li>
+  <li>main: add missing EXTRA_END in OES_sample_variables get check</li>
+  <li>nouveau: always enable at least one RC</li>
+  <li>nv30: only bail on color/depth bpp mismatch when surfaces are swizzled</li>
+  <li>a4xx: make sure to actually clamp depth as requested</li>
+  <li>gk110/ir: fix quadop dall emission</li>
+</ul>
+
+<p>Jan Ziak (2):</p>
+<ul>
+  <li>egl/x11: avoid using freed memory if dri2 init fails</li>
+  <li>loader: fix memory leak in loader_dri3_open</li>
+</ul>
+
+<p>Jason Ekstrand (31):</p>
+<ul>
+  <li>nir/spirv: Don't multiply the push constant block size by 4</li>
+  <li>anv: Add a stub for CmdCopyQueryPoolResults on Ivy Bridge</li>
+  <li>glsl/types: Fix function type comparison function</li>
+  <li>glsl/types: Use _mesa_hash_data for hashing function types</li>
+  <li>genxml: Make gen6-7 blending look more like gen8</li>
+  <li>anv/pipeline: Unify blend state setup between gen7 and gen8</li>
+  <li>anv: Enable independentBlend on gen7</li>
+  <li>anv: Add an align_down_npot_u32 helper</li>
+  <li>anv: Handle VK_WHOLE_SIZE properly for buffer views</li>
+  <li>i965/miptree: Enforce that height == 1 for 1-D array textures</li>
+  <li>i965/miptree: Set logical_depth0 == 6 for cube maps</li>
+  <li>nir: Add a nir_deref_foreach_leaf helper</li>
+  <li>nir/inline: Constant-initialize local variables in the callee if needed</li>
+  <li>anv/pipeline: Set up point coord enables</li>
+  <li>i965/miptree: Stop multiplying cube depth by 6 in HiZ calculations</li>
+  <li>i965/vec4: Make opt_vector_float reset at the top of each block</li>
+  <li>anv/blit2d: Add a format parameter to bind_dst and create_iview</li>
+  <li>anv/blit2d: Add support for RGB destinations</li>
+  <li>anv/clear: Make cmd_clear_image take an actual VkClearValue</li>
+  <li>anv/clear: Clear E5B9G9R9 images as R32_UINT</li>
+  <li>anv: Include the pipeline layout in the shader hash</li>
+  <li>isl: Allow multisampled array textures</li>
+  <li>anv/descriptor_set: memset anv_descriptor_set_layout</li>
+  <li>anv/pipeline: Fix bind maps for fragment output arrays</li>
+  <li>anv/allocator: Correctly set the number of buckets</li>
+  <li>anv/pipeline: Properly handle OOM during shader compilation</li>
+  <li>anv: Remove unused fields from anv_pipeline_bind_map</li>
+  <li>anv: Add pipeline_has_stage guards a few places</li>
+  <li>anv: Add a struct for storing a compiled shader</li>
+  <li>anv/pipeline: Add support for caching the push constant map</li>
+  <li>anv: Rework pipeline caching</li>
+</ul>
+
+<p>José Fonseca (2):</p>
+<ul>
+  <li>appveyor: Install pywin32 extensions.</li>
+  <li>appveyor: Force Visual Studio 2013 image.</li>
+</ul>
+
+<p>Kenneth Graunke (21):</p>
+<ul>
+  <li>genxml: Add CLIPMODE_* prefix to 3DSTATE_CLIP's "Clip Mode" enum values.</li>
+  <li>genxml: Add APIMODE_D3D missing enum values and improve consistency.</li>
+  <li>anv: Fix near plane clipping on Gen7/7.5.</li>
+  <li>anv: Enable early culling on Gen7.</li>
+  <li>anv: Unify 3DSTATE_CLIP code across generations.</li>
+  <li>genxml: Rename "API Rendering Disable" to "Rendering Disable".</li>
+  <li>anv: Properly call gen75_emit_state_base_address on Haswell.</li>
+  <li>i965: Include VUE handles for GS with invocations &gt; 1.</li>
+  <li>nir: Add a base const_index to shared atomic intrinsics.</li>
+  <li>i965: Fix shared atomic intrinsics to pay attention to base.</li>
+  <li>mesa: Add GL_BGRA_EXT to the list of GenerateMipmap internal formats.</li>
+  <li>mesa: Don't call GenerateMipmap if Width or Height == 0.</li>
+  <li>glsl: Delete bogus ir_set_program_inouts assert.</li>
+  <li>glsl: Fix the program resource names of gl_TessLevelOuter/Inner[].</li>
+  <li>glsl: Fix location bias for patch variables.</li>
+  <li>glsl: Fix invariant matching in GLSL 4.30 and GLSL ES 1.00.</li>
+  <li>mesa: Fix uf10_to_f32() scale factor in the E == 0 and M != 0 case.</li>
+  <li>nir/builder: Add bany_inequal and bany helpers.</li>
+  <li>i965: Implement the WaPreventHSTessLevelsInterference workaround.</li>
+  <li>i965: Fix execution size of scalar TCS barrier setup code.</li>
+  <li>i965: Fix barrier count shift in scalar TCS backend.</li>
+</ul>
+
+<p>Leo Liu (2):</p>
+<ul>
+  <li>st/omx/enc: check uninitialized list from task release</li>
+  <li>vl/dri3: fix a memory leak from front buffer</li>
+</ul>
+
+<p>Marek Olšák (7):</p>
+<ul>
+  <li>glsl_to_tgsi: don't use the negate modifier in integer ops after bitcast</li>
+  <li>radeonsi: add a workaround for a compute VGPR-usage LLVM bug</li>
+  <li>winsys/amdgpu: disallow DCC with mipmaps</li>
+  <li>gallium/util: fix align64</li>
+  <li>radeonsi: only set dual source blending for MRT0</li>
+  <li>radeonsi: fix VM faults due NULL internal const buffers on CIK</li>
+  <li>radeonsi: disable SDMA texture copying on Carrizo</li>
+</ul>
+
+<p>Matt Turner (4):</p>
+<ul>
+  <li>mapi: Massage code to allow clang to compile.</li>
+  <li>i965/vec4: Ignore swizzle of VGRF for use by var_range_end().</li>
+  <li>mesa: Use AC_HEADER_MAJOR to include correct header for major().</li>
+  <li>nir: Walk blocks in source code order in lower_vars_to_ssa.</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>glx: Don't use current context in __glXSendError</li>
+</ul>
+
+<p>Miklós Máté (1):</p>
+<ul>
+  <li>vbo: set draw_id</li>
+</ul>
+
+<p>Nanley Chery (5):</p>
+<ul>
+  <li>anv/descriptor_set: Fix binding partly undefined descriptor sets</li>
+  <li>isl: Fix assert on raw buffer surface state size</li>
+  <li>anv/device: Fix max buffer range limits</li>
+  <li>isl: Fix isl_tiling_is_any_y()</li>
+  <li>anv/gen7_pipeline: Set PixelShaderKillPixel for discards</li>
+</ul>
+
+<p>Nicolai Hähnle (7):</p>
+<ul>
+  <li>radeonsi: explicitly choose center locations for 1xAA on Polaris</li>
+  <li>radeonsi: fix Polaris MSAA regression</li>
+  <li>radeonsi: ensure sample locations are set for line and polygon smoothing</li>
+  <li>st_glsl_to_tgsi: only skip over slots of an input array that are present</li>
+  <li>glsl: fix optimization of discard nested multiple levels</li>
+  <li>radeonsi: flush TC L2 cache for indirect draw data</li>
+  <li>radeonsi: add si_set_rw_buffer to be used for internal descriptors</li>
+</ul>
+
+<p>Nicolas Boichat (6):</p>
+<ul>
+  <li>egl/dri2: dri2_make_current: Set EGL error if bindContext fails</li>
+  <li>egl/wayland: Set disp-&gt;DriverData to NULL on error</li>
+  <li>egl/surfaceless: Set disp-&gt;DriverData to NULL on error</li>
+  <li>egl/drm: Set disp-&gt;DriverData to NULL on error</li>
+  <li>egl/android: Set dpy-&gt;DriverData to NULL on error</li>
+  <li>egl/dri2: Add reference count for dri2_egl_display</li>
+</ul>
+
+<p>Rob Herring (3):</p>
+<ul>
+  <li>Android: add missing u_math.h include path for libmesa_isl</li>
+  <li>vc4: fix vc4_resource_from_handle() stride calculation</li>
+  <li>vc4: add hash table look-up for exported dmabufs</li>
+</ul>
+
+<p>Samuel Pitoiset (7):</p>
+<ul>
+  <li>nvc0/ir: fix images indirect access on Fermi</li>
+  <li>nvc0: fix the driver cb size when draw parameters are used</li>
+  <li>gm107/ir: add missing NEG modifier for IADD32I</li>
+  <li>gm107/ir: make use of ADD32I for all immediates</li>
+  <li>nvc0: upload sample locations on GM20x</li>
+  <li>nvc0: invalidate textures/samplers on GK104+</li>
+  <li>nv50/ir: always emit the NDV bit for OP_QUADOP</li>
+</ul>
+
+<p>Stefan Dirsch (1):</p>
+<ul>
+  <li>Avoid overflow in 'last' variable of FindGLXFunction(...)</li>
+</ul>
+
+<p>Stencel, Joanna (1):</p>
+<ul>
+  <li>egl/wayland-egl: Fix for segfault in dri2_wl_destroy_surface.</li>
+</ul>
+
+<p>Tim Rowley (2):</p>
+<ul>
+  <li>Revert "gallium: Force blend color to 16-byte alignment"</li>
+  <li>swr: switch from overriding -march to selecting features</li>
+</ul>
+
+<p>Tomasz Figa (8):</p>
+<ul>
+  <li>gallium/dri: Add shared glapi to LIBADD on Android</li>
+  <li>egl/android: Remove unused variables</li>
+  <li>egl/android: Check return value of dri2_get_dri_config()</li>
+  <li>egl/android: Stop leaking DRI images</li>
+  <li>gallium/winsys/kms: Fix double refcount when importing from prime FD (v2)</li>
+  <li>gallium/winsys/kms: Fully initialize kms_sw_dt at prime import time (v2)</li>
+  <li>gallium/winsys/kms: Move display target handle lookup to separate function</li>
+  <li>gallium/winsys/kms: Look up the GEM handle after importing a prime FD</li>
+</ul>
+
+
+</div>
+</body>
+</html>
index 4022727..68544ae 100644 (file)
@@ -278,10 +278,34 @@ control_line_success:
        HASH_TOKEN DEFINE_TOKEN define
 |      HASH_TOKEN UNDEF IDENTIFIER NEWLINE {
                macro_t *macro;
-               if (strcmp("__LINE__", $3) == 0
-                   || strcmp("__FILE__", $3) == 0
-                   || strcmp("__VERSION__", $3) == 0
-                   || strncmp("GL_", $3, 3) == 0)
+
+                /* Section 3.4 (Preprocessor) of the GLSL ES 3.00 spec says:
+                 *
+                 *    It is an error to undefine or to redefine a built-in
+                 *    (pre-defined) macro name.
+                 *
+                 * The GLSL ES 1.00 spec does not contain this text.
+                 *
+                 * Section 3.3 (Preprocessor) of the GLSL 1.30 spec says:
+                 *
+                 *    #define and #undef functionality are defined as is
+                 *    standard for C++ preprocessors for macro definitions
+                 *    both with and without macro parameters.
+                 *
+                 * At least as far as I can tell GCC allow '#undef __FILE__'.
+                 * Furthermore, there are desktop OpenGL conformance tests
+                 * that expect '#undef __VERSION__' and '#undef
+                 * GL_core_profile' to work.
+                 *
+                 * Only disallow #undef of pre-defined macros on GLSL ES >=
+                 * 3.00 shaders.
+                 */
+               if (parser->is_gles &&
+                    parser->version >= 300 &&
+                    (strcmp("__LINE__", $3) == 0
+                     || strcmp("__FILE__", $3) == 0
+                     || strcmp("__VERSION__", $3) == 0
+                     || strncmp("GL_", $3, 3) == 0))
                        glcpp_error(& @1, parser, "Built-in (pre-defined)"
                                    " macro names cannot be undefined.");
 
@@ -396,13 +420,13 @@ control_line_success:
                _glcpp_parser_skip_stack_pop (parser, & @1);
        } NEWLINE
 |      HASH_TOKEN VERSION_TOKEN integer_constant NEWLINE {
-               if (parser->version_resolved) {
+               if (parser->version != 0) {
                        glcpp_error(& @1, parser, "#version must appear on the first line");
                }
                _glcpp_parser_handle_version_declaration(parser, $3, NULL, true);
        }
 |      HASH_TOKEN VERSION_TOKEN integer_constant IDENTIFIER NEWLINE {
-               if (parser->version_resolved) {
+               if (parser->version != 0) {
                        glcpp_error(& @1, parser, "#version must appear on the first line");
                }
                _glcpp_parser_handle_version_declaration(parser, $3, $4, true);
@@ -1345,7 +1369,7 @@ glcpp_parser_create(const struct gl_extensions *extensions, gl_api api)
 
    parser->extensions = extensions;
    parser->api = api;
-   parser->version_resolved = false;
+   parser->version = 0;
 
    parser->has_new_line_number = 0;
    parser->new_line_number = 1;
@@ -2281,10 +2305,10 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
 {
    const struct gl_extensions *extensions = parser->extensions;
 
-   if (parser->version_resolved)
+   if (parser->version != 0)
       return;
 
-   parser->version_resolved = true;
+   parser->version = version;
 
    add_builtin_define (parser, "__VERSION__", version);
 
index d87e6b7..06f3521 100644 (file)
@@ -196,7 +196,7 @@ struct glcpp_parser {
        int error;
        const struct gl_extensions *extensions;
        gl_api api;
-       bool version_resolved;
+       unsigned version;
        bool has_new_line_number;
        int new_line_number;
        bool has_new_source_number;
index 3b736df..498dc0f 100644 (file)
@@ -1,6 +1,7 @@
-0:1(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined.
 0:2(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined.
 0:3(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined.
+0:4(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined.
+#version 300 es
 
 
 
diff --git a/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c b/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c
new file mode 100644 (file)
index 0000000..e3af10d
--- /dev/null
@@ -0,0 +1,4 @@
+#version 110
+#undef __LINE__
+#undef __FILE__
+#undef __VERSION__
diff --git a/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c.expected b/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c.expected
new file mode 100644 (file)
index 0000000..cd0071f
--- /dev/null
@@ -0,0 +1,4 @@
+#version 110
+
+
+
index 11711ee..c31958b 100644 (file)
@@ -348,10 +348,10 @@ isampler2DMSArray  KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_mul
 usampler2DMSArray  KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, USAMPLER2DMSARRAY);
 
    /* keywords available with ARB_texture_cube_map_array_enable extension on desktop GLSL */
-samplerCubeArray   KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAY);
-isamplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, ISAMPLERCUBEARRAY);
-usamplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, USAMPLERCUBEARRAY);
-samplerCubeArrayShadow   KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAYSHADOW);
+samplerCubeArray   KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAY);
+isamplerCubeArray KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, ISAMPLERCUBEARRAY);
+usamplerCubeArray KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, USAMPLERCUBEARRAY);
+samplerCubeArrayShadow   KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAYSHADOW);
 
 samplerExternalOES             {
                          if (yyextra->OES_EGL_image_external_enable)
index 3885688..c72f119 100644 (file)
@@ -1784,8 +1784,10 @@ type_qualifier:
        * variables. As only outputs can be declared as invariant, an invariant
        * output from one shader stage will still match an input of a subsequent
        * stage without the input being declared as invariant."
+       *
+       * On the desktop side, this text first appears in GLSL 4.30.
        */
-      if (state->es_shader && state->language_version >= 300 && $$.flags.q.in)
+      if (state->is_version(430, 300) && $$.flags.q.in)
          _mesa_glsl_error(&@1, state, "invariant qualifiers cannot be used with shader inputs");
    }
    | interpolation_qualifier type_qualifier
index 93716c4..3809270 100644 (file)
@@ -586,6 +586,13 @@ public:
       return this->u.state_slots;
    }
 
+   inline bool is_interpolation_flat() const
+   {
+      return this->data.interpolation == INTERP_QUALIFIER_FLAT ||
+             this->type->contains_integer() ||
+             this->type->contains_double();
+   }
+
    inline bool is_name_ralloced() const
    {
       return this->name != ir_variable::tmp_name;
index 183b13b..bca1e0a 100644 (file)
@@ -260,15 +260,19 @@ ir_set_program_inouts_visitor::try_mark_partial_variable(ir_variable *var,
     * lowering passes (do_vec_index_to_swizzle() gets rid of indexing into
     * vectors, and lower_packed_varyings() gets rid of structs that occur in
     * varyings).
+    *
+    * However, we don't use varying packing in all cases - tessellation
+    * shaders bypass it.  This means we'll see varying structs and arrays
+    * of structs here.  For now, we just give up so the caller marks the
+    * entire variable as used.
     */
    if (!(type->is_matrix() ||
         (type->is_array() &&
          (type->fields.array->is_numeric() ||
           type->fields.array->is_boolean())))) {
-      assert(!"Unexpected indexing in ir_set_program_inouts");
 
-      /* For safety in release builds, in case we ever encounter unexpected
-       * indexing, give up and let the caller mark the whole variable as used.
+      /* If we don't know how to handle this case, give up and let the
+       * caller mark the whole variable as used.
        */
       return false;
    }
index 5a5adc0..ddf6aa2 100644 (file)
@@ -308,7 +308,25 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog,
       return;
    }
 
-   if (!prog->IsES && input->data.invariant != output->data.invariant) {
+   /* The GLSL 4.30 and GLSL ES 3.00 specifications say:
+    *
+    *    "As only outputs need be declared with invariant, an output from
+    *     one shader stage will still match an input of a subsequent stage
+    *     without the input being declared as invariant."
+    *
+    * while GLSL 4.20 says:
+    *
+    *    "For variables leaving one shader and coming into another shader,
+    *     the invariant keyword has to be used in both shaders, or a link
+    *     error will result."
+    *
+    * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
+    *
+    *    "The invariance of varyings that are declared in both the vertex
+    *     and fragment shaders must match."
+    */
+   if (input->data.invariant != output->data.invariant &&
+       prog->Version < (prog->IsES ? 300 : 430)) {
       linker_error(prog,
                    "%s shader output `%s' %s invariant qualifier, "
                    "but %s shader input %s invariant qualifier\n",
@@ -1610,7 +1628,8 @@ varying_matches::compute_packing_class(const ir_variable *var)
    unsigned packing_class = var->data.centroid | (var->data.sample << 1) |
                             (var->data.patch << 2);
    packing_class *= 4;
-   packing_class += var->data.interpolation;
+   packing_class += var->is_interpolation_flat()
+      ? unsigned(INTERP_QUALIFIER_FLAT) : var->data.interpolation;
    return packing_class;
 }
 
index 6379ed2..02b3e00 100644 (file)
@@ -3687,6 +3687,18 @@ create_shader_variable(struct gl_shader_program *shProg,
    if (in->data.mode == ir_var_system_value &&
        in->data.location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) {
       out->name = ralloc_strdup(shProg, "gl_VertexID");
+   } else if ((in->data.mode == ir_var_shader_out &&
+               in->data.location == VARYING_SLOT_TESS_LEVEL_OUTER) ||
+              (in->data.mode == ir_var_system_value &&
+               in->data.location == SYSTEM_VALUE_TESS_LEVEL_OUTER)) {
+      out->name = ralloc_strdup(shProg, "gl_TessLevelOuter");
+      type = glsl_type::get_array_instance(glsl_type::float_type, 4);
+   } else if ((in->data.mode == ir_var_shader_out &&
+               in->data.location == VARYING_SLOT_TESS_LEVEL_INNER) ||
+              (in->data.mode == ir_var_system_value &&
+               in->data.location == SYSTEM_VALUE_TESS_LEVEL_INNER)) {
+      out->name = ralloc_strdup(shProg, "gl_TessLevelInner");
+      type = glsl_type::get_array_instance(glsl_type::float_type, 2);
    } else {
       out->name = ralloc_strdup(shProg, name);
    }
@@ -3839,6 +3851,9 @@ add_interface_variables(struct gl_shader_program *shProg,
          continue;
       };
 
+      if (var->data.patch)
+         loc_bias = int(VARYING_SLOT_PATCH0);
+
       /* Skip packed varyings, packed varyings are handled separately
        * by add_packed_varyings.
        */
index 41edada..1e7a8c2 100644 (file)
@@ -273,11 +273,11 @@ lower_packed_varyings_visitor::run(struct gl_shader *shader)
          continue;
 
       /* This lowering pass is only capable of packing floats and ints
-       * together when their interpolation mode is "flat".  Therefore, to be
-       * safe, caller should ensure that integral varyings always use flat
-       * interpolation, even when this is not required by GLSL.
+       * together when their interpolation mode is "flat".  Treat integers as
+       * being flat when the interpolation mode is none.
        */
       assert(var->data.interpolation == INTERP_QUALIFIER_FLAT ||
+             var->data.interpolation == INTERP_QUALIFIER_NONE ||
              !var->type->contains_integer());
 
       /* Clone the variable for program resource list before
@@ -607,7 +607,7 @@ lower_packed_varyings_visitor::get_packed_varying_deref(
    if (this->packed_varyings[slot] == NULL) {
       char *packed_name = ralloc_asprintf(this->mem_ctx, "packed:%s", name);
       const glsl_type *packed_type;
-      if (unpacked_var->data.interpolation == INTERP_QUALIFIER_FLAT)
+      if (unpacked_var->is_interpolation_flat())
          packed_type = glsl_type::ivec4_type;
       else
          packed_type = glsl_type::vec4_type;
@@ -627,7 +627,8 @@ lower_packed_varyings_visitor::get_packed_varying_deref(
       packed_var->data.centroid = unpacked_var->data.centroid;
       packed_var->data.sample = unpacked_var->data.sample;
       packed_var->data.patch = unpacked_var->data.patch;
-      packed_var->data.interpolation = unpacked_var->data.interpolation;
+      packed_var->data.interpolation = packed_type == glsl_type::ivec4_type
+         ? unsigned(INTERP_QUALIFIER_FLAT) : unpacked_var->data.interpolation;
       packed_var->data.location = location;
       packed_var->data.precision = unpacked_var->data.precision;
       packed_var->data.always_active_io = unpacked_var->data.always_active_io;
index 1ca8803..a27bead 100644 (file)
@@ -72,7 +72,14 @@ opt_conditional_discard_visitor::visit_leave(ir_if *ir)
 
    /* Move the condition and replace the ir_if with the ir_discard. */
    ir_discard *discard = (ir_discard *) ir->then_instructions.head;
-   discard->condition = ir->condition;
+   if (!discard->condition)
+      discard->condition = ir->condition;
+   else {
+      void *ctx = ralloc_parent(ir);
+      discard->condition = new(ctx) ir_expression(ir_binop_logic_and,
+                                                  ir->condition,
+                                                  discard->condition);
+   }
    ir->replace_with(discard);
 
    progress = true;
index 11f1e85..83ce35e 100644 (file)
@@ -1079,7 +1079,7 @@ function_key_compare(const void *a, const void *b)
    const glsl_type *const key2 = (glsl_type *) b;
 
    if (key1->length != key2->length)
-      return 1;
+      return false;
 
    return memcmp(key1->fields.parameters, key2->fields.parameters,
                  (key1->length + 1) * sizeof(*key1->fields.parameters)) == 0;
@@ -1090,20 +1090,8 @@ static uint32_t
 function_key_hash(const void *a)
 {
    const glsl_type *const key = (glsl_type *) a;
-   char hash_key[128];
-   unsigned size = 0;
-
-   size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length);
-
-   for (unsigned i = 0; i < key->length; i++) {
-      if (size >= sizeof(hash_key))
-        break;
-
-      size += snprintf(& hash_key[size], sizeof(hash_key) - size,
-                      "%p", (void *) key->fields.structure[i].type);
-   }
-
-   return _mesa_hash_string(hash_key);
+   return _mesa_hash_data(key->fields.parameters,
+                          (key->length + 1) * sizeof(*key->fields.parameters));
 }
 
 const glsl_type *
index 3c8b4e0..158ccc1 100644 (file)
@@ -659,6 +659,122 @@ nir_copy_deref(void *mem_ctx, nir_deref *deref)
    return NULL;
 }
 
+/* This is the second step in the recursion.  We've found the tail and made a
+ * copy.  Now we need to iterate over all possible leaves and call the
+ * callback on each one.
+ */
+static bool
+deref_foreach_leaf_build_recur(nir_deref_var *deref, nir_deref *tail,
+                               nir_deref_foreach_leaf_cb cb, void *state)
+{
+   unsigned length;
+   union {
+      nir_deref_array arr;
+      nir_deref_struct str;
+   } tmp;
+
+   assert(tail->child == NULL);
+   switch (glsl_get_base_type(tail->type)) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_DOUBLE:
+   case GLSL_TYPE_BOOL:
+      if (glsl_type_is_vector_or_scalar(tail->type))
+         return cb(deref, state);
+      /* Fall Through */
+
+   case GLSL_TYPE_ARRAY:
+      tmp.arr.deref.deref_type = nir_deref_type_array;
+      tmp.arr.deref.type = glsl_get_array_element(tail->type);
+      tmp.arr.deref_array_type = nir_deref_array_type_direct;
+      tmp.arr.indirect = NIR_SRC_INIT;
+      tail->child = &tmp.arr.deref;
+
+      length = glsl_get_length(tail->type);
+      for (unsigned i = 0; i < length; i++) {
+         tmp.arr.deref.child = NULL;
+         tmp.arr.base_offset = i;
+         if (!deref_foreach_leaf_build_recur(deref, &tmp.arr.deref, cb, state))
+            return false;
+      }
+      return true;
+
+   case GLSL_TYPE_STRUCT:
+      tmp.str.deref.deref_type = nir_deref_type_struct;
+      tail->child = &tmp.str.deref;
+
+      length = glsl_get_length(tail->type);
+      for (unsigned i = 0; i < length; i++) {
+         tmp.arr.deref.child = NULL;
+         tmp.str.deref.type = glsl_get_struct_field(tail->type, i);
+         tmp.str.index = i;
+         if (!deref_foreach_leaf_build_recur(deref, &tmp.arr.deref, cb, state))
+            return false;
+      }
+      return true;
+
+   default:
+      unreachable("Invalid type for dereference");
+   }
+}
+
+/* This is the first step of the foreach_leaf recursion.  In this step we are
+ * walking to the end of the deref chain and making a copy in the stack as we
+ * go.  This is because we don't want to mutate the deref chain that was
+ * passed in by the caller.  The downside is that this deref chain is on the
+ * stack and , if the caller wants to do anything with it, they will have to
+ * make their own copy because this one will go away.
+ */
+static bool
+deref_foreach_leaf_copy_recur(nir_deref_var *deref, nir_deref *tail,
+                              nir_deref_foreach_leaf_cb cb, void *state)
+{
+   union {
+      nir_deref_array arr;
+      nir_deref_struct str;
+   } c;
+
+   if (tail->child) {
+      switch (tail->child->deref_type) {
+      case nir_deref_type_array:
+         c.arr = *nir_deref_as_array(tail->child);
+         tail->child = &c.arr.deref;
+         return deref_foreach_leaf_copy_recur(deref, &c.arr.deref, cb, state);
+
+      case nir_deref_type_struct:
+         c.str = *nir_deref_as_struct(tail->child);
+         tail->child = &c.str.deref;
+         return deref_foreach_leaf_copy_recur(deref, &c.str.deref, cb, state);
+
+      case nir_deref_type_var:
+      default:
+         unreachable("Invalid deref type for a child");
+      }
+   } else {
+      /* We've gotten to the end of the original deref.  Time to start
+       * building our own derefs.
+       */
+      return deref_foreach_leaf_build_recur(deref, tail, cb, state);
+   }
+}
+
+/**
+ * This function iterates over all of the possible derefs that can be created
+ * with the given deref as the head.  It then calls the provided callback with
+ * a full deref for each one.
+ *
+ * The deref passed to the callback will be allocated on the stack.  You will
+ * need to make a copy if you want it to hang around.
+ */
+bool
+nir_deref_foreach_leaf(nir_deref_var *deref,
+                       nir_deref_foreach_leaf_cb cb, void *state)
+{
+   nir_deref_var copy = *deref;
+   return deref_foreach_leaf_copy_recur(&copy, &copy.deref, cb, state);
+}
+
 /* Returns a load_const instruction that represents the constant
  * initializer for the given deref chain.  The caller is responsible for
  * ensuring that there actually is a constant initializer.
index 9816ed6..dc03918 100644 (file)
@@ -1923,6 +1923,10 @@ nir_deref_struct *nir_deref_struct_create(void *mem_ctx, unsigned field_index);
 
 nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref);
 
+typedef bool (*nir_deref_foreach_leaf_cb)(nir_deref_var *deref, void *state);
+bool nir_deref_foreach_leaf(nir_deref_var *deref,
+                            nir_deref_foreach_leaf_cb cb, void *state);
+
 nir_load_const_instr *
 nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref);
 
index 09cdf72..7497efc 100644 (file)
@@ -318,6 +318,25 @@ nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1)
 }
 
 static inline nir_ssa_def *
+nir_bany_inequal(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1)
+{
+   switch (src0->num_components) {
+   case 1: return nir_ine(b, src0, src1);
+   case 2: return nir_bany_inequal2(b, src0, src1);
+   case 3: return nir_bany_inequal3(b, src0, src1);
+   case 4: return nir_bany_inequal4(b, src0, src1);
+   default:
+      unreachable("bad component size");
+   }
+}
+
+static inline nir_ssa_def *
+nir_bany(nir_builder *b, nir_ssa_def *src)
+{
+   return nir_bany_inequal(b, src, nir_imm_int(b, 0));
+}
+
+static inline nir_ssa_def *
 nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
 {
    unsigned swizzle[4] = {c, c, c, c};
index c36748d..cf31e14 100644 (file)
 #include "nir_builder.h"
 #include "nir_control_flow.h"
 
+static bool
+deref_apply_constant_initializer(nir_deref_var *deref, void *state)
+{
+   struct nir_builder *b = state;
+
+   nir_load_const_instr *initializer =
+      nir_deref_get_const_initializer_load(b->shader, deref);
+   nir_builder_instr_insert(b, &initializer->instr);
+
+   nir_store_deref_var(b, deref, &initializer->def, 0xf);
+
+   return true;
+}
+
 static bool inline_function_impl(nir_function_impl *impl, struct set *inlined);
 
 static void
@@ -174,11 +188,35 @@ inline_functions_block(nir_block *block, nir_builder *b,
       /* Add copies of all in parameters */
       assert(call->num_params == callee_copy->num_params);
 
+      b->cursor = nir_before_instr(&call->instr);
+
+      /* Before we insert the copy of the function, we need to lower away
+       * constant initializers on local variables.  This is because constant
+       * initializers happen (effectively) at the top of the function and,
+       * since these are about to become locals of the calling function,
+       * initialization will happen at the top of the caller rather than at
+       * the top of the callee.  This isn't usually a problem, but if we are
+       * being inlined inside of a loop, it can result in the variable not
+       * getting re-initialized properly for all loop iterations.
+       */
+      nir_foreach_variable(local, &callee_copy->locals) {
+         if (!local->constant_initializer)
+            continue;
+
+         nir_deref_var deref;
+         deref.deref.deref_type = nir_deref_type_var,
+         deref.deref.child = NULL;
+         deref.deref.type = local->type,
+         deref.var = local;
+
+         nir_deref_foreach_leaf(&deref, deref_apply_constant_initializer, b);
+
+         local->constant_initializer = NULL;
+      }
+
       exec_list_append(&b->impl->locals, &callee_copy->locals);
       exec_list_append(&b->impl->registers, &callee_copy->registers);
 
-      b->cursor = nir_before_instr(&call->instr);
-
       /* We now need to tie the two functions together using the
        * parameters.  There are two ways we do this: One is to turn the
        * parameter into a local variable and do a shadow-copy.  The other
index 6f86c9f..6a97441 100644 (file)
@@ -266,16 +266,16 @@ INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, xx, xx, xx,
  *    in shared_atomic_add, etc).
  * 2: For CompSwap only: the second data parameter.
  */
-INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
 
 #define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \
    INTRINSIC(load_##name, 0, ARR(0), true, components, 0, num_indices, \
index d62cec0..937f8b3 100644 (file)
@@ -471,7 +471,7 @@ lower_copies_to_load_store(struct deref_node *node,
    return true;
 }
 
-/* Performs variable renaming by doing a DFS of the dominance tree
+/* Performs variable renaming
  *
  * This algorithm is very similar to the one outlined in "Efficiently
  * Computing Static Single Assignment Form and the Control Dependence
@@ -479,133 +479,132 @@ lower_copies_to_load_store(struct deref_node *node,
  * SSA def on the stack per block.
  */
 static bool
-rename_variables_block(nir_block *block, struct lower_variables_state *state)
+rename_variables(struct lower_variables_state *state)
 {
    nir_builder b;
    nir_builder_init(&b, state->impl);
 
-   nir_foreach_instr_safe(instr, block) {
-      if (instr->type != nir_instr_type_intrinsic)
-         continue;
-
-      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-
-      switch (intrin->intrinsic) {
-      case nir_intrinsic_load_var: {
-         struct deref_node *node =
-            get_deref_node(intrin->variables[0], state);
-
-         if (node == NULL) {
-            /* If we hit this path then we are referencing an invalid
-             * value.  Most likely, we unrolled something and are
-             * reading past the end of some array.  In any case, this
-             * should result in an undefined value.
-             */
-            nir_ssa_undef_instr *undef =
-               nir_ssa_undef_instr_create(state->shader,
-                                          intrin->num_components,
-                                          intrin->dest.ssa.bit_size);
-
-            nir_instr_insert_before(&intrin->instr, &undef->instr);
-            nir_instr_remove(&intrin->instr);
-
-            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                                     nir_src_for_ssa(&undef->def));
+   nir_foreach_block(block, state->impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
             continue;
-         }
 
-         if (!node->lower_to_ssa)
-            continue;
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_load_var: {
+            struct deref_node *node =
+               get_deref_node(intrin->variables[0], state);
+
+            if (node == NULL) {
+               /* If we hit this path then we are referencing an invalid
+                * value.  Most likely, we unrolled something and are
+                * reading past the end of some array.  In any case, this
+                * should result in an undefined value.
+                */
+               nir_ssa_undef_instr *undef =
+                  nir_ssa_undef_instr_create(state->shader,
+                                             intrin->num_components,
+                                             intrin->dest.ssa.bit_size);
+
+               nir_instr_insert_before(&intrin->instr, &undef->instr);
+               nir_instr_remove(&intrin->instr);
+
+               nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                        nir_src_for_ssa(&undef->def));
+               continue;
+            }
 
-         nir_alu_instr *mov = nir_alu_instr_create(state->shader,
-                                                   nir_op_imov);
-         mov->src[0].src = nir_src_for_ssa(
-            nir_phi_builder_value_get_block_def(node->pb_value, block));
-         for (unsigned i = intrin->num_components; i < 4; i++)
-            mov->src[0].swizzle[i] = 0;
+            if (!node->lower_to_ssa)
+               continue;
 
-         assert(intrin->dest.is_ssa);
+            nir_alu_instr *mov = nir_alu_instr_create(state->shader,
+                                                      nir_op_imov);
+            mov->src[0].src = nir_src_for_ssa(
+               nir_phi_builder_value_get_block_def(node->pb_value, block));
+            for (unsigned i = intrin->num_components; i < 4; i++)
+               mov->src[0].swizzle[i] = 0;
 
-         mov->dest.write_mask = (1 << intrin->num_components) - 1;
-         nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
-                           intrin->num_components,
-                           intrin->dest.ssa.bit_size, NULL);
+            assert(intrin->dest.is_ssa);
 
-         nir_instr_insert_before(&intrin->instr, &mov->instr);
-         nir_instr_remove(&intrin->instr);
+            mov->dest.write_mask = (1 << intrin->num_components) - 1;
+            nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+                              intrin->num_components,
+                              intrin->dest.ssa.bit_size, NULL);
 
-         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                                  nir_src_for_ssa(&mov->dest.dest.ssa));
-         break;
-      }
-
-      case nir_intrinsic_store_var: {
-         struct deref_node *node =
-            get_deref_node(intrin->variables[0], state);
-
-         if (node == NULL) {
-            /* Probably an out-of-bounds array store.  That should be a
-             * no-op. */
+            nir_instr_insert_before(&intrin->instr, &mov->instr);
             nir_instr_remove(&intrin->instr);
-            continue;
-         }
 
-         if (!node->lower_to_ssa)
-            continue;
-
-         assert(intrin->num_components ==
-                glsl_get_vector_elements(node->type));
-
-         assert(intrin->src[0].is_ssa);
+            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                     nir_src_for_ssa(&mov->dest.dest.ssa));
+            break;
+         }
 
-         nir_ssa_def *new_def;
-         b.cursor = nir_before_instr(&intrin->instr);
+         case nir_intrinsic_store_var: {
+            struct deref_node *node =
+               get_deref_node(intrin->variables[0], state);
 
-         unsigned wrmask = nir_intrinsic_write_mask(intrin);
-         if (wrmask == (1 << intrin->num_components) - 1) {
-            /* Whole variable store - just copy the source.  Note that
-             * intrin->num_components and intrin->src[0].ssa->num_components
-             * may differ.
-             */
-            unsigned swiz[4];
-            for (unsigned i = 0; i < 4; i++)
-               swiz[i] = i < intrin->num_components ? i : 0;
+            if (node == NULL) {
+               /* Probably an out-of-bounds array store.  That should be a
+                * no-op. */
+               nir_instr_remove(&intrin->instr);
+               continue;
+            }
 
-            new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
-                                  intrin->num_components, false);
-         } else {
-            nir_ssa_def *old_def =
-               nir_phi_builder_value_get_block_def(node->pb_value, block);
-            /* For writemasked store_var intrinsics, we combine the newly
-             * written values with the existing contents of unwritten
-             * channels, creating a new SSA value for the whole vector.
-             */
-            nir_ssa_def *srcs[4];
-            for (unsigned i = 0; i < intrin->num_components; i++) {
-               if (wrmask & (1 << i)) {
-                  srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
-               } else {
-                  srcs[i] = nir_channel(&b, old_def, i);
+            if (!node->lower_to_ssa)
+               continue;
+
+            assert(intrin->num_components ==
+                   glsl_get_vector_elements(node->type));
+
+            assert(intrin->src[0].is_ssa);
+
+            nir_ssa_def *new_def;
+            b.cursor = nir_before_instr(&intrin->instr);
+
+            unsigned wrmask = nir_intrinsic_write_mask(intrin);
+            if (wrmask == (1 << intrin->num_components) - 1) {
+               /* Whole variable store - just copy the source.  Note that
+                * intrin->num_components and intrin->src[0].ssa->num_components
+                * may differ.
+                */
+               unsigned swiz[4];
+               for (unsigned i = 0; i < 4; i++)
+                  swiz[i] = i < intrin->num_components ? i : 0;
+
+               new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
+                                     intrin->num_components, false);
+            } else {
+               nir_ssa_def *old_def =
+                  nir_phi_builder_value_get_block_def(node->pb_value, block);
+               /* For writemasked store_var intrinsics, we combine the newly
+                * written values with the existing contents of unwritten
+                * channels, creating a new SSA value for the whole vector.
+                */
+               nir_ssa_def *srcs[4];
+               for (unsigned i = 0; i < intrin->num_components; i++) {
+                  if (wrmask & (1 << i)) {
+                     srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
+                  } else {
+                     srcs[i] = nir_channel(&b, old_def, i);
+                  }
                }
+               new_def = nir_vec(&b, srcs, intrin->num_components);
             }
-            new_def = nir_vec(&b, srcs, intrin->num_components);
-         }
 
-         assert(new_def->num_components == intrin->num_components);
+            assert(new_def->num_components == intrin->num_components);
 
-         nir_phi_builder_value_set_block_def(node->pb_value, block, new_def);
-         nir_instr_remove(&intrin->instr);
-         break;
-      }
+            nir_phi_builder_value_set_block_def(node->pb_value, block, new_def);
+            nir_instr_remove(&intrin->instr);
+            break;
+         }
 
-      default:
-         break;
+         default:
+            break;
+         }
       }
    }
 
-   for (unsigned i = 0; i < block->num_dom_children; ++i)
-      rename_variables_block(block->dom_children[i], state);
-
    return true;
 }
 
@@ -737,7 +736,7 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
       }
    }
 
-   rename_variables_block(nir_start_block(impl), &state);
+   rename_variables(&state);
 
    nir_phi_builder_finish(state.phi_builder);
 
index edc5302..a4dc18a 100644 (file)
@@ -44,7 +44,8 @@
  *         var.pb_val = nir_phi_builder_add_value(pb, var.defs)
  *
  *     // Visit each block.  This needs to visit dominators first;
- *     // nir_for_each_block() will be ok.
+ *     // nir_foreach_block() will be ok.
+ *
  *     foreach block:
  *         foreach instruction:
  *             foreach use of variable var:
index fe2494b..8b112b0 100644 (file)
@@ -1173,7 +1173,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
       case SpvStorageClassPushConstant:
          var->mode = vtn_variable_mode_push_constant;
          assert(b->shader->num_uniforms == 0);
-         b->shader->num_uniforms = vtn_type_block_size(var->type) * 4;
+         b->shader->num_uniforms = vtn_type_block_size(var->type);
          break;
       case SpvStorageClassInput:
          var->mode = vtn_variable_mode_input;
index fe33ecd..fadeb2a 100644 (file)
@@ -758,64 +758,99 @@ dri2_create_screen(_EGLDisplay *disp)
 
 /**
  * Called via eglInitialize(), GLX_drv->API.Initialize().
+ *
+ * This must be guaranteed to be called exactly once, even if eglInitialize is
+ * called many times (without a eglTerminate in between).
  */
 static EGLBoolean
 dri2_initialize(_EGLDriver *drv, _EGLDisplay *disp)
 {
+   EGLBoolean ret = EGL_FALSE;
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+
+   /* In the case where the application calls eglMakeCurrent(context1),
+    * eglTerminate, then eglInitialize again (without a call to eglReleaseThread
+    * or eglMakeCurrent(NULL) before that), dri2_dpy structure is still
+    * initialized, as we need it to be able to free context1 correctly.
+    *
+    * It would probably be safest to forcibly release the display with
+    * dri2_display_release, to make sure the display is reinitialized correctly.
+    * However, the EGL spec states that we need to keep a reference to the
+    * current context (so we cannot call dri2_make_current(NULL)), and therefore
+    * we would leak context1 as we would be missing the old display connection
+    * to free it up correctly.
+    */
+   if (dri2_dpy) {
+      dri2_dpy->ref_count++;
+      return EGL_TRUE;
+   }
+
    /* not until swrast_dri is supported */
    if (disp->Options.UseFallback)
       return EGL_FALSE;
 
+   /* Nothing to initialize for a test only display */
+   if (disp->Options.TestOnly)
+      return EGL_TRUE;
+
    switch (disp->Platform) {
 #ifdef HAVE_SURFACELESS_PLATFORM
    case _EGL_PLATFORM_SURFACELESS:
-      if (disp->Options.TestOnly)
-         return EGL_TRUE;
-      return dri2_initialize_surfaceless(drv, disp);
+      ret = dri2_initialize_surfaceless(drv, disp);
+      break;
 #endif
-
 #ifdef HAVE_X11_PLATFORM
    case _EGL_PLATFORM_X11:
-      if (disp->Options.TestOnly)
-         return EGL_TRUE;
-      return dri2_initialize_x11(drv, disp);
+      ret = dri2_initialize_x11(drv, disp);
+      break;
 #endif
-
 #ifdef HAVE_DRM_PLATFORM
    case _EGL_PLATFORM_DRM:
-      if (disp->Options.TestOnly)
-         return EGL_TRUE;
-      return dri2_initialize_drm(drv, disp);
+      ret = dri2_initialize_drm(drv, disp);
+      break;
 #endif
 #ifdef HAVE_WAYLAND_PLATFORM
    case _EGL_PLATFORM_WAYLAND:
-      if (disp->Options.TestOnly)
-         return EGL_TRUE;
-      return dri2_initialize_wayland(drv, disp);
+      ret = dri2_initialize_wayland(drv, disp);
+      break;
 #endif
 #ifdef HAVE_ANDROID_PLATFORM
    case _EGL_PLATFORM_ANDROID:
-      if (disp->Options.TestOnly)
-         return EGL_TRUE;
-      return dri2_initialize_android(drv, disp);
+      ret = dri2_initialize_android(drv, disp);
+      break;
 #endif
-
    default:
       _eglLog(_EGL_WARNING, "No EGL platform enabled.");
       return EGL_FALSE;
    }
+
+   if (ret) {
+      dri2_dpy = dri2_egl_display(disp);
+
+      if (!dri2_dpy) {
+         return EGL_FALSE;
+      }
+
+      dri2_dpy->ref_count++;
+   }
+
+   return ret;
 }
 
 /**
- * Called via eglTerminate(), drv->API.Terminate().
+ * Decrement display reference count, and free up display if necessary.
  */
-static EGLBoolean
-dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)
-{
+static void
+dri2_display_release(_EGLDisplay *disp) {
    struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
    unsigned i;
 
-   _eglReleaseDisplayResources(drv, disp);
+   assert(dri2_dpy->ref_count > 0);
+   dri2_dpy->ref_count--;
+
+   if (dri2_dpy->ref_count > 0)
+      return;
+
    _eglCleanupDisplay(disp);
 
    if (dri2_dpy->own_dri_screen)
@@ -870,6 +905,21 @@ dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)
    }
    free(dri2_dpy);
    disp->DriverData = NULL;
+}
+
+/**
+ * Called via eglTerminate(), drv->API.Terminate().
+ *
+ * This must be guaranteed to be called exactly once, even if eglTerminate is
+ * called many times (without a eglInitialize in between).
+ */
+static EGLBoolean
+dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)
+{
+   /* Release all non-current Context/Surfaces. */
+   _eglReleaseDisplayResources(drv, disp);
+
+   dri2_display_release(disp);
 
    return EGL_TRUE;
 }
@@ -1189,10 +1239,16 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
    _EGLSurface *tmp_dsurf, *tmp_rsurf;
    __DRIdrawable *ddraw, *rdraw;
    __DRIcontext *cctx;
+   EGLBoolean unbind;
+
+   if (!dri2_dpy)
+      return _eglError(EGL_NOT_INITIALIZED, "eglMakeCurrent");
 
    /* make new bindings */
-   if (!_eglBindContext(ctx, dsurf, rsurf, &old_ctx, &old_dsurf, &old_rsurf))
+   if (!_eglBindContext(ctx, dsurf, rsurf, &old_ctx, &old_dsurf, &old_rsurf)) {
+      /* _eglBindContext already sets the EGL error (in _eglCheckMakeCurrent) */
       return EGL_FALSE;
+   }
 
    /* flush before context switch */
    if (old_ctx && dri2_drv->glFlush)
@@ -1207,14 +1263,21 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
       dri2_dpy->core->unbindContext(old_cctx);
    }
 
-   if ((cctx == NULL && ddraw == NULL && rdraw == NULL) ||
-       dri2_dpy->core->bindContext(cctx, ddraw, rdraw)) {
+   unbind = (cctx == NULL && ddraw == NULL && rdraw == NULL);
+
+   if (unbind || dri2_dpy->core->bindContext(cctx, ddraw, rdraw)) {
       if (old_dsurf)
          drv->API.DestroySurface(drv, disp, old_dsurf);
       if (old_rsurf)
          drv->API.DestroySurface(drv, disp, old_rsurf);
-      if (old_ctx)
+
+      if (!unbind)
+         dri2_dpy->ref_count++;
+      if (old_ctx) {
+         EGLDisplay old_disp = _eglGetDisplayHandle(old_ctx->Resource.Display);
          drv->API.DestroyContext(drv, disp, old_ctx);
+         dri2_display_release(old_disp);
+      }
 
       return EGL_TRUE;
    } else {
@@ -1232,7 +1295,11 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
       _eglPutSurface(old_rsurf);
       _eglPutContext(old_ctx);
 
-      return EGL_FALSE;
+      /* dri2_dpy->core->bindContext failed. We cannot tell for sure why, but
+       * setting the error to EGL_BAD_MATCH is surely better than leaving it
+       * as EGL_SUCCESS.
+       */
+      return _eglError(EGL_BAD_MATCH, "eglMakeCurrent");
    }
 }
 
index 925294b..02f9604 100644 (file)
@@ -177,6 +177,10 @@ struct dri2_egl_display
    const __DRI2interopExtension *interop;
    int                       fd;
 
+   /* dri2_initialize/dri2_terminate increment/decrement this count, so does
+    * dri2_make_current (tracks if there are active contexts/surfaces). */
+   int                       ref_count;
+
    int                       own_device;
    int                       swap_available;
    int                       invalidate_available;
index 9f598c9..940ed54 100644 (file)
@@ -167,6 +167,8 @@ droid_window_dequeue_buffer(struct dri2_egl_surface *dri2_surf)
 static EGLBoolean
 droid_window_enqueue_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_surf)
 {
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+
    /* To avoid blocking other EGL calls, release the display mutex before
     * we enter droid_window_enqueue_buffer() and re-acquire the mutex upon
     * return.
@@ -197,6 +199,12 @@ droid_window_enqueue_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_sur
    dri2_surf->buffer = NULL;
 
    mtx_lock(&disp->Mutex);
+
+   if (dri2_surf->dri_image) {
+      dri2_dpy->image->destroyImage(dri2_surf->dri_image);
+      dri2_surf->dri_image = NULL;
+   }
+
    return EGL_TRUE;
 }
 
@@ -288,6 +296,8 @@ droid_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
 
    config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
                                 dri2_surf->base.GLColorspace);
+   if (!config)
+      goto cleanup_surface;
 
    if (dri2_dpy->dri2) {
       dri2_surf->dri_drawable =
@@ -381,6 +391,9 @@ get_back_bo(struct dri2_egl_surface *dri2_surf)
    int fourcc, pitch;
    int offset = 0, fd;
 
+   if (dri2_surf->dri_image)
+          return 0;
+
    if (!dri2_surf->buffer)
       return -1;
 
@@ -439,10 +452,8 @@ droid_image_get_buffers(__DRIdrawable *driDrawable,
 static EGLBoolean
 droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
 {
-   struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv);
    struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
    struct dri2_egl_surface *dri2_surf = dri2_egl_surface(draw);
-   _EGLContext *ctx;
 
    if (dri2_surf->base.Type != EGL_WINDOW_BIT)
       return EGL_TRUE;
@@ -1127,6 +1138,7 @@ cleanup_device:
    close(dri2_dpy->fd);
 cleanup_display:
    free(dri2_dpy);
+   dpy->DriverData = NULL;
 
    return _eglError(EGL_NOT_INITIALIZED, err);
 }
index 9373496..1ce282f 100644 (file)
@@ -726,5 +726,6 @@ cleanup:
       close(fd);
 
    free(dri2_dpy);
+   disp->DriverData = NULL;
    return EGL_FALSE;
 }
index e0ddc12..323a8d7 100644 (file)
@@ -157,6 +157,7 @@ cleanup_driver:
    close(dri2_dpy->fd);
 cleanup_display:
    free(dri2_dpy);
+   disp->DriverData = NULL;
 
    return _eglError(EGL_NOT_INITIALIZED, err);
 }
index ff0d5c8..5930929 100644 (file)
@@ -1238,6 +1238,7 @@ dri2_initialize_wayland_drm(_EGLDriver *drv, _EGLDisplay *disp)
    wl_event_queue_destroy(dri2_dpy->wl_queue);
  cleanup_dpy:
    free(dri2_dpy);
+   disp->DriverData = NULL;
 
    return EGL_FALSE;
 }
@@ -1883,6 +1884,7 @@ dri2_initialize_wayland_swrast(_EGLDriver *drv, _EGLDisplay *disp)
    wl_event_queue_destroy(dri2_dpy->wl_queue);
  cleanup_dpy:
    free(dri2_dpy);
+   disp->DriverData = NULL;
 
    return EGL_FALSE;
 }
index c0a4005..686552c 100644 (file)
@@ -1231,6 +1231,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp)
       xcb_disconnect(dri2_dpy->conn);
  cleanup_dpy:
    free(dri2_dpy);
+   disp->DriverData = NULL;
 
    return EGL_FALSE;
 }
@@ -1370,6 +1371,7 @@ dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp)
       xcb_disconnect(dri2_dpy->conn);
  cleanup_dpy:
    free(dri2_dpy);
+   disp->DriverData = NULL;
 
    return EGL_FALSE;
 }
@@ -1467,6 +1469,7 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
       xcb_disconnect(dri2_dpy->conn);
  cleanup_dpy:
    free(dri2_dpy);
+   disp->DriverData = NULL;
 
    return EGL_FALSE;
 }
index f1e3ba2..c91f9cd 100644 (file)
@@ -27,6 +27,7 @@ struct wl_egl_window {
 
        void *private;
        void (*resize_callback)(struct wl_egl_window *, void *);
+       void (*destroy_window_callback)(void *);
 };
 
 #ifdef  __cplusplus
index 80a5be5..4a4701a 100644 (file)
@@ -66,6 +66,7 @@ wl_egl_window_create(struct wl_surface *surface,
        egl_window->surface = surface;
        egl_window->private = NULL;
        egl_window->resize_callback = NULL;
+       egl_window->destroy_window_callback = NULL;
        wl_egl_window_resize(egl_window, width, height, 0, 0);
        egl_window->attached_width  = 0;
        egl_window->attached_height = 0;
@@ -76,6 +77,8 @@ wl_egl_window_create(struct wl_surface *surface,
 WL_EGL_EXPORT void
 wl_egl_window_destroy(struct wl_egl_window *egl_window)
 {
+       if (egl_window->destroy_window_callback)
+               egl_window->destroy_window_callback(egl_window->private);
        free(egl_window);
 }
 
index 6077976..013bc88 100644 (file)
@@ -310,7 +310,8 @@ C_SOURCES := \
        util/u_upload_mgr.h \
        util/u_vbuf.c \
        util/u_vbuf.h \
-       util/u_video.h
+       util/u_video.h \
+       util/u_viewport.h
 
 NIR_SOURCES := \
        nir/tgsi_to_nir.c \
index 218822b..074783a 100644 (file)
@@ -194,7 +194,7 @@ static inline float uf10_to_f32(uint16_t val)
 
    if (exponent == 0) {
       if (mantissa != 0) {
-         const float scale = 1.0 / (1 << 20);
+         const float scale = 1.0 / (1 << 19);
          f32.f = scale * mantissa;
       }
    }
index 8916a96..55343e8 100644 (file)
@@ -831,7 +831,7 @@ align(int value, int alignment)
 static inline uint64_t
 align64(uint64_t value, unsigned alignment)
 {
-   return (value + alignment - 1) & ~(alignment - 1);
+   return (value + alignment - 1) & ~((uint64_t)alignment - 1);
 }
 
 /**
diff --git a/src/gallium/auxiliary/util/u_viewport.h b/src/gallium/auxiliary/util/u_viewport.h
new file mode 100644 (file)
index 0000000..a731b34
--- /dev/null
@@ -0,0 +1,59 @@
+/**************************************************************************
+ *
+ * Copyright 2016 Ilia Mirkin.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_VIEWPORT_H
+#define U_VIEWPORT_H
+
+#include "c99_compat.h"
+#include "pipe/p_state.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline void
+util_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz,
+                        float *zmin, float *zmax)
+{
+   float a, b;
+   if (halfz) {
+      a = vp->translate[2];
+      b = vp->translate[2] + vp->scale[2];
+   } else {
+      a = vp->translate[2] - vp->scale[2];
+      b = vp->translate[2] + vp->scale[2];
+   }
+
+   *zmin = a < b ? a : b;
+   *zmax = a < b ? b : a;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
index f7f572e..493e645 100644 (file)
@@ -89,6 +89,7 @@ dri3_free_front_buffer(struct vl_dri3_screen *scrn,
 {
    xcb_sync_destroy_fence(scrn->conn, buffer->sync_fence);
    xshmfence_unmap_shm(buffer->shm_fence);
+   pipe_resource_reference(&buffer->texture, NULL);
    FREE(buffer);
 }
 
index d9a7bb5..aeb61e7 100644 (file)
@@ -1376,7 +1376,7 @@ static inline uint32_t A4XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val)
 {
        return ((val) << A4XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A4XX_RB_DEPTH_CONTROL_ZFUNC__MASK;
 }
-#define A4XX_RB_DEPTH_CONTROL_BF_ENABLE                                0x00000080
+#define A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE                   0x00000080
 #define A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE                  0x00010000
 #define A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS                        0x00020000
 #define A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE                    0x80000000
@@ -3145,6 +3145,8 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val)
 
 #define REG_A4XX_GRAS_CL_CLIP_CNTL                             0x00002000
 #define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE                    0x00008000
+#define A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE              0x00010000
+#define A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE               0x00020000
 #define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z                 0x00400000
 
 #define REG_A4XX_GRAS_CLEAR_CNTL                               0x00002003
index 00e985d..8b350ae 100644 (file)
@@ -31,6 +31,7 @@
 #include "util/u_memory.h"
 #include "util/u_helpers.h"
 #include "util/u_format.h"
+#include "util/u_viewport.h"
 
 #include "freedreno_resource.h"
 #include "freedreno_query_hw.h"
@@ -544,12 +545,14 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                                A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
        }
 
-       if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
+       if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
                struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
                bool fragz = fp->has_kill | fp->writes_pos;
+               bool clamp = !ctx->rasterizer->depth_clip;
 
                OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
                OUT_RING(ring, zsa->rb_depth_control |
+                               COND(clamp, A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE) |
                                COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE) |
                                COND(fragz && fp->frag_coord, A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS));
 
@@ -636,6 +639,30 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
        }
 
+       if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
+               float zmin, zmax;
+               int depth = 24;
+               if (ctx->framebuffer.zsbuf) {
+                       depth = util_format_get_component_bits(
+                                       pipe_surface_format(ctx->framebuffer.zsbuf),
+                                       UTIL_FORMAT_COLORSPACE_ZS, 0);
+               }
+               util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz,
+                                                               &zmin, &zmax);
+
+               OUT_PKT0(ring, REG_A4XX_RB_VPORT_Z_CLAMP(0), 2);
+               if (depth == 32) {
+                       OUT_RING(ring, fui(zmin));
+                       OUT_RING(ring, fui(zmax));
+               } else if (depth == 16) {
+                       OUT_RING(ring, (uint32_t)(zmin * 0xffff));
+                       OUT_RING(ring, (uint32_t)(zmax * 0xffff));
+               } else {
+                       OUT_RING(ring, (uint32_t)(zmin * 0xffffff));
+                       OUT_RING(ring, (uint32_t)(zmax * 0xffffff));
+               }
+       }
+
        if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
                struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
                unsigned n = pfb->nr_cbufs;
index 7456c63..b3a4292 100644 (file)
@@ -98,7 +98,8 @@ fd4_rasterizer_state_create(struct pipe_context *pctx,
                so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
 
        if (!cso->depth_clip)
-               so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE;
+               so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE |
+                       A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE;
        if (cso->clip_halfz)
                so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z;
 
index 80e0990..939972d 100644 (file)
@@ -1321,15 +1321,12 @@ void
 CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
 {
    code[0] = 0x00000002 | ((qOp & 1) << 31);
-   code[1] = 0x7fc00000 | (qOp >> 1) | (laneMask << 12);
+   code[1] = 0x7fc00200 | (qOp >> 1) | (laneMask << 12); // dall
 
    defId(i->def(0), 2);
    srcId(i->src(0), 10);
    srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23);
 
-   if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
-      code[1] |= 1 << 9; // dall
-
    emitPredicate(i);
 }
 
index e62d807..8d6bc9d 100644 (file)
@@ -1682,7 +1682,7 @@ CodeEmitterGM107::emitNOT()
 void
 CodeEmitterGM107::emitIADD()
 {
-   if (!longIMMD(insn->src(1))) {
+   if (insn->src(1).getFile() != FILE_IMMEDIATE) {
       switch (insn->src(1).getFile()) {
       case FILE_GPR:
          emitInsn(0x5c100000);
@@ -1707,6 +1707,7 @@ CodeEmitterGM107::emitIADD()
       emitX  (0x2b);
    } else {
       emitInsn(0x1c000000);
+      emitNEG (0x38, insn->src(0));
       emitSAT (0x36);
       emitX   (0x35);
       emitCC  (0x34);
index 5d68e99..ca10848 100644 (file)
@@ -2112,7 +2112,7 @@ makeInstructionLong(Instruction *insn)
    insn->encSize = 8;
 
    for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
-      fn->bbArray[i]->binPos += 4;
+      fn->bbArray[i]->binPos += adj;
    }
    fn->binSize += adj;
    insn->bb->binSize += adj;
@@ -2164,9 +2164,16 @@ replaceExitWithModifier(Function *func)
             return;
       }
    }
-   epilogue->binSize -= 8;
-   func->binSize -= 8;
+
+   int adj = epilogue->getExit()->encSize;
+   epilogue->binSize -= adj;
+   func->binSize -= adj;
    delete_Instruction(func->getProgram(), epilogue->getExit());
+
+   // There may be BB's that are laid out after the exit block
+   for (int i = func->bbCount - 1; i >= 0 && func->bbArray[i] != epilogue; --i) {
+      func->bbArray[i]->binPos -= adj;
+   }
 }
 
 void
index bc94285..96f00c1 100644 (file)
@@ -1356,16 +1356,13 @@ CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
 void
 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
 {
-   code[0] = 0x00000000 | (laneMask << 6);
+   code[0] = 0x00000200 | (laneMask << 6); // dall
    code[1] = 0x48000000 | qOp;
 
    defId(i->def(0), 14);
    srcId(i->src(0), 20);
    srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26);
 
-   if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
-      code[0] |= 1 << 9; // dall
-
    emitPredicate(i);
 }
 
index 23414d5..b1076cf 100644 (file)
@@ -287,7 +287,10 @@ private:
 
       bb.push(node);
 
-      while (bb.getSize()) {
+      while (bb.getSize() || cross.getSize()) {
+         if (bb.getSize() == 0)
+            cross.moveTo(bb);
+
          node = reinterpret_cast<Graph::Node *>(bb.pop().u.p);
          assert(node);
          if (!node->visit(sequence))
@@ -314,9 +317,6 @@ private:
             }
          }
          nodes[count++] = node;
-
-         if (bb.getSize() == 0)
-            cross.moveTo(bb);
       }
    }
 
index 6a6b44c..143b318 100644 (file)
@@ -2056,6 +2056,13 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
       base = 0;
    }
 
+   if (ind) {
+      Value *ptr;
+      ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ind, bld.mkImm(su->tex.r));
+      ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7));
+      su->setIndirectR(ptr);
+   }
+
    // get surface coordinates
    for (c = 0; c < arg; ++c)
       src[c] = su->getSrc(c);
index fd604c2..43ecaac 100644 (file)
@@ -379,8 +379,9 @@ nv30_set_framebuffer_state(struct pipe_context *pipe,
        struct nv30_miptree *zeta_mt = nv30_miptree(fb->zsbuf->texture);
 
        if (color_mt->swizzled != zeta_mt->swizzled ||
-           (util_format_get_blocksize(fb->zsbuf->format) > 2) !=
-           (util_format_get_blocksize(fb->cbufs[0]->format) > 2)) {
+           (color_mt->swizzled &&
+            (util_format_get_blocksize(fb->zsbuf->format) > 2) !=
+            (util_format_get_blocksize(fb->cbufs[0]->format) > 2))) {
           nv30->framebuffer.zsbuf = NULL;
           debug_printf("Mismatched color and zeta formats, ignoring zeta.\n");
        }
index 34d32d1..6ea5a47 100644 (file)
@@ -161,7 +161,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
    F3(A, R11G11B10_FLOAT, R11G11B10_FLOAT, R, G, B, xx, FLOAT, BF10GF11RF11, IB),
 
    F3(A, L8_UNORM, R8_UNORM, R, R, R, xx, UNORM, R8, TB),
-   F3(A, L8_SRGB, R8_UNORM, R, R, R, xx, UNORM, R8, TB),
+   F3(A, L8_SRGB, NONE, R, R, R, xx, UNORM, R8, T),
    F3(A, L8_SNORM, R8_SNORM, R, R, R, xx, SNORM, R8, TC),
    I3(A, L8_SINT, R8_SINT, R, R, R, xx, SINT, R8, TR),
    I3(A, L8_UINT, R8_UINT, R, R, R, xx, UINT, R8, TR),
@@ -203,7 +203,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
    C4(A, L4A4_UNORM, NONE, R, R, R, G, UNORM, G4R4, T),
    C4(A, L8A8_UNORM, RG8_UNORM, R, R, R, G, UNORM, G8R8, T),
    C4(A, L8A8_SNORM, RG8_SNORM, R, R, R, G, SNORM, G8R8, T),
-   C4(A, L8A8_SRGB, RG8_UNORM, R, R, R, G, UNORM, G8R8, T),
+   C4(A, L8A8_SRGB, NONE, R, R, R, G, UNORM, G8R8, T),
    C4(A, L8A8_SINT, RG8_SINT, R, R, R, G, SINT, G8R8, T),
    C4(A, L8A8_UINT, RG8_UINT, R, R, R, G, UINT, G8R8, T),
    C4(A, L16A16_UNORM, RG16_UNORM, R, R, R, G, UNORM, R16_G16, T),
index 65f7338..a67a390 100644 (file)
@@ -1,5 +1,6 @@
 
 #include "util/u_format.h"
+#include "util/u_viewport.h"
 
 #include "nv50/nv50_context.h"
 
@@ -265,8 +266,12 @@ nv50_validate_viewport(struct nv50_context *nv50)
       PUSH_DATAf(push, vpt->scale[1]);
       PUSH_DATAf(push, vpt->scale[2]);
 
-      zmin = vpt->translate[2] - fabsf(vpt->scale[2]);
-      zmax = vpt->translate[2] + fabsf(vpt->scale[2]);
+      /* If the halfz setting ever changes, the viewports will also get
+       * updated. The rast will get updated before the validate function has a
+       * chance to hit, so we can just use it directly without an atom
+       * dependency.
+       */
+      util_viewport_zmin_zmax(vpt, nv50->rast->pipe.clip_halfz, &zmin, &zmax);
 
 #ifdef NV50_SCISSORS_CLIPPING
       BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(i)), 2);
index 28d6fec..12d5b0e 100644 (file)
@@ -514,10 +514,8 @@ nvc0_bufctx_fence(struct nvc0_context *nvc0, struct nouveau_bufctx *bufctx,
    NOUVEAU_DRV_STAT(&nvc0->screen->base, resource_validate_count, count);
 }
 
-static void
-nvc0_context_get_sample_position(struct pipe_context *pipe,
-                                 unsigned sample_count, unsigned sample_index,
-                                 float *xy)
+const void *
+nvc0_get_sample_locations(unsigned sample_count)
 {
    static const uint8_t ms1[1][2] = { { 0x8, 0x8 } };
    static const uint8_t ms2[2][2] = {
@@ -549,8 +547,22 @@ nvc0_context_get_sample_position(struct pipe_context *pipe,
    case 8: ptr = ms8; break;
    default:
       assert(0);
-      return; /* bad sample count -> undefined locations */
+      return NULL; /* bad sample count -> undefined locations */
    }
+   return ptr;
+}
+
+static void
+nvc0_context_get_sample_position(struct pipe_context *pipe,
+                                 unsigned sample_count, unsigned sample_index,
+                                 float *xy)
+{
+   const uint8_t (*ptr)[2];
+
+   ptr = nvc0_get_sample_locations(sample_count);
+   if (!ptr)
+      return;
+
    xy[0] = ptr[sample_index][0] * 0.0625f;
    xy[1] = ptr[sample_index][1] * 0.0625f;
 }
index 8d27300..ff5467c 100644 (file)
@@ -278,6 +278,7 @@ struct pipe_context *nvc0_create(struct pipe_screen *, void *, unsigned flags);
 void nvc0_bufctx_fence(struct nvc0_context *, struct nouveau_bufctx *,
                        bool on_flush);
 void nvc0_default_kick_notify(struct nouveau_pushbuf *);
+const void *nvc0_get_sample_locations(unsigned);
 
 /* nvc0_draw.c */
 extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
index ad44e85..ce59484 100644 (file)
@@ -2,6 +2,7 @@
 #include "util/u_format.h"
 #include "util/u_framebuffer.h"
 #include "util/u_math.h"
+#include "util/u_viewport.h"
 
 #include "nvc0/nvc0_context.h"
 
@@ -211,6 +212,19 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
        PUSH_DATAf(push, xy[1]);
     }
 
+   if (screen->base.class_3d >= GM200_3D_CLASS) {
+      const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
+      uint32_t val[4] = {};
+
+      for (i = 0; i < 16; i++) {
+         val[i / 4] |= ptr[i % ms][0] << (((i % 4) * 8) + 0);
+         val[i / 4] |= ptr[i % ms][1] << (((i % 4) * 8) + 4);
+      }
+
+      BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
+      PUSH_DATAp(push, val, 4);
+   }
+
     if (serialize)
        IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
 
@@ -316,8 +330,12 @@ nvc0_validate_viewport(struct nvc0_context *nvc0)
       PUSH_DATA (push, (w << 16) | x);
       PUSH_DATA (push, (h << 16) | y);
 
-      zmin = vp->translate[2] - fabsf(vp->scale[2]);
-      zmax = vp->translate[2] + fabsf(vp->scale[2]);
+      /* If the halfz setting ever changes, the viewports will also get
+       * updated. The rast will get updated before the validate function has a
+       * chance to hit, so we can just use it directly without an atom
+       * dependency.
+       */
+      util_viewport_zmin_zmax(vp, nvc0->rast->pipe.clip_halfz, &zmin, &zmax);
 
       BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(i)), 2);
       PUSH_DATAf(push, zmin);
index 1a5d8ec..716048d 100644 (file)
@@ -589,13 +589,11 @@ void nvc0_validate_textures(struct nvc0_context *nvc0)
       PUSH_DATA (nvc0->base.pushbuf, 0);
    }
 
-   if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) {
-      /* Invalidate all CP textures because they are aliased. */
-      for (int i = 0; i < nvc0->num_textures[5]; i++)
-         nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CP_TEX(i));
-      nvc0->textures_dirty[5] = ~0;
-      nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES;
-   }
+   /* Invalidate all CP textures because they are aliased. */
+   for (int i = 0; i < nvc0->num_textures[5]; i++)
+      nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CP_TEX(i));
+   nvc0->textures_dirty[5] = ~0;
+   nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES;
 }
 
 bool
@@ -709,11 +707,9 @@ void nvc0_validate_samplers(struct nvc0_context *nvc0)
       PUSH_DATA (nvc0->base.pushbuf, 0);
    }
 
-   if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) {
-      /* Invalidate all CP samplers because they are aliased. */
-      nvc0->samplers_dirty[5] = ~0;
-      nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
-   }
+   /* Invalidate all CP samplers because they are aliased. */
+   nvc0->samplers_dirty[5] = ~0;
+   nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
 }
 
 /* Upload the "diagonal" entries for the possible texture sources ($t == $s).
index cae621c..3d20c68 100644 (file)
@@ -835,7 +835,7 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
 
    /* Queue things up to let the macros write params to the driver constbuf */
    BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
-   PUSH_DATA (push, 512);
+   PUSH_DATA (push, 2048);
    PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
    PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
    BEGIN_NVC0(push, NVC0_3D(CB_POS), 1);
@@ -981,7 +981,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    if (nvc0->vertprog->vp.need_draw_parameters) {
       PUSH_SPACE(push, 9);
       BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
-      PUSH_DATA (push, 512);
+      PUSH_DATA (push, 2048);
       PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
       PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
       if (!info->indirect) {
index a1d1d3e..d172d73 100644 (file)
@@ -237,7 +237,13 @@ nve4_compute_validate_samplers(struct nvc0_context *nvc0)
       BEGIN_NVC0(nvc0->base.pushbuf, NVE4_CP(TSC_FLUSH), 1);
       PUSH_DATA (nvc0->base.pushbuf, 0);
    }
+
+   /* Invalidate all 3D samplers because they are aliased. */
+   for (int s = 0; s < 5; s++)
+      nvc0->samplers_dirty[s] = ~0;
+   nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS;
 }
+
 /* (Code duplicated at bottom for various non-convincing reasons.
  *  E.g. we might want to use the COMPUTE subchannel to upload TIC/TSC
  *  entries to avoid a subchannel switch.
@@ -690,6 +696,14 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
    }
 
    nvc0->state.num_textures[s] = nvc0->num_textures[s];
+
+   /* Invalidate all 3D textures because they are aliased. */
+   for (int s = 0; s < 5; s++) {
+      for (int i = 0; i < nvc0->num_textures[s]; i++)
+         nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
+      nvc0->textures_dirty[s] = ~0;
+   }
+   nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
 }
 
 
index 9412e89..6d6998e 100644 (file)
@@ -143,6 +143,13 @@ void cayman_init_msaa(struct pipe_context *ctx)
 void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
 {
        switch (nr_samples) {
+       default:
+       case 1:
+               radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0);
+               radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0);
+               radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0);
+               radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0);
+               break;
        case 2:
                radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
                radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
index d8ec2a3..8e23b61 100644 (file)
@@ -519,6 +519,12 @@ static void cik_sdma_copy(struct pipe_context *ctx,
                return;
        }
 
+       /* Carrizo SDMA texture copying is very broken for some users.
+        * https://bugs.freedesktop.org/show_bug.cgi?id=97029
+        */
+       if (sctx->b.family == CHIP_CARRIZO)
+               goto fallback;
+
        if (cik_sdma_copy_texture(sctx, dst, dst_level, dstx, dsty, dstz,
                                  src, src_level, src_box))
                return;
index bbd02e9..fe4cb29 100644 (file)
@@ -819,9 +819,9 @@ void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuf
                util_memcpy_cpu_to_le32(tmp, ptr, size);
 }
 
-void si_set_constant_buffer(struct si_context *sctx,
-                           struct si_buffer_resources *buffers,
-                           uint slot, struct pipe_constant_buffer *input)
+static void si_set_constant_buffer(struct si_context *sctx,
+                                  struct si_buffer_resources *buffers,
+                                  uint slot, struct pipe_constant_buffer *input)
 {
        assert(slot < buffers->desc.num_elements);
        pipe_resource_reference(&buffers->buffers[slot], NULL);
@@ -881,6 +881,12 @@ void si_set_constant_buffer(struct si_context *sctx,
        buffers->desc.dirty_mask |= 1u << slot;
 }
 
+void si_set_rw_buffer(struct si_context *sctx,
+                     uint slot, struct pipe_constant_buffer *input)
+{
+       si_set_constant_buffer(sctx, &sctx->rw_buffers, slot, input);
+}
+
 static void si_pipe_set_constant_buffer(struct pipe_context *ctx,
                                        uint shader, uint slot,
                                        struct pipe_constant_buffer *input)
@@ -1052,10 +1058,10 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
                 * and most other clients can use TC L2 as well, we don't need
                 * to flush it.
                 *
-                * The only case which requires flushing it is VGT DMA index
-                * fetching, which is a rare case. Thus, flag the TC L2
-                * dirtiness in the resource and handle it when index fetching
-                * is used.
+                * The only cases which requires flushing it is VGT DMA index
+                * fetching (on <= CIK) and indirect draw data, which are rare
+                * cases. Thus, flag the TC L2 dirtiness in the resource and
+                * handle it at draw call time.
                 */
                for (i = 0; i < sctx->b.streamout.num_targets; i++)
                        if (sctx->b.streamout.targets[i])
@@ -1177,8 +1183,7 @@ static void si_set_polygon_stipple(struct pipe_context *ctx,
        cb.user_buffer = stipple;
        cb.buffer_size = sizeof(stipple);
 
-       si_set_constant_buffer(sctx, &sctx->rw_buffers,
-                              SI_PS_CONST_POLY_STIPPLE, &cb);
+       si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, &cb);
 }
 
 /* TEXTURE METADATA ENABLE/DISABLE */
index e8e0403..69478a8 100644 (file)
@@ -216,7 +216,8 @@ void si_begin_new_cs(struct si_context *ctx)
 
        si_mark_atom_dirty(ctx, &ctx->clip_regs);
        si_mark_atom_dirty(ctx, &ctx->clip_state.atom);
-       si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
+       ctx->msaa_sample_locs.nr_samples = 0;
+       si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs.atom);
        si_mark_atom_dirty(ctx, &ctx->msaa_config);
        si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
        si_mark_atom_dirty(ctx, &ctx->cb_render_state);
index 88f4f20..9dd9ef5 100644 (file)
@@ -212,8 +212,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
        si_begin_new_cs(sctx);
        r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */
 
-       /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
-        * with a NULL buffer). We need to use a dummy buffer instead. */
+       /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
+        * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
        if (sctx->b.chip_class == CIK) {
                sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
                                                                 PIPE_USAGE_DEFAULT, 16);
@@ -228,6 +228,15 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
                        }
                }
 
+               si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,
+                                &sctx->null_const_buf);
+               si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES,
+                                &sctx->null_const_buf);
+               si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE,
+                                &sctx->null_const_buf);
+               si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS,
+                                &sctx->null_const_buf);
+
                /* Clear the NULL constant buffer, because loads should return zeros. */
                sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
                                     sctx->null_const_buf.buffer->width0, 0,
index dbbf9b6..2661972 100644 (file)
@@ -168,6 +168,11 @@ struct si_clip_state {
        struct pipe_clip_state          state;
 };
 
+struct si_sample_locs {
+       struct r600_atom        atom;
+       unsigned                nr_samples;
+};
+
 struct si_sample_mask {
        struct r600_atom        atom;
        uint16_t                sample_mask;
@@ -212,7 +217,7 @@ struct si_context {
        /* Atom declarations. */
        struct r600_atom                cache_flush;
        struct si_framebuffer           framebuffer;
-       struct r600_atom                msaa_sample_locs;
+       struct si_sample_locs           msaa_sample_locs;
        struct r600_atom                db_render_state;
        struct r600_atom                msaa_config;
        struct si_sample_mask           sample_mask;
index 5e5bf68..aad7dd0 100644 (file)
@@ -6567,6 +6567,41 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 
        radeon_llvm_dispose(&ctx.radeon_bld);
 
+       /* Validate SGPR and VGPR usage for compute to detect compiler bugs.
+        * LLVM 3.9svn has this bug.
+        */
+       if (sel->type == PIPE_SHADER_COMPUTE) {
+               unsigned *props = sel->info.properties;
+               unsigned wave_size = 64;
+               unsigned max_vgprs = 256;
+               unsigned max_sgprs = sscreen->b.chip_class >= VI ? 800 : 512;
+               unsigned max_sgprs_per_wave = 128;
+               unsigned min_waves_per_cu =
+                       DIV_ROUND_UP(props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
+                                    props[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
+                                    props[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH],
+                                    wave_size);
+               unsigned min_waves_per_simd = DIV_ROUND_UP(min_waves_per_cu, 4);
+
+               max_vgprs = max_vgprs / min_waves_per_simd;
+               max_sgprs = MIN2(max_sgprs / min_waves_per_simd, max_sgprs_per_wave);
+
+               if (shader->config.num_sgprs > max_sgprs ||
+                   shader->config.num_vgprs > max_vgprs) {
+                       fprintf(stderr, "LLVM failed to compile a shader correctly: "
+                               "SGPR:VGPR usage is %u:%u, but the hw limit is %u:%u\n",
+                               shader->config.num_sgprs, shader->config.num_vgprs,
+                               max_sgprs, max_vgprs);
+
+                       /* Just terminate the process, because dependent
+                        * shaders can hang due to bad input data, but use
+                        * the env var to allow shader-db to work.
+                        */
+                       if (!debug_get_bool_option("SI_PASS_BAD_SHADERS", false))
+                               abort();
+               }
+       }
+
        /* Add the scratch offset to input SGPRs. */
        if (shader->config.scratch_bytes_per_wave)
                shader->info.num_input_sgprs += 1; /* scratch byte offset */
index 3bbb81a..2cef615 100644 (file)
@@ -460,6 +460,10 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
                        S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
                        S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
 
+               /* Only set dual source blending for MRT0 to avoid a hang. */
+               if (i >= 1 && blend->dual_src_blend)
+                       continue;
+
                if (!state->rt[j].colormask)
                        continue;
 
@@ -620,8 +624,7 @@ static void si_set_clip_state(struct pipe_context *ctx,
        cb.user_buffer = state->ucp;
        cb.buffer_offset = 0;
        cb.buffer_size = 4*4*8;
-       si_set_constant_buffer(sctx, &sctx->rw_buffers,
-                              SI_VS_CONST_CLIP_PLANES, &cb);
+       si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb);
        pipe_resource_reference(&cb.buffer, NULL);
 }
 
@@ -847,9 +850,13 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
                return;
 
        if (sctx->framebuffer.nr_samples > 1 &&
-           (!old_rs || old_rs->multisample_enable != rs->multisample_enable))
+           (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) {
                si_mark_atom_dirty(sctx, &sctx->db_render_state);
 
+               if (sctx->b.family >= CHIP_POLARIS10)
+                       si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
+       }
+
        r600_set_scissor_enable(&sctx->b, rs->scissor_enable);
 
        si_pm4_bind_state(sctx, rasterizer, rs);
@@ -2395,21 +2402,9 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
                        assert(0);
                }
                constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
-               si_set_constant_buffer(sctx, &sctx->rw_buffers,
-                                      SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);
+               si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);
 
-               /* Smoothing (only possible with nr_samples == 1) uses the same
-                * sample locations as the MSAA it simulates.
-                *
-                * Therefore, don't update the sample locations when
-                * transitioning from no AA to smoothing-equivalent AA, and
-                * vice versa.
-                */
-               if ((sctx->framebuffer.nr_samples != 1 ||
-                    old_nr_samples != SI_NUM_SMOOTH_AA_SAMPLES) &&
-                   (sctx->framebuffer.nr_samples != SI_NUM_SMOOTH_AA_SAMPLES ||
-                    old_nr_samples != 1))
-                       si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs);
+               si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
        }
 }
 
@@ -2536,8 +2531,37 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx,
        struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
        unsigned nr_samples = sctx->framebuffer.nr_samples;
 
-       cayman_emit_msaa_sample_locs(cs, nr_samples > 1 ? nr_samples :
-                                               SI_NUM_SMOOTH_AA_SAMPLES);
+       /* Smoothing (only possible with nr_samples == 1) uses the same
+        * sample locations as the MSAA it simulates.
+        */
+       if (nr_samples <= 1 && sctx->smoothing_enabled)
+               nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;
+
+       /* On Polaris, the small primitive filter uses the sample locations
+        * even when MSAA is off, so we need to make sure they're set to 0.
+        */
+       if ((nr_samples > 1 || sctx->b.family >= CHIP_POLARIS10) &&
+           (nr_samples != sctx->msaa_sample_locs.nr_samples)) {
+               sctx->msaa_sample_locs.nr_samples = nr_samples;
+               cayman_emit_msaa_sample_locs(cs, nr_samples);
+       }
+
+       if (sctx->b.family >= CHIP_POLARIS10) {
+               struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+               unsigned small_prim_filter_cntl =
+                       S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
+                       S_028830_LINE_FILTER_DISABLE(1); /* line bug */
+
+               /* The alternative of setting sample locations to 0 would
+                * require a DB flush to avoid Z errors, see
+                * https://bugs.freedesktop.org/show_bug.cgi?id=96908
+                */
+               if (sctx->framebuffer.nr_samples > 1 && rs && !rs->multisample_enable)
+                       small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;
+
+               radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
+                                      small_prim_filter_cntl);
+       }
 }
 
 static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
@@ -3246,8 +3270,7 @@ static void si_set_tess_state(struct pipe_context *ctx,
                               (void*)array, sizeof(array),
                               &cb.buffer_offset);
 
-       si_set_constant_buffer(sctx, &sctx->rw_buffers,
-                              SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
+       si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
        pipe_resource_reference(&cb.buffer, NULL);
 }
 
@@ -3337,7 +3360,7 @@ void si_init_state_functions(struct si_context *sctx)
 
        si_init_atom(sctx, &sctx->cache_flush, &sctx->atoms.s.cache_flush, si_emit_cache_flush);
        si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
-       si_init_atom(sctx, &sctx->msaa_sample_locs, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
+       si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
        si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
        si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
        si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
@@ -3810,11 +3833,6 @@ static void si_init_config(struct si_context *sctx)
        if (sctx->b.family == CHIP_STONEY)
                si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
 
-       if (sctx->b.family >= CHIP_POLARIS10)
-               si_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
-                              S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
-                              S_028830_LINE_FILTER_DISABLE(1)); /* line bug */
-
        si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
        if (sctx->b.chip_class >= CIK)
                si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
index aea98ae..97f6dfa 100644 (file)
@@ -269,9 +269,8 @@ void si_update_compressed_colortex_masks(struct si_context *sctx);
 void si_emit_graphics_shader_userdata(struct si_context *sctx,
                                       struct r600_atom *atom);
 void si_emit_compute_shader_userdata(struct si_context *sctx);
-void si_set_constant_buffer(struct si_context *sctx,
-                           struct si_buffer_resources *buffers,
-                           uint slot, struct pipe_constant_buffer *input);
+void si_set_rw_buffer(struct si_context *sctx,
+                     uint slot, struct pipe_constant_buffer *input);
 
 /* si_state.c */
 struct si_shader_selector;
index 3c11d14..31ca3bd 100644 (file)
@@ -963,6 +963,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                r600_resource(ib.buffer)->TC_L2_dirty = false;
        }
 
+       if (info->indirect && r600_resource(info->indirect)->TC_L2_dirty) {
+               sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+               r600_resource(info->indirect)->TC_L2_dirty = false;
+       }
+
        /* Check flush flags. */
        if (sctx->b.flags)
                si_mark_atom_dirty(sctx, sctx->atoms.s.cache_flush);
index a7af76d..90ed7f3 100644 (file)
@@ -2052,6 +2052,9 @@ bool si_update_shaders(struct si_context *sctx)
 
                        if (sctx->b.chip_class == SI)
                                si_mark_atom_dirty(sctx, &sctx->db_render_state);
+
+                       if (sctx->framebuffer.nr_samples <= 1)
+                               si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
                }
        }
 
index 8d49b9d..341ee12 100644 (file)
 /*     */
 #define R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL                           0x028830 /* Polaris */
 #define   S_028830_SMALL_PRIM_FILTER_ENABLE(x)                        (((x) & 0x1) << 0)
+#define   C_028830_SMALL_PRIM_FILTER_ENABLE                           0xFFFFFFFE
 #define   S_028830_TRIANGLE_FILTER_DISABLE(x)                         (((x) & 0x1) << 1)
 #define   S_028830_LINE_FILTER_DISABLE(x)                             (((x) & 0x1) << 2)
 #define   S_028830_POINT_FILTER_DISABLE(x)                            (((x) & 0x1) << 3)
index abfef0f..9e37e23 100644 (file)
@@ -173,10 +173,16 @@ svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
 
    assert(shader < ARRAY_SIZE(svga->curr.num_sampler_views));
 
-   for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) {
+   /* In case the number of samplers and sampler_views doesn't match,
+    * loop over the lower of the two counts.
+    */
+   key->num_textures = MIN2(svga->curr.num_sampler_views[shader],
+                            svga->curr.num_samplers[shader]);
+
+   for (i = 0; i < key->num_textures; i++) {
       struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
-      if (view) {
-         assert(svga->curr.sampler[shader][i]);
+      const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
+      if (view && sampler) {
          assert(view->texture);
          assert(view->texture->target < (1 << 4)); /* texture_target:4 */
 
@@ -195,7 +201,7 @@ svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
             }
          }
 
-         if (!svga->curr.sampler[shader][i]->normalized_coords) {
+         if (!sampler->normalized_coords) {
             assert(idx < (1 << 5));  /* width_height_idx:5 bitfield */
             key->tex[i].width_height_idx = idx++;
             key->tex[i].unnormalized = TRUE;
@@ -208,7 +214,6 @@ svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
          key->tex[i].swizzle_a = view->swizzle_a;
       }
    }
-   key->num_textures = svga->curr.num_sampler_views[shader];
 }
 
 
index b67413a..3459af3 100644 (file)
@@ -22,7 +22,7 @@
 include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 
-AM_CXXFLAGS = $(GALLIUM_DRIVER_CFLAGS) -std=c++11
+AM_CXXFLAGS = $(GALLIUM_DRIVER_CFLAGS) $(SWR_CXX11_CXXFLAGS)
 
 noinst_LTLIBRARIES = libmesaswr.la
 
@@ -31,7 +31,7 @@ libmesaswr_la_SOURCES = $(LOADER_SOURCES)
 COMMON_CXXFLAGS = \
        $(GALLIUM_DRIVER_CFLAGS) \
        $(LLVM_CXXFLAGS) \
-       -std=c++11 \
+       $(SWR_CXX11_CXXFLAGS) \
        -I$(builddir)/rasterizer/scripts \
        -I$(builddir)/rasterizer/jitter \
        -I$(srcdir)/rasterizer \
@@ -148,7 +148,7 @@ distclean-local:
 lib_LTLIBRARIES = libswrAVX.la libswrAVX2.la
 
 libswrAVX_la_CXXFLAGS = \
-       -march=core-avx-i \
+       $(SWR_AVX_CXXFLAGS) \
        -DKNOB_ARCH=KNOB_ARCH_AVX \
        $(COMMON_CXXFLAGS)
 
@@ -171,7 +171,7 @@ libswrAVX_la_LDFLAGS = \
        $(COMMON_LDFLAGS)
 
 libswrAVX2_la_CXXFLAGS = \
-       -march=core-avx2 \
+       $(SWR_AVX2_CXXFLAGS) \
        -DKNOB_ARCH=KNOB_ARCH_AVX2 \
        $(COMMON_CXXFLAGS)
 
index 21e3bde..f6bacfd 100644 (file)
@@ -28,6 +28,7 @@
 #include <xf86drm.h>
 #include <xf86drmMode.h>
 
+#include "util/u_hash_table.h"
 #include "util/u_memory.h"
 #include "util/ralloc.h"
 
@@ -329,10 +330,19 @@ vc4_bo_open_handle(struct vc4_screen *screen,
                    uint32_t winsys_stride,
                    uint32_t handle, uint32_t size)
 {
-        struct vc4_bo *bo = CALLOC_STRUCT(vc4_bo);
+        struct vc4_bo *bo;
 
         assert(size);
 
+        pipe_mutex_lock(screen->bo_handles_mutex);
+
+        bo = util_hash_table_get(screen->bo_handles, (void*)(uintptr_t)handle);
+        if (bo) {
+                pipe_reference(NULL, &bo->reference);
+                goto done;
+        }
+
+        bo = CALLOC_STRUCT(vc4_bo);
         pipe_reference_init(&bo->reference, 1);
         bo->screen = screen;
         bo->handle = handle;
@@ -347,6 +357,10 @@ vc4_bo_open_handle(struct vc4_screen *screen,
         bo->map = malloc(bo->size);
 #endif
 
+        util_hash_table_set(screen->bo_handles, (void *)(uintptr_t)handle, bo);
+
+done:
+        pipe_mutex_unlock(screen->bo_handles_mutex);
         return bo;
 }
 
@@ -399,7 +413,11 @@ vc4_bo_get_dmabuf(struct vc4_bo *bo)
                         bo->handle);
                 return -1;
         }
+
+        pipe_mutex_lock(bo->screen->bo_handles_mutex);
         bo->private = false;
+        util_hash_table_set(bo->screen->bo_handles, (void *)(uintptr_t)bo->handle, bo);
+        pipe_mutex_unlock(bo->screen->bo_handles_mutex);
 
         return fd;
 }
index b77506e..71a4426 100644 (file)
@@ -25,6 +25,7 @@
 #define VC4_BUFMGR_H
 
 #include <stdint.h>
+#include "util/u_hash_table.h"
 #include "util/u_inlines.h"
 #include "vc4_qir.h"
 
@@ -87,11 +88,27 @@ vc4_bo_reference(struct vc4_bo *bo)
 static inline void
 vc4_bo_unreference(struct vc4_bo **bo)
 {
+        struct vc4_screen *screen;
         if (!*bo)
                 return;
 
-        if (pipe_reference(&(*bo)->reference, NULL))
-                vc4_bo_last_unreference(*bo);
+        if ((*bo)->private) {
+                /* Avoid the mutex for private BOs */
+                if (pipe_reference(&(*bo)->reference, NULL))
+                        vc4_bo_last_unreference(*bo);
+        } else {
+                screen = (*bo)->screen;
+                pipe_mutex_lock(screen->bo_handles_mutex);
+
+                if (pipe_reference(&(*bo)->reference, NULL)) {
+                        util_hash_table_remove(screen->bo_handles,
+                                               (void *)(uintptr_t)(*bo)->handle);
+                        vc4_bo_last_unreference(*bo);
+                }
+
+                pipe_mutex_unlock(screen->bo_handles_mutex);
+        }
+
         *bo = NULL;
 }
 
index c271a95..77013d9 100644 (file)
@@ -143,6 +143,8 @@ struct vc4_compiled_shader {
         /** bitmask of which inputs are color inputs, for flat shade handling. */
         uint32_t color_inputs;
 
+        bool disable_early_z;
+
         uint8_t num_inputs;
 
         /* Byte offsets for the start of the vertex attributes 0-7, and the
index 5d64797..8b192da 100644 (file)
@@ -71,7 +71,9 @@ vc4_emit_state(struct pipe_context *pctx)
                 vc4->draw_max_y = MAX2(vc4->draw_max_y, maxy);
         }
 
-        if (vc4->dirty & (VC4_DIRTY_RASTERIZER | VC4_DIRTY_ZSA)) {
+        if (vc4->dirty & (VC4_DIRTY_RASTERIZER |
+                          VC4_DIRTY_ZSA |
+                          VC4_DIRTY_COMPILED_FS)) {
                 uint8_t ez_enable_mask_out = ~0;
 
                 /* HW-2905: If the RCL ends up doing a full-res load when
@@ -83,7 +85,7 @@ vc4_emit_state(struct pipe_context *pctx)
                  * was seeing bad rendering on glxgears -samples 4 even in
                  * that case.
                  */
-                if (vc4->msaa)
+                if (vc4->msaa || vc4->prog.fs->disable_early_z)
                         ez_enable_mask_out &= ~VC4_CONFIG_BITS_EARLY_Z;
 
                 cl_u8(&bcl, VC4_PACKET_CONFIGURATION_BITS);
index d31b673..5df798a 100644 (file)
@@ -110,11 +110,12 @@ qir_opt_vpm(struct vc4_compile *c)
                          * sources are independent of previous instructions
                          */
                         if (temps == 1) {
-                                list_del(&inst->link);
                                 inst->src[j] = mov->src[0];
-                                list_replace(&mov->link, &inst->link);
-                                c->defs[temp] = NULL;
-                                free(mov);
+
+                                list_del(&inst->link);
+                                list_addtail(&inst->link, &mov->link);
+                                qir_remove_instruction(c, mov);
+
                                 progress = true;
                                 break;
                         }
index 5d036eb..3572cf7 100644 (file)
@@ -2032,6 +2032,11 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
                         shader->input_slots[shader->num_inputs] = *slot;
                         shader->num_inputs++;
                 }
+
+                /* Note: the temporary clone in c->s has been freed. */
+                nir_shader *orig_shader = key->shader_state->base.ir.nir;
+                if (orig_shader->info.outputs_written & (1 << FRAG_RESULT_DEPTH))
+                        shader->disable_early_z = true;
         } else {
                 shader->num_inputs = c->num_inputs;
 
index 20f137a..aabe593 100644 (file)
@@ -534,8 +534,8 @@ vc4_resource_from_handle(struct pipe_screen *pscreen,
         struct vc4_resource *rsc = vc4_resource_setup(pscreen, tmpl);
         struct pipe_resource *prsc = &rsc->base.b;
         struct vc4_resource_slice *slice = &rsc->slices[0];
-        uint32_t expected_stride = align(prsc->width0 / rsc->cpp,
-                                         vc4_utile_width(rsc->cpp));
+        uint32_t expected_stride =
+            align(prsc->width0, vc4_utile_width(rsc->cpp)) * rsc->cpp;
 
         if (!rsc)
                 return NULL;
index 733275a..0fb2bbc 100644 (file)
@@ -30,6 +30,7 @@
 #include "util/u_debug.h"
 #include "util/u_memory.h"
 #include "util/u_format.h"
+#include "util/u_hash_table.h"
 #include "util/ralloc.h"
 
 #include "vc4_screen.h"
@@ -82,7 +83,11 @@ vc4_screen_get_vendor(struct pipe_screen *pscreen)
 static void
 vc4_screen_destroy(struct pipe_screen *pscreen)
 {
+        struct vc4_screen *screen = vc4_screen(pscreen);
+
+        util_hash_table_destroy(screen->bo_handles);
         vc4_bufmgr_destroy(pscreen);
+        close(screen->fd);
         ralloc_free(pscreen);
 }
 
@@ -488,6 +493,18 @@ vc4_screen_is_format_supported(struct pipe_screen *pscreen,
         return retval == usage;
 }
 
+#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
+
+static unsigned handle_hash(void *key)
+{
+    return PTR_TO_UINT(key);
+}
+
+static int handle_compare(void *key1, void *key2)
+{
+    return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
+}
+
 struct pipe_screen *
 vc4_screen_create(int fd)
 {
@@ -505,6 +522,8 @@ vc4_screen_create(int fd)
 
         screen->fd = fd;
         list_inithead(&screen->bo_cache.time_list);
+        pipe_mutex_init(screen->bo_handles_mutex);
+        screen->bo_handles = util_hash_table_create(handle_hash, handle_compare);
 
         vc4_fence_init(screen);
 
index 03f76b2..281d254 100644 (file)
@@ -73,6 +73,9 @@ struct vc4_screen {
                 uint32_t bo_count;
         } bo_cache;
 
+        struct util_hash_table *bo_handles;
+        pipe_mutex bo_handles_mutex;
+
         uint32_t bo_size;
         uint32_t bo_count;
 };
index ca5812b..396f563 100644 (file)
@@ -318,17 +318,7 @@ struct pipe_blend_state
 
 struct pipe_blend_color
 {
-   /**
-    * Making the color array explicitly 16-byte aligned provides a hint to
-    * compilers to make more efficient auto-vectorization optimizations.
-    * The actual performance gains from vectorizing the blend color array are
-    * fairly minimal, if any, but the alignment is necessary to work around
-    * buggy vectorization in some compilers which fail to generate the correct
-    * unaligned accessors resulting in a segfault.  Specifically several
-    * versions of the Intel compiler are known to be affected but it's likely
-    * others are as well.
-    */
-   PIPE_ALIGN_VAR(16) float color[4];
+   float color[4];
 };
 
 
index d70439a..c9d9ab1 100644 (file)
@@ -874,8 +874,8 @@ static void enc_ReleaseTasks(struct list_head *head)
 {
    struct encode_task *i, *next;
 
-   if (!head)
-          return;
+   if (!head || !head->next)
+      return;
 
    LIST_FOR_EACH_ENTRY_SAFE(i, next, head, list) {
       pipe_resource_reference(&i->bitstream, NULL);
index f42dd25..06ade45 100644 (file)
@@ -1,5 +1,11 @@
 include $(top_srcdir)/src/gallium/Automake.inc
 
+if HAVE_ANDROID
+if HAVE_SHARED_GLAPI
+SHARED_GLAPI_LIB = $(top_builddir)/src/mapi/shared-glapi/libglapi.la
+endif
+endif
+
 AM_CFLAGS = \
        -I$(top_srcdir)/src/mapi \
        -I$(top_srcdir)/src/mesa \
@@ -49,6 +55,7 @@ gallium_dri_la_LIBADD = \
        $(top_builddir)/src/gallium/drivers/noop/libnoop.la \
        $(top_builddir)/src/gallium/drivers/rbug/librbug.la \
        $(top_builddir)/src/gallium/drivers/trace/libtrace.la \
+       $(SHARED_GLAPI_LIB) \
        $(SELINUX_LIBS) \
        $(EXPAT_LIBS) \
        $(LIBDRM_LIBS) \
index 0ce010e..8b776b7 100644 (file)
@@ -345,7 +345,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
    AddrSurfInfoIn.flags.dccCompatible = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
                                         !(surf->flags & RADEON_SURF_SCANOUT) &&
                                         !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
-                                        !compressed && AddrDccIn.numSamples <= 1;
+                                        !compressed && AddrDccIn.numSamples <= 1 &&
+                                        surf->last_level == 0;
 
    /* This disables incorrect calculations (hacks) in addrlib. */
    AddrSurfInfoIn.flags.noStencil = 1;
index 7fcb6d2..74c77c5 100644 (file)
 #include "util/u_memory.h"
 #include "pipe/p_compiler.h"
 #include "util/u_hash_table.h"
-#include <sys/types.h>
+#ifdef MAJOR_IN_MKDEV
+#include <sys/mkdev.h>
+#endif
+#ifdef MAJOR_IN_SYSMACROS
+#include <sys/sysmacros.h>
+#endif
 #include <sys/stat.h>
 #include <unistd.h>
 
index 21ac0d7..07eca99 100644 (file)
@@ -211,7 +211,29 @@ kms_sw_displaytarget_map(struct sw_winsys *ws,
 }
 
 static struct kms_sw_displaytarget *
-kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd)
+kms_sw_displaytarget_find_and_ref(struct kms_sw_winsys *kms_sw,
+                                  unsigned int kms_handle)
+{
+   struct kms_sw_displaytarget *kms_sw_dt;
+
+   LIST_FOR_EACH_ENTRY(kms_sw_dt, &kms_sw->bo_list, link) {
+      if (kms_sw_dt->handle == kms_handle) {
+         kms_sw_dt->ref_count++;
+
+         DEBUG_PRINT("KMS-DEBUG: imported buffer %u (size %u)\n",
+                     kms_sw_dt->handle, kms_sw_dt->size);
+
+         return kms_sw_dt;
+      }
+   }
+
+   return NULL;
+}
+
+static struct kms_sw_displaytarget *
+kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd,
+                                    unsigned width, unsigned height,
+                                    unsigned stride)
 {
    uint32_t handle = -1;
    struct kms_sw_displaytarget * kms_sw_dt;
@@ -222,6 +244,10 @@ kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd)
    if (ret)
       return NULL;
 
+   kms_sw_dt = kms_sw_displaytarget_find_and_ref(kms_sw, handle);
+   if (kms_sw_dt)
+      return kms_sw_dt;
+
    kms_sw_dt = CALLOC_STRUCT(kms_sw_displaytarget);
    if (!kms_sw_dt)
       return NULL;
@@ -229,6 +255,9 @@ kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd)
    kms_sw_dt->ref_count = 1;
    kms_sw_dt->handle = handle;
    kms_sw_dt->size = lseek(fd, 0, SEEK_END);
+   kms_sw_dt->width = width;
+   kms_sw_dt->height = height;
+   kms_sw_dt->stride = stride;
 
    if (kms_sw_dt->size == (off_t)-1) {
       FREE(kms_sw_dt);
@@ -274,25 +303,18 @@ kms_sw_displaytarget_from_handle(struct sw_winsys *ws,
 
    switch(whandle->type) {
    case DRM_API_HANDLE_TYPE_FD:
-      kms_sw_dt = kms_sw_displaytarget_add_from_prime(kms_sw, whandle->handle);
-      if (kms_sw_dt) {
-         kms_sw_dt->ref_count++;
-         kms_sw_dt->width = templ->width0;
-         kms_sw_dt->height = templ->height0;
-         kms_sw_dt->stride = whandle->stride;
+      kms_sw_dt = kms_sw_displaytarget_add_from_prime(kms_sw, whandle->handle,
+                                                      templ->width0,
+                                                      templ->height0,
+                                                      whandle->stride);
+      if (kms_sw_dt)
          *stride = kms_sw_dt->stride;
-      }
       return (struct sw_displaytarget *)kms_sw_dt;
    case DRM_API_HANDLE_TYPE_KMS:
-      LIST_FOR_EACH_ENTRY(kms_sw_dt, &kms_sw->bo_list, link) {
-         if (kms_sw_dt->handle == whandle->handle) {
-            kms_sw_dt->ref_count++;
-
-            DEBUG_PRINT("KMS-DEBUG: imported buffer %u (size %u)\n", kms_sw_dt->handle, kms_sw_dt->size);
-
-            *stride = kms_sw_dt->stride;
-            return (struct sw_displaytarget *)kms_sw_dt;
-         }
+      kms_sw_dt = kms_sw_displaytarget_find_and_ref(kms_sw, whandle->handle);
+      if (kms_sw_dt) {
+         *stride = kms_sw_dt->stride;
+         return (struct sw_displaytarget *)kms_sw_dt;
       }
       /* fallthrough */
    default:
index 0f4657a..95b4c2c 100644 (file)
 #include <string.h>
 #include <stdint.h>
 
-#include <sys/types.h>
+#ifdef MAJOR_IN_MKDEV
+#include <sys/mkdev.h>
+#endif
+#ifdef MAJOR_IN_SYSMACROS
+#include <sys/sysmacros.h>
+#endif
 #include <sys/stat.h>
 #include <unistd.h>
 #include <errno.h>
index b3860db..653cbeb 100644 (file)
@@ -39,11 +39,9 @@ __glXSendError(Display * dpy, int_fast8_t errorCode, uint_fast32_t resourceID,
                uint_fast16_t minorCode, bool coreX11error)
 {
    struct glx_display *glx_dpy = __glXInitialize(dpy);
-   struct glx_context *gc = __glXGetCurrentContext();
    xError error;
 
    assert(glx_dpy);
-   assert(gc);
 
    LockDisplay(dpy);
 
@@ -59,7 +57,7 @@ __glXSendError(Display * dpy, int_fast8_t errorCode, uint_fast32_t resourceID,
    error.sequenceNumber = dpy->request;
    error.resourceID = resourceID;
    error.minorCode = minorCode;
-   error.majorCode = gc ? gc->majorOpcode : 0;
+   error.majorCode = glx_dpy->majorOpcode;
 
    _XError(dpy, &error);
 
index a0c1e3d..24c073c 100644 (file)
@@ -328,7 +328,7 @@ GetDrawableAttribute(Display * dpy, GLXDrawable drawable,
        *   the calling thread's current context a GLXBadDrawable error is
        *   generated."
        */
-      if (pdraw == NULL || gc == NULL || gc->currentDpy != dpy ||
+      if (pdraw == NULL || gc == &dummyContext || gc->currentDpy != dpy ||
          (gc->currentDrawable != drawable &&
          gc->currentReadable != drawable)) {
          __glXSendError(dpy, GLXBadDrawable, drawable,
index 3856032..8191da0 100644 (file)
@@ -524,7 +524,7 @@ glXWaitGL(void)
 {
    struct glx_context *gc = __glXGetCurrentContext();
 
-   if (gc && gc->vtable->wait_gl)
+   if (gc != &dummyContext && gc->vtable->wait_gl)
       gc->vtable->wait_gl(gc);
 }
 
@@ -537,7 +537,7 @@ glXWaitX(void)
 {
    struct glx_context *gc = __glXGetCurrentContext();
 
-   if (gc && gc->vtable->wait_x)
+   if (gc != &dummyContext && gc->vtable->wait_x)
       gc->vtable->wait_x(gc);
 }
 
@@ -546,7 +546,7 @@ glXUseXFont(Font font, int first, int count, int listBase)
 {
    struct glx_context *gc = __glXGetCurrentContext();
 
-   if (gc && gc->vtable->use_x_font)
+   if (gc != &dummyContext && gc->vtable->use_x_font)
       gc->vtable->use_x_font(gc, font, first, count, listBase);
 }
 
@@ -838,7 +838,7 @@ glXSwapBuffers(Display * dpy, GLXDrawable drawable)
       __GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, drawable);
 
       if (pdraw != NULL) {
-         Bool flush = gc && drawable == gc->currentDrawable;
+         Bool flush = gc != &dummyContext && drawable == gc->currentDrawable;
 
          (*pdraw->psc->driScreen->swapBuffers)(pdraw, 0, 0, 0, flush);
          return;
@@ -855,7 +855,7 @@ glXSwapBuffers(Display * dpy, GLXDrawable drawable)
     ** The calling thread may or may not have a current context.  If it
     ** does, send the context tag so the server can do a flush.
     */
-   if ((gc != NULL) && (dpy == gc->currentDpy) &&
+   if ((gc != &dummyContext) && (dpy == gc->currentDpy) &&
        ((drawable == gc->currentDrawable)
         || (drawable == gc->currentReadable))) {
       tag = gc->currentContextTag;
@@ -1388,7 +1388,7 @@ _GLX_PUBLIC Display *
 glXGetCurrentDisplay(void)
 {
    struct glx_context *gc = __glXGetCurrentContext();
-   if (NULL == gc)
+   if (gc == &dummyContext)
       return NULL;
    return gc->currentDpy;
 }
@@ -1751,7 +1751,7 @@ __glXSwapIntervalSGI(int interval)
    CARD32 *interval_ptr;
    CARD8 opcode;
 
-   if (gc == NULL) {
+   if (gc == &dummyContext) {
       return GLX_BAD_CONTEXT;
    }
 
@@ -1805,7 +1805,7 @@ __glXSwapIntervalMESA(unsigned int interval)
 #ifdef GLX_DIRECT_RENDERING
    struct glx_context *gc = __glXGetCurrentContext();
 
-   if (gc != NULL && gc->isDirect) {
+   if (gc != &dummyContext && gc->isDirect) {
       struct glx_screen *psc;
 
       psc = GetGLXScreenConfigs( gc->currentDpy, gc->screen);
@@ -1827,7 +1827,7 @@ __glXGetSwapIntervalMESA(void)
 #ifdef GLX_DIRECT_RENDERING
    struct glx_context *gc = __glXGetCurrentContext();
 
-   if (gc != NULL && gc->isDirect) {
+   if (gc != &dummyContext && gc->isDirect) {
       struct glx_screen *psc;
 
       psc = GetGLXScreenConfigs( gc->currentDpy, gc->screen);
@@ -1857,7 +1857,7 @@ __glXGetVideoSyncSGI(unsigned int *count)
    __GLXDRIdrawable *pdraw;
 #endif
 
-   if (!gc)
+   if (gc == &dummyContext)
       return GLX_BAD_CONTEXT;
 
 #ifdef GLX_DIRECT_RENDERING
@@ -1899,7 +1899,7 @@ __glXWaitVideoSyncSGI(int divisor, int remainder, unsigned int *count)
    if (divisor <= 0 || remainder < 0)
       return GLX_BAD_VALUE;
 
-   if (!gc)
+   if (gc == &dummyContext)
       return GLX_BAD_CONTEXT;
 
 #ifdef GLX_DIRECT_RENDERING
@@ -2212,7 +2212,7 @@ __glXSwapBuffersMscOML(Display * dpy, GLXDrawable drawable,
    struct glx_screen *psc = pdraw ? pdraw->psc : NULL;
 #endif
 
-   if (!gc) /* no GLX for this */
+   if (gc == &dummyContext) /* no GLX for this */
       return -1;
 
 #ifdef GLX_DIRECT_RENDERING
@@ -2392,7 +2392,7 @@ __glXCopySubBufferMESA(Display * dpy, GLXDrawable drawable,
     ** does, send the context tag so the server can do a flush.
     */
    gc = __glXGetCurrentContext();
-   if ((gc != NULL) && (dpy == gc->currentDpy) &&
+   if ((gc != &dummyContext) && (dpy == gc->currentDpy) &&
        ((drawable == gc->currentDrawable) ||
         (drawable == gc->currentReadable))) {
       tag = gc->currentContextTag;
@@ -2431,7 +2431,7 @@ __glXBindTexImageEXT(Display * dpy,
 {
    struct glx_context *gc = __glXGetCurrentContext();
 
-   if (gc == NULL || gc->vtable->bind_tex_image == NULL)
+   if (gc == &dummyContext || gc->vtable->bind_tex_image == NULL)
       return;
 
    gc->vtable->bind_tex_image(dpy, drawable, buffer, attrib_list);
@@ -2442,7 +2442,7 @@ __glXReleaseTexImageEXT(Display * dpy, GLXDrawable drawable, int buffer)
 {
    struct glx_context *gc = __glXGetCurrentContext();
 
-   if (gc == NULL || gc->vtable->release_tex_image == NULL)
+   if (gc == &dummyContext || gc->vtable->release_tex_image == NULL)
       return;
 
    gc->vtable->release_tex_image(dpy, drawable, buffer);
index b7252a7..098304d 100644 (file)
@@ -19,13 +19,13 @@ static void *__glXGLVNDGetProcAddress(const GLubyte *procName)
 
 static unsigned FindGLXFunction(const GLubyte *name)
 {
-    unsigned first = 0;
-    unsigned last = DI_FUNCTION_COUNT - 1;
+    int first = 0;
+    int last = DI_FUNCTION_COUNT - 1;
 
     while (first <= last) {
-        unsigned middle = (first + last) / 2;
-        int comp = strcmp((const char *) name,
-                          __glXDispatchTableStrings[middle]);
+        int middle = (first + last) / 2;
+        int comp = strcmp(__glXDispatchTableStrings[middle],
+                          (const char *) name);
 
         if (comp < 0)
             first = middle + 1;
index 9108ec2..4debf06 100644 (file)
@@ -106,7 +106,7 @@ glXQueryCurrentRendererIntegerMESA(int attribute, unsigned int *value)
 {
    struct glx_context *gc = __glXGetCurrentContext();
 
-   if (gc == NULL)
+   if (gc == &dummyContext)
       return False;
 
    return __glXQueryRendererInteger(gc->psc, attribute, value);
@@ -166,7 +166,7 @@ glXQueryCurrentRendererStringMESA(int attribute)
 {
    struct glx_context *gc = __glXGetCurrentContext();
 
-   if (gc == NULL)
+   if (gc == &dummyContext)
       return False;
 
    return __glXQueryRendererString(gc->psc, attribute);
index db20749..801f54a 100644 (file)
@@ -75,7 +75,20 @@ indirect_create_context_attribs(struct glx_screen *base,
    return indirect_create_context(base, config_base, shareList, 0);
 }
 
-__thread void *__glX_tls_Context = NULL;
+/* This is necessary so that we don't have to link with glxcurrent.c
+ * which would require us to link with X libraries and what not.
+ */
+GLubyte dummyBuffer[__GLX_BUFFER_LIMIT_SIZE];
+struct glx_context_vtable dummyVtable;
+struct glx_context dummyContext = {
+   &dummyBuffer[0],
+   &dummyBuffer[0],
+   &dummyBuffer[0],
+   &dummyBuffer[__GLX_BUFFER_LIMIT_SIZE],
+   sizeof(dummyBuffer),
+   &dummyVtable
+};
+__thread void *__glX_tls_Context = &dummyContext;
 
 #if !defined(GLX_USE_TLS)
 extern "C" struct glx_context *
index 44e2804..8bc28a9 100644 (file)
@@ -79,7 +79,7 @@
     </group>
   </struct>
 
-  <struct name="BLEND_STATE" length="2">
+  <struct name="BLEND_STATE_ENTRY" length="2">
     <field name="Color Buffer Blend Enable" start="31" end="31" type="bool"/>
     <field name="Independent Alpha Blend Enable" start="30" end="30" type="bool"/>
     <field name="Alpha Blend Function" start="26" end="28" type="uint">
     <field name="Post-Blend Color Clamp Enable" start="32" end="32" type="bool"/>
   </struct>
 
+  <struct name="BLEND_STATE" length="16">
+    <group count="8" start="0" size="64">
+      <field name="Entry" start="0" end="63" type="BLEND_STATE_ENTRY"/>
+    </group>
+  </struct>
+
   <struct name="CC_VIEWPORT" length="2">
     <field name="Minimum Depth" start="0" end="31" type="float"/>
     <field name="Maximum Depth" start="32" end="63" type="float"/>
     <field name="CLIP Enable" start="95" end="95" type="bool"/>
     <field name="API Mode" start="94" end="94" type="uint">
       <value name="APIMODE_OGL" value="0"/>
+      <value name="APIMODE_D3D" value="1"/>
     </field>
     <field name="Viewport XY ClipTest Enable" start="92" end="92" type="bool"/>
     <field name="Viewport Z ClipTest Enable" start="91" end="91" type="bool"/>
index 2bbfcb7..cbeb2e1 100644 (file)
     </group>
   </struct>
 
-  <struct name="BLEND_STATE" length="2">
+  <struct name="BLEND_STATE_ENTRY" length="2">
     <field name="Color Buffer Blend Enable" start="31" end="31" type="bool"/>
     <field name="Independent Alpha Blend Enable" start="30" end="30" type="bool"/>
     <field name="Alpha Blend Function" start="26" end="28" type="uint">
     <field name="Post-Blend Color Clamp Enable" start="32" end="32" type="bool"/>
   </struct>
 
+  <struct name="BLEND_STATE" length="16">
+    <group count="8" start="0" size="64">
+      <field name="Entry" start="0" end="63" type="BLEND_STATE_ENTRY"/>
+    </group>
+  </struct>
+
   <struct name="CC_VIEWPORT" length="2">
     <field name="Minimum Depth" start="0" end="31" type="float"/>
     <field name="Maximum Depth" start="32" end="63" type="float"/>
     <field name="Clip Enable" start="95" end="95" type="bool"/>
     <field name="API Mode" start="94" end="94" type="uint">
       <value name="APIMODE_OGL" value="0"/>
+      <value name="APIMODE_D3D" value="1"/>
     </field>
     <field name="Viewport XY ClipTest Enable" start="92" end="92" type="bool"/>
     <field name="Viewport Z ClipTest Enable" start="91" end="91" type="bool"/>
index 9ab432c..40f3c31 100644 (file)
     </group>
   </struct>
 
-  <struct name="BLEND_STATE" length="2">
+  <struct name="BLEND_STATE_ENTRY" length="2">
     <field name="Color Buffer Blend Enable" start="31" end="31" type="bool"/>
     <field name="Independent Alpha Blend Enable" start="30" end="30" type="bool"/>
     <field name="Alpha Blend Function" start="26" end="28" type="uint">
     <field name="Post-Blend Color Clamp Enable" start="32" end="32" type="bool"/>
   </struct>
 
+  <struct name="BLEND_STATE" length="16">
+    <group count="8" start="0" size="64">
+      <field name="Entry" start="0" end="63" type="BLEND_STATE_ENTRY"/>
+    </group>
+  </struct>
+
   <struct name="CC_VIEWPORT" length="2">
     <field name="Minimum Depth" start="0" end="31" type="float"/>
     <field name="Maximum Depth" start="32" end="63" type="float"/>
     <field name="Clip Enable" start="95" end="95" type="bool"/>
     <field name="API Mode" start="94" end="94" type="uint">
       <value name="APIMODE_OGL" value="0"/>
+      <value name="APIMODE_D3D" value="1"/>
     </field>
     <field name="Viewport XY ClipTest Enable" start="92" end="92" type="bool"/>
     <field name="Viewport Z ClipTest Enable" start="91" end="91" type="bool"/>
index 80d40fb..e3b0cdb 100644 (file)
     <field name="User Clip Distance Cull Test Enable Bitmask" start="32" end="39" type="uint"/>
     <field name="Clip Enable" start="95" end="95" type="bool"/>
     <field name="API Mode" start="94" end="94" type="uint">
-      <value name="OGL" value="0"/>
+      <value name="APIMODE_OGL" value="0"/>
+      <value name="APIMODE_D3D" value="1"/>
     </field>
     <field name="Viewport XY Clip Test Enable" start="92" end="92" type="bool"/>
     <field name="Guardband Clip Test Enable" start="90" end="90" type="bool"/>
     <field name="User Clip Distance Clip Test Enable Bitmask" start="80" end="87" type="uint"/>
     <field name="Clip Mode" start="77" end="79" type="uint">
-      <value name="NORMAL" value="0"/>
-      <value name="REJECT_ALL" value="3"/>
-      <value name="ACCEPT_ALL" value="4"/>
+      <value name="CLIPMODE_NORMAL" value="0"/>
+      <value name="CLIPMODE_REJECT_ALL" value="3"/>
+      <value name="CLIPMODE_ACCEPT_ALL" value="4"/>
     </field>
     <field name="Perspective Divide Disable" start="73" end="73" type="bool"/>
     <field name="Non-Perspective Barycentric Enable" start="72" end="72" type="bool"/>
     <field name="3D Command Sub Opcode" start="16" end="23" type="uint" default="30"/>
     <field name="DWord Length" start="0" end="7" type="uint" default="3"/>
     <field name="SO Function Enable" start="63" end="63" type="uint"/>
-    <field name="API Rendering Disable" start="62" end="62" type="uint"/>
+    <field name="Rendering Disable" start="62" end="62" type="uint"/>
     <field name="Render Stream Select" start="59" end="60" type="uint"/>
     <field name="Reorder Mode" start="58" end="58" type="uint">
       <value name="LEADING" value="0"/>
index 94b7d28..4333b89 100644 (file)
     <field name="User Clip Distance Cull Test Enable Bitmask" start="32" end="39" type="uint"/>
     <field name="Clip Enable" start="95" end="95" type="bool"/>
     <field name="API Mode" start="94" end="94" type="uint">
-      <value name="OGL" value="0"/>
+      <value name="APIMODE_OGL" value="0"/>
+      <value name="APIMODE_D3D" value="1"/>
     </field>
     <field name="Viewport XY Clip Test Enable" start="92" end="92" type="bool"/>
     <field name="Guardband Clip Test Enable" start="90" end="90" type="bool"/>
     <field name="User Clip Distance Clip Test Enable Bitmask" start="80" end="87" type="uint"/>
     <field name="Clip Mode" start="77" end="79" type="uint">
-      <value name="NORMAL" value="0"/>
-      <value name="REJECT_ALL" value="3"/>
-      <value name="ACCEPT_ALL" value="4"/>
+      <value name="CLIPMODE_NORMAL" value="0"/>
+      <value name="CLIPMODE_REJECT_ALL" value="3"/>
+      <value name="CLIPMODE_ACCEPT_ALL" value="4"/>
     </field>
     <field name="Perspective Divide Disable" start="73" end="73" type="bool"/>
     <field name="Non-Perspective Barycentric Enable" start="72" end="72" type="bool"/>
     <field name="3D Command Sub Opcode" start="16" end="23" type="uint" default="30"/>
     <field name="DWord Length" start="0" end="7" type="uint" default="3"/>
     <field name="SO Function Enable" start="63" end="63" type="uint"/>
-    <field name="API Rendering Disable" start="62" end="62" type="uint"/>
+    <field name="Rendering Disable" start="62" end="62" type="uint"/>
     <field name="Render Stream Select" start="59" end="60" type="uint"/>
     <field name="Reorder Mode" start="58" end="58" type="uint">
       <value name="LEADING" value="0"/>
index 1fd6683..8e03ee6 100644 (file)
@@ -46,23 +46,25 @@ AM_CPPFLAGS = \
        -I$(top_srcdir)/src/gallium/auxiliary \
        -I$(top_srcdir)/src/gallium/include
 
-libisl_la_CFLAGS = $(CFLAGS) -Wno-override-init
+AM_CFLAGS = \
+       $(VISIBILITY_CFLAGS) \
+       -Wno-override-init
 
 libisl_la_LIBADD = $(ISL_GEN_LIBS)
 
 libisl_la_SOURCES = $(ISL_FILES) $(ISL_GENERATED_FILES)
 
 libisl_gen7_la_SOURCES = $(ISL_GEN7_FILES)
-libisl_gen7_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=70
+libisl_gen7_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=70
 
 libisl_gen75_la_SOURCES = $(ISL_GEN75_FILES)
-libisl_gen75_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=75
+libisl_gen75_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=75
 
 libisl_gen8_la_SOURCES = $(ISL_GEN8_FILES)
-libisl_gen8_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=80
+libisl_gen8_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=80
 
 libisl_gen9_la_SOURCES = $(ISL_GEN9_FILES)
-libisl_gen9_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=90
+libisl_gen9_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=90
 
 BUILT_SOURCES = $(ISL_GENERATED_FILES)
 
index 77b570d..0bdfa9d 100644 (file)
@@ -490,27 +490,27 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
 
       case ISL_MSAA_LAYOUT_ARRAY:
          assert(info->depth == 1);
-         assert(info->array_len == 1);
+         assert(info->levels == 1);
          assert(!isl_format_is_compressed(info->format));
 
          *phys_level0_sa = (struct isl_extent4d) {
             .w = info->width,
             .h = info->height,
             .d = 1,
-            .a = info->samples,
+            .a = info->array_len * info->samples,
          };
          break;
 
       case ISL_MSAA_LAYOUT_INTERLEAVED:
          assert(info->depth == 1);
-         assert(info->array_len == 1);
+         assert(info->levels == 1);
          assert(!isl_format_is_compressed(info->format));
 
          *phys_level0_sa = (struct isl_extent4d) {
             .w = info->width,
             .h = info->height,
             .d = 1,
-            .a = 1,
+            .a = info->array_len,
          };
 
          isl_msaa_interleaved_scale_px_to_sa(info->samples,
index ef86228..64aced8 100644 (file)
@@ -989,7 +989,7 @@ isl_has_matching_typed_storage_image_format(const struct brw_device_info *devinf
 static inline bool
 isl_tiling_is_any_y(enum isl_tiling tiling)
 {
-   return (1u << tiling) & ISL_TILING_ANY_MASK;
+   return (1u << tiling) & ISL_TILING_ANY_Y_MASK;
 }
 
 static inline bool
index b2317d8..1c985b6 100644 (file)
@@ -430,8 +430,15 @@ isl_genX(buffer_fill_state_s)(void *state,
    uint32_t num_elements = info->size / info->stride;
 
    if (GEN_GEN >= 7) {
+      /* From the IVB PRM, SURFACE_STATE::Height,
+       *
+       *    For typed buffer and structured buffer surfaces, the number
+       *    of entries in the buffer ranges from 1 to 2^27. For raw buffer
+       *    surfaces, the number of entries in the buffer is the number of bytes
+       *    which can range from 1 to 2^30.
+       */
       if (info->format == ISL_FORMAT_RAW) {
-         assert(num_elements <= (1ull << 31));
+         assert(num_elements <= (1ull << 30));
          assert((num_elements & 3) == 0);
       } else {
          assert(num_elements <= (1ull << 27));
index 0e521cf..6b1015a 100644 (file)
@@ -61,7 +61,9 @@ AM_CPPFLAGS = \
        -I$(top_builddir)/src/intel \
        -I$(top_srcdir)/src/intel
 
-AM_CFLAGS = -Wno-override-init -msse2
+AM_CFLAGS = \
+       $(VISIBILITY_CFLAGS) \
+       -Wno-override-init -msse2
 
 libanv_gen7_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=70
 libanv_gen7_la_SOURCES = $(GEN7_FILES)
@@ -159,6 +161,7 @@ libvulkan_intel_la_LDFLAGS = \
        -module \
        -no-undefined \
        -avoid-version \
+       $(BSYMBOLIC) \
        $(GC_SECTIONS) \
        $(LD_NO_UNDEFINED)
 
index 20d3af1..ca78c09 100644 (file)
@@ -359,7 +359,7 @@ anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
    switch (cmd_buffer->device->info.gen) {
    case 7:
       if (cmd_buffer->device->info.is_haswell)
-         return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
+         return gen75_cmd_buffer_emit_state_base_address(cmd_buffer);
       else
          return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
    case 8:
@@ -741,20 +741,26 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
 {
    struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
    struct anv_subpass *subpass = cmd_buffer->state.subpass;
-   struct anv_pipeline_bind_map *map;
+   struct anv_pipeline *pipeline;
    uint32_t bias, state_offset;
 
    switch (stage) {
    case  MESA_SHADER_COMPUTE:
-      map = &cmd_buffer->state.compute_pipeline->bindings[stage];
+      pipeline = cmd_buffer->state.compute_pipeline;
       bias = 1;
       break;
    default:
-      map = &cmd_buffer->state.pipeline->bindings[stage];
+      pipeline = cmd_buffer->state.pipeline;
       bias = 0;
       break;
    }
 
+   if (!anv_pipeline_has_stage(pipeline, stage)) {
+      *bt_state = (struct anv_state) { 0, };
+      return VK_SUCCESS;
+   }
+
+   struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
    if (bias + map->surface_count == 0) {
       *bt_state = (struct anv_state) { 0, };
       return VK_SUCCESS;
@@ -907,13 +913,19 @@ VkResult
 anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
                              gl_shader_stage stage, struct anv_state *state)
 {
-   struct anv_pipeline_bind_map *map;
+   struct anv_pipeline *pipeline;
 
    if (stage == MESA_SHADER_COMPUTE)
-      map = &cmd_buffer->state.compute_pipeline->bindings[stage];
+      pipeline = cmd_buffer->state.compute_pipeline;
    else
-      map = &cmd_buffer->state.pipeline->bindings[stage];
+      pipeline = cmd_buffer->state.pipeline;
 
+   if (!anv_pipeline_has_stage(pipeline, stage)) {
+      *state = (struct anv_state) { 0, };
+      return VK_SUCCESS;
+   }
+
+   struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
    if (map->sampler_count == 0) {
       *state = (struct anv_state) { 0, };
       return VK_SUCCESS;
@@ -1080,10 +1092,14 @@ struct anv_state
 anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
                               gl_shader_stage stage)
 {
+   /* If we don't have this stage, bail. */
+   if (!anv_pipeline_has_stage(cmd_buffer->state.pipeline, stage))
+      return (struct anv_state) { .offset = 0 };
+
    struct anv_push_constants *data =
       cmd_buffer->state.push_constants[stage];
    const struct brw_stage_prog_data *prog_data =
-      cmd_buffer->state.pipeline->prog_data[stage];
+      anv_shader_bin_get_prog_data(cmd_buffer->state.pipeline->shaders[stage]);
 
    /* If we don't actually have any push constants, bail. */
    if (data == NULL || prog_data == NULL || prog_data->nr_params == 0)
index 448ae0e..4ab1802 100644 (file)
@@ -27,6 +27,8 @@
 #include <unistd.h>
 #include <fcntl.h>
 
+#include "util/mesa-sha1.h"
+
 #include "anv_private.h"
 
 /*
@@ -65,9 +67,8 @@ VkResult anv_CreateDescriptorSetLayout(
    struct anv_sampler **samplers =
       (struct anv_sampler **)&set_layout->binding[max_binding + 1];
 
+   memset(set_layout, 0, sizeof(*set_layout));
    set_layout->binding_count = max_binding + 1;
-   set_layout->shader_stages = 0;
-   set_layout->size = 0;
 
    for (uint32_t b = 0; b <= max_binding; b++) {
       /* Initialize all binding_layout entries to -1 */
@@ -202,6 +203,15 @@ void anv_DestroyDescriptorSetLayout(
    anv_free2(&device->alloc, pAllocator, set_layout);
 }
 
+static void
+sha1_update_descriptor_set_layout(struct mesa_sha1 *ctx,
+                                  const struct anv_descriptor_set_layout *layout)
+{
+   size_t size = sizeof(*layout) +
+                 sizeof(layout->binding[0]) * layout->binding_count;
+   _mesa_sha1_update(ctx, layout, size);
+}
+
 /*
  * Pipeline layouts.  These have nothing to do with the pipeline.  They are
  * just muttiple descriptor set layouts pasted together
@@ -246,6 +256,19 @@ VkResult anv_CreatePipelineLayout(
       }
    }
 
+   struct mesa_sha1 *ctx = _mesa_sha1_init();
+   for (unsigned s = 0; s < layout->num_sets; s++) {
+      sha1_update_descriptor_set_layout(ctx, layout->set[s].layout);
+      _mesa_sha1_update(ctx, &layout->set[s].dynamic_offset_start,
+                        sizeof(layout->set[s].dynamic_offset_start));
+   }
+   _mesa_sha1_update(ctx, &layout->num_sets, sizeof(layout->num_sets));
+   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+      _mesa_sha1_update(ctx, &layout->stage[s].has_dynamic_offsets,
+                        sizeof(layout->stage[s].has_dynamic_offsets));
+   }
+   _mesa_sha1_final(ctx, layout->sha1);
+
    *pPipelineLayout = anv_pipeline_layout_to_handle(layout);
 
    return VK_SUCCESS;
@@ -409,6 +432,11 @@ anv_descriptor_set_create(struct anv_device *device,
       (struct anv_buffer_view *) &set->descriptors[layout->size];
    set->buffer_count = layout->buffer_count;
 
+   /* By defining the descriptors to be zero now, we can later verify that
+    * a descriptor has not been populated with user data.
+    */
+   memset(set->descriptors, 0, sizeof(struct anv_descriptor) * layout->size);
+
    /* Go through and fill out immutable samplers if we have any */
    struct anv_descriptor *desc = set->descriptors;
    for (uint32_t b = 0; b < layout->binding_count; b++) {
index dd941b6..cd8fb3a 100644 (file)
@@ -372,7 +372,7 @@ void anv_GetPhysicalDeviceFeatures(
       .robustBufferAccess                       = true,
       .fullDrawIndexUint32                      = true,
       .imageCubeArray                           = false,
-      .independentBlend                         = pdevice->info->gen >= 8,
+      .independentBlend                         = true,
       .geometryShader                           = true,
       .tessellationShader                       = false,
       .sampleRateShading                        = false,
@@ -438,6 +438,10 @@ void anv_GetPhysicalDeviceProperties(
 
    const float time_stamp_base = devinfo->gen >= 9 ? 83.333 : 80.0;
 
+   /* See assertions made when programming the buffer surface state. */
+   const uint32_t max_raw_buffer_sz = devinfo->gen >= 7 ?
+                                      (1ul << 30) : (1ul << 27);
+
    VkSampleCountFlags sample_counts =
       isl_device_get_sample_counts(&pdevice->isl_dev);
 
@@ -448,8 +452,8 @@ void anv_GetPhysicalDeviceProperties(
       .maxImageDimensionCube                    = (1 << 14),
       .maxImageArrayLayers                      = (1 << 11),
       .maxTexelBufferElements                   = 128 * 1024 * 1024,
-      .maxUniformBufferRange                    = UINT32_MAX,
-      .maxStorageBufferRange                    = UINT32_MAX,
+      .maxUniformBufferRange                    = (1ul << 27),
+      .maxStorageBufferRange                    = max_raw_buffer_sz,
       .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
       .maxMemoryAllocationCount                 = UINT32_MAX,
       .maxSamplerAllocationCount                = 64 * 1024,
@@ -649,13 +653,15 @@ PFN_vkVoidFunction anv_GetInstanceProcAddr(
    return anv_lookup_entrypoint(pName);
 }
 
-/* The loader wants us to expose a second GetInstanceProcAddr function
- * to work around certain LD_PRELOAD issues seen in apps.
+/* With version 1+ of the loader interface the ICD should expose
+ * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
  */
+PUBLIC
 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
     VkInstance                                  instance,
     const char*                                 pName);
 
+PUBLIC
 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
     VkInstance                                  instance,
     const char*                                 pName)
@@ -869,7 +875,8 @@ VkResult anv_CreateDevice(
                        &device->dynamic_state_block_pool);
 
    anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024);
-   anv_pipeline_cache_init(&device->default_pipeline_cache, device);
+   anv_state_pool_init(&device->instruction_state_pool,
+                       &device->instruction_block_pool);
 
    anv_block_pool_init(&device->surface_state_block_pool, device, 4096);
 
@@ -944,6 +951,7 @@ void anv_DestroyDevice(
    anv_bo_pool_finish(&device->batch_bo_pool);
    anv_state_pool_finish(&device->dynamic_state_pool);
    anv_block_pool_finish(&device->dynamic_state_block_pool);
+   anv_state_pool_finish(&device->instruction_state_pool);
    anv_block_pool_finish(&device->instruction_block_pool);
    anv_state_pool_finish(&device->surface_state_pool);
    anv_block_pool_finish(&device->surface_state_block_pool);
@@ -1786,23 +1794,3 @@ void anv_DestroyFramebuffer(
 
    anv_free2(&device->alloc, pAllocator, fb);
 }
-
-void vkCmdDbgMarkerBegin(
-    VkCommandBuffer                              commandBuffer,
-    const char*                                 pMarker)
-   __attribute__ ((visibility ("default")));
-
-void vkCmdDbgMarkerEnd(
-   VkCommandBuffer                              commandBuffer)
-   __attribute__ ((visibility ("default")));
-
-void vkCmdDbgMarkerBegin(
-    VkCommandBuffer                              commandBuffer,
-    const char*                                 pMarker)
-{
-}
-
-void vkCmdDbgMarkerEnd(
-    VkCommandBuffer                              commandBuffer)
-{
-}
index 2896174..dcf25ee 100644 (file)
@@ -134,7 +134,6 @@ if opt_header:
         print "%s gen75_%s%s;" % (type, name, args)
         print "%s gen8_%s%s;" % (type, name, args)
         print "%s gen9_%s%s;" % (type, name, args)
-        print "%s anv_validate_%s%s;" % (type, name, args)
         print_guard_end(name)
     exit()
 
@@ -185,23 +184,24 @@ for type, name, args, num, h in entrypoints:
     print "   \"vk%s\\0\"" % name
     offsets.append(i)
     i += 2 + len(name) + 1
-print """   ;
+print "   ;"
 
-/* Weak aliases for all potential validate functions. These will resolve to
- * NULL if they're not defined, which lets the resolve_entrypoint() function
- * either pick a validate wrapper if available or just plug in the actual
- * entry point.
- */
-"""
-
-# Now generate the table of all entry points and their validation functions
+# Now generate the table of all entry points
 
 print "\nstatic const struct anv_entrypoint entrypoints[] = {"
 for type, name, args, num, h in entrypoints:
     print "   { %5d, 0x%08x }," % (offsets[num], h)
 print "};\n"
 
-for layer in [ "anv", "validate", "gen7", "gen75", "gen8", "gen9" ]:
+print """
+
+/* Weak aliases for all potential implementations. These will resolve to
+ * NULL if they're not defined, which lets the resolve_entrypoint() function
+ * either pick the correct entry point.
+ */
+"""
+
+for layer in [ "anv", "gen7", "gen75", "gen8", "gen9" ]:
     for type, name, args, num, h in entrypoints:
         print_guard_start(name)
         print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args)
@@ -214,27 +214,6 @@ for layer in [ "anv", "validate", "gen7", "gen75", "gen8", "gen9" ]:
     print "};\n"
 
 print """
-#ifdef DEBUG
-static bool enable_validate = true;
-#else
-static bool enable_validate = false;
-#endif
-
-/* We can't use symbols that need resolving (like, oh, getenv) in the resolve
- * function. This means that we have to determine whether or not to use the
- * validation layer sometime before that. The constructor function attribute asks
- * the dynamic linker to invoke determine_validate() at dlopen() time which
- * works.
- */
-static void __attribute__ ((constructor))
-determine_validate(void)
-{
-   const char *s = getenv("ANV_VALIDATE");
-
-   if (s)
-      enable_validate = atoi(s);
-}
-
 static const struct brw_device_info *dispatch_devinfo;
 
 void
@@ -246,9 +225,6 @@ anv_set_dispatch_devinfo(const struct brw_device_info *devinfo)
 void * __attribute__ ((noinline))
 anv_resolve_entrypoint(uint32_t index)
 {
-   if (enable_validate && validate_layer.entrypoints[index])
-      return validate_layer.entrypoints[index];
-
    if (dispatch_devinfo == NULL) {
       return anv_layer.entrypoints[index];
    }
@@ -277,17 +253,6 @@ anv_resolve_entrypoint(uint32_t index)
 }
 """
 
-# Now output ifuncs and their resolve helpers for all entry points. The
-# resolve helper calls resolve_entrypoint() with the entry point index, which
-# lets the resolver look it up in the table.
-
-for type, name, args, num, h in entrypoints:
-    print_guard_start(name)
-    print "static void *resolve_%s(void) { return anv_resolve_entrypoint(%d); }" % (name, num)
-    print "%s vk%s%s\n   __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name)
-    print_guard_end(name)
-
-
 # Now generate the hash table used for entry point look up.  This is a
 # uint16_t table of entry point indices. We use 0xffff to indicate an entry
 # in the hash table is empty.
index 77d9931..caf4a3e 100644 (file)
@@ -322,81 +322,6 @@ void anv_GetImageSubresourceLayout(
    }
 }
 
-VkResult
-anv_validate_CreateImageView(VkDevice _device,
-                             const VkImageViewCreateInfo *pCreateInfo,
-                             const VkAllocationCallbacks *pAllocator,
-                             VkImageView *pView)
-{
-   ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image);
-   const VkImageSubresourceRange *subresource;
-
-   /* Validate structure type before dereferencing it. */
-   assert(pCreateInfo);
-   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO);
-   subresource = &pCreateInfo->subresourceRange;
-
-   /* Validate viewType is in range before using it. */
-   assert(pCreateInfo->viewType >= VK_IMAGE_VIEW_TYPE_BEGIN_RANGE);
-   assert(pCreateInfo->viewType <= VK_IMAGE_VIEW_TYPE_END_RANGE);
-
-   /* Validate format is in range before using it. */
-   assert(pCreateInfo->format >= VK_FORMAT_BEGIN_RANGE);
-   assert(pCreateInfo->format <= VK_FORMAT_END_RANGE);
-
-   /* Validate channel swizzles. */
-   assert(pCreateInfo->components.r >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
-   assert(pCreateInfo->components.r <= VK_COMPONENT_SWIZZLE_END_RANGE);
-   assert(pCreateInfo->components.g >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
-   assert(pCreateInfo->components.g <= VK_COMPONENT_SWIZZLE_END_RANGE);
-   assert(pCreateInfo->components.b >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
-   assert(pCreateInfo->components.b <= VK_COMPONENT_SWIZZLE_END_RANGE);
-   assert(pCreateInfo->components.a >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
-   assert(pCreateInfo->components.a <= VK_COMPONENT_SWIZZLE_END_RANGE);
-
-   /* Validate subresource. */
-   assert(subresource->aspectMask != 0);
-   assert(subresource->levelCount > 0);
-   assert(subresource->layerCount > 0);
-   assert(subresource->baseMipLevel < image->levels);
-   assert(subresource->baseMipLevel + anv_get_levelCount(image, subresource) <= image->levels);
-   assert(subresource->baseArrayLayer < image->array_size);
-   assert(subresource->baseArrayLayer + anv_get_layerCount(image, subresource) <= image->array_size);
-   assert(pView);
-
-   MAYBE_UNUSED const VkImageAspectFlags view_format_aspects =
-      vk_format_aspects(pCreateInfo->format);
-
-   const VkImageAspectFlags ds_flags = VK_IMAGE_ASPECT_DEPTH_BIT
-                                     | VK_IMAGE_ASPECT_STENCIL_BIT;
-
-   /* Validate format. */
-   if (subresource->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
-      assert(subresource->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
-      assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
-      assert(view_format_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
-   } else if (subresource->aspectMask & ds_flags) {
-      assert((subresource->aspectMask & ~ds_flags) == 0);
-
-      assert(pCreateInfo->format == image->vk_format);
-
-      if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
-         assert(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT);
-         assert(view_format_aspects & VK_IMAGE_ASPECT_DEPTH_BIT);
-      }
-
-      if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
-         /* FINISHME: Is it legal to have an R8 view of S8? */
-         assert(image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
-         assert(view_format_aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
-      }
-   } else {
-      assert(!"bad VkImageSubresourceRange::aspectFlags");
-   }
-
-   return anv_CreateImageView(_device, pCreateInfo, pAllocator, pView);
-}
-
 static struct anv_state
 alloc_surface_state(struct anv_device *device,
                     struct anv_cmd_buffer *cmd_buffer)
@@ -628,18 +553,19 @@ void anv_buffer_view_init(struct anv_buffer_view *view,
    view->format = anv_get_isl_format(&device->info, pCreateInfo->format,
                                      VK_IMAGE_ASPECT_COLOR_BIT,
                                      VK_IMAGE_TILING_LINEAR);
+   const uint32_t format_bs = isl_format_get_layout(view->format)->bs;
    view->bo = buffer->bo;
    view->offset = buffer->offset + pCreateInfo->offset;
    view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
-                 buffer->size - view->offset : pCreateInfo->range;
+                 buffer->size - pCreateInfo->offset : pCreateInfo->range;
+   view->range = align_down_npot_u32(view->range, format_bs);
 
    if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) {
       view->surface_state = alloc_surface_state(device, cmd_buffer);
 
       anv_fill_buffer_surface_state(device, view->surface_state,
                                     view->format,
-                                    view->offset, view->range,
-                                    isl_format_get_layout(view->format)->bs);
+                                    view->offset, view->range, format_bs);
    } else {
       view->surface_state = (struct anv_state){ 0 };
    }
index dc098ef..af15c2c 100644 (file)
@@ -106,7 +106,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
    nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
                                                  vec4, "f_color");
    color_out->data.location = FRAG_RESULT_DATA0;
-   nir_store_var(&b, color_out, &tex->dest.ssa, 4);
+   nir_store_var(&b, color_out, &tex->dest.ssa, 0xf);
 
    return b.shader;
 }
index 06e1043..649c11f 100644 (file)
@@ -92,6 +92,21 @@ vk_format_for_size(int bs)
    }
 }
 
+/* This function returns the format corresponding to a single component of the
+ * RGB format for the given size returned by vk_format_for_size().
+ */
+static VkFormat
+vk_single_component_format_for_rgb_size(int bs)
+{
+   switch (bs) {
+   case 3: return VK_FORMAT_R8_UNORM;
+   case 6: return VK_FORMAT_R16_UNORM;
+   case 12: return VK_FORMAT_R32_UINT;
+   default:
+      unreachable("Invalid format block size");
+   }
+}
+
 static void
 create_iview(struct anv_cmd_buffer *cmd_buffer,
              struct anv_meta_blit2d_surf *surf,
@@ -99,13 +114,14 @@ create_iview(struct anv_cmd_buffer *cmd_buffer,
              VkImageUsageFlags usage,
              uint32_t width,
              uint32_t height,
+             VkFormat format,
              VkImage *img,
              struct anv_image_view *iview)
 {
    const VkImageCreateInfo image_info = {
       .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
       .imageType = VK_IMAGE_TYPE_2D,
-      .format = vk_format_for_size(surf->bs),
+      .format = format,
       .extent = {
          .width = width,
          .height = height,
@@ -179,6 +195,7 @@ blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer,
 
       create_iview(cmd_buffer, src, offset, VK_IMAGE_USAGE_SAMPLED_BIT,
                    rect->src_x + rect->width, rect->src_y + rect->height,
+                   vk_format_for_size(src->bs),
                    &tmp->image, &tmp->iview);
 
       anv_CreateDescriptorPool(vk_device,
@@ -328,10 +345,11 @@ blit2d_bind_dst(struct anv_cmd_buffer *cmd_buffer,
                 uint64_t offset,
                 uint32_t width,
                 uint32_t height,
+                VkFormat format,
                 struct blit2d_dst_temps *tmp)
 {
    create_iview(cmd_buffer, dst, offset, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
-                width, height, &tmp->image, &tmp->iview);
+                width, height, format, &tmp->image, &tmp->iview);
 
    anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device),
       &(VkFramebufferCreateInfo) {
@@ -406,7 +424,8 @@ anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer,
 
       struct blit2d_dst_temps dst_temps;
       blit2d_bind_dst(cmd_buffer, dst, offset, rects[r].dst_x + rects[r].width,
-                      rects[r].dst_y + rects[r].height, &dst_temps);
+                      rects[r].dst_y + rects[r].height,
+                      vk_format_for_size(dst->bs), &dst_temps);
 
       struct blit_vb_data {
          float pos[2];
@@ -544,7 +563,8 @@ anv_meta_blit2d_w_tiled_dst(struct anv_cmd_buffer *cmd_buffer,
       };
 
       struct blit2d_dst_temps dst_temps;
-      blit2d_bind_dst(cmd_buffer, &dst_Y, offset, xmax_Y, ymax_Y, &dst_temps);
+      blit2d_bind_dst(cmd_buffer, &dst_Y, offset, xmax_Y, ymax_Y,
+                      VK_FORMAT_R8_UINT, &dst_temps);
 
       struct blit_vb_header {
          struct anv_vue_header vue;
@@ -647,6 +667,141 @@ anv_meta_blit2d_w_tiled_dst(struct anv_cmd_buffer *cmd_buffer,
    }
 }
 
+static void
+anv_meta_blit2d_rgb_dst(struct anv_cmd_buffer *cmd_buffer,
+                        struct anv_meta_blit2d_surf *src,
+                        enum blit2d_src_type src_type,
+                        struct anv_meta_blit2d_surf *dst,
+                        unsigned num_rects,
+                        struct anv_meta_blit2d_rect *rects)
+{
+   struct anv_device *device = cmd_buffer->device;
+
+   for (unsigned r = 0; r < num_rects; ++r) {
+      struct blit2d_src_temps src_temps;
+      blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps);
+
+      assert(dst->bs % 3 == 0);
+      assert(dst->tiling == ISL_TILING_LINEAR);
+
+      uint32_t offset;
+      isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev,
+                                         dst->tiling, 1, dst->pitch,
+                                         rects[r].dst_x, rects[r].dst_y,
+                                         &offset,
+                                         &rects[r].dst_x, &rects[r].dst_y);
+
+      /* A red surface three times as wide as the actual RGB destination */
+      struct anv_meta_blit2d_surf dst_R = {
+         .bo = dst->bo,
+         .tiling = dst->tiling,
+         .base_offset = dst->base_offset,
+         .bs = dst->bs / 3,
+         .pitch = dst->pitch,
+      };
+
+      struct blit2d_dst_temps dst_temps;
+      blit2d_bind_dst(cmd_buffer, &dst_R, offset,
+                      (rects[r].dst_x + rects[r].width) * 3,
+                      rects[r].dst_y + rects[r].height,
+                      vk_single_component_format_for_rgb_size(dst->bs),
+                      &dst_temps);
+
+      struct blit_vb_data {
+         float pos[2];
+         float tex_coord[3];
+      } *vb_data;
+
+      unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
+
+      struct anv_state vb_state =
+         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
+      memset(vb_state.map, 0, sizeof(struct anv_vue_header));
+      vb_data = vb_state.map + sizeof(struct anv_vue_header);
+
+      vb_data[0] = (struct blit_vb_data) {
+         .pos = {
+            (rects[r].dst_x + rects[r].width) * 3,
+            rects[r].dst_y + rects[r].height,
+         },
+         .tex_coord = {
+            rects[r].src_x + rects[r].width,
+            rects[r].src_y + rects[r].height,
+            src->pitch,
+         },
+      };
+
+      vb_data[1] = (struct blit_vb_data) {
+         .pos = {
+            rects[r].dst_x * 3,
+            rects[r].dst_y + rects[r].height,
+         },
+         .tex_coord = {
+            rects[r].src_x,
+            rects[r].src_y + rects[r].height,
+            src->pitch,
+         },
+      };
+
+      vb_data[2] = (struct blit_vb_data) {
+         .pos = {
+            rects[r].dst_x * 3,
+            rects[r].dst_y,
+         },
+         .tex_coord = {
+            rects[r].src_x,
+            rects[r].src_y,
+            src->pitch,
+         },
+      };
+
+      if (!device->info.has_llc)
+         anv_state_clflush(vb_state);
+
+      struct anv_buffer vertex_buffer = {
+         .device = device,
+         .size = vb_size,
+         .bo = &device->dynamic_state_block_pool.bo,
+         .offset = vb_state.offset,
+      };
+
+      anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
+         (VkBuffer[]) {
+            anv_buffer_to_handle(&vertex_buffer),
+            anv_buffer_to_handle(&vertex_buffer)
+         },
+         (VkDeviceSize[]) {
+            0,
+            sizeof(struct anv_vue_header),
+         });
+
+      ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
+         &(VkRenderPassBeginInfo) {
+            .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+            .renderPass = device->meta_state.blit2d.render_pass,
+            .framebuffer = dst_temps.fb,
+            .renderArea = {
+               .offset = { rects[r].dst_x, rects[r].dst_y, },
+               .extent = { rects[r].width, rects[r].height },
+            },
+            .clearValueCount = 0,
+            .pClearValues = NULL,
+         }, VK_SUBPASS_CONTENTS_INLINE);
+
+      bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_RGB);
+
+      ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+
+      ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
+
+      /* At the point where we emit the draw call, all data from the
+       * descriptor sets, etc. has been used.  We are free to delete it.
+       */
+      blit2d_unbind_src(cmd_buffer, src_type, &src_temps);
+      blit2d_unbind_dst(cmd_buffer, &dst_temps);
+   }
+}
+
 void
 anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer,
                 struct anv_meta_blit2d_surf *src,
@@ -666,7 +821,8 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer,
                                   num_rects, rects);
       return;
    } else if (dst->bs % 3 == 0) {
-      anv_finishme("Blitting to RGB destinations not yet supported");
+      anv_meta_blit2d_rgb_dst(cmd_buffer, src, src_type, dst,
+                              num_rects, rects);
       return;
    } else {
       assert(util_is_power_of_two(dst->bs));
@@ -892,6 +1048,61 @@ build_nir_copy_fragment_shader(struct anv_device *device,
    return b.shader;
 }
 
+/* RGB copies have the same interface as normal copies */
+#define rgb_vi_create_info normal_vi_create_info
+
+static nir_shader *
+build_nir_rgb_fragment_shader(struct anv_device *device,
+                              texel_fetch_build_func txf_func)
+{
+   const struct glsl_type *vec4 = glsl_vec4_type();
+   const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
+   nir_builder b;
+
+   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+   b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs");
+
+   nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+                                                  vec3, "v_tex_pos");
+   tex_pos_in->data.location = VARYING_SLOT_VAR0;
+
+   nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
+                                                 vec4, "f_color");
+   color_out->data.location = FRAG_RESULT_DATA0;
+
+   /* We need gl_FragCoord so we know our position */
+   nir_variable *frag_coord_in = nir_variable_create(b.shader,
+                                                     nir_var_shader_in,
+                                                     vec4, "gl_FragCoord");
+   frag_coord_in->data.location = VARYING_SLOT_POS;
+   frag_coord_in->data.origin_upper_left = true;
+
+   nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in));
+   unsigned swiz[4] = { 0, 1 };
+   nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);
+   nir_ssa_def *tex_pitch = nir_channel(&b, pos_int, 2);
+
+   nir_ssa_def *color = txf_func(&b, device, tex_pos, tex_pitch);
+
+   /* We figure out which component we are by the x component of FragCoord */
+   nir_ssa_def *frag_coord_int = nir_f2i(&b, nir_load_var(&b, frag_coord_in));
+   nir_ssa_def *comp = nir_umod(&b, nir_channel(&b, frag_coord_int, 0),
+                                    nir_imm_int(&b, 3));
+
+   /* Select the given channel from the texelFetch result */
+   nir_ssa_def *color_channel =
+      nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 0)),
+                    nir_channel(&b, color, 0),
+                    nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 1)),
+                                  nir_channel(&b, color, 1),
+                                  nir_channel(&b, color, 2)));
+
+   nir_ssa_def *u = nir_ssa_undef(&b, 1, 32);
+   nir_store_var(&b, color_out, nir_vec4(&b, color_channel, u, u, u), 0x1);
+
+   return b.shader;
+}
+
 static const VkPipelineVertexInputStateCreateInfo w_tiled_vi_create_info = {
    .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
    .vertexBindingDescriptionCount = 2,
@@ -1095,7 +1306,13 @@ blit2d_init_pipeline(struct anv_device *device,
       vi_create_info = &w_tiled_vi_create_info;
       break;
    case BLIT2D_DST_TYPE_RGB:
-      /* Not yet supported */
+      /* RGB destinations and W-detiling don't mix */
+      if (src_type != BLIT2D_SRC_TYPE_NORMAL)
+         return VK_SUCCESS;
+
+      fs.nir = build_nir_rgb_fragment_shader(device, src_func);
+      vi_create_info = &rgb_vi_create_info;
+      break;
    default:
       return VK_SUCCESS;
    }
index 7ec0608..5d8dd3d 100644 (file)
@@ -25,6 +25,8 @@
 #include "anv_private.h"
 #include "nir/nir_builder.h"
 
+#include "util/u_format_rgb9e5.h"
+
 /** Vertex attributes for color clears.  */
 struct color_clear_vattrs {
    struct anv_vue_header vue_header;
@@ -754,12 +756,22 @@ static void
 anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer,
                     struct anv_image *image,
                     VkImageLayout image_layout,
-                    const VkClearValue *clear_value,
+                    VkClearValue clear_value,
                     uint32_t range_count,
                     const VkImageSubresourceRange *ranges)
 {
    VkDevice device_h = anv_device_to_handle(cmd_buffer->device);
 
+   VkFormat vk_format = image->vk_format;
+   if (vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
+      /* We can't actually render to this format so we have to work around it
+       * by manually unpacking and using R32_UINT.
+       */
+      clear_value.color.uint32[0] =
+         float3_to_rgb9e5(clear_value.color.float32);
+      vk_format = VK_FORMAT_R32_UINT;
+   }
+
    for (uint32_t r = 0; r < range_count; r++) {
       const VkImageSubresourceRange *range = &ranges[r];
       for (uint32_t l = 0; l < anv_get_levelCount(image, range); ++l) {
@@ -773,7 +785,7 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer,
                   .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                   .image = anv_image_to_handle(image),
                   .viewType = anv_meta_get_view_type(image),
-                  .format = image->vk_format,
+                  .format = vk_format,
                   .subresourceRange = {
                      .aspectMask = range->aspectMask,
                      .baseMipLevel = range->baseMipLevel + l,
@@ -800,7 +812,7 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer,
                &fb);
 
             VkAttachmentDescription att_desc = {
-               .format = iview.vk_format,
+               .format = vk_format,
                .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
                .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
                .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
@@ -864,7 +876,7 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer,
             VkClearAttachment clear_att = {
                .aspectMask = range->aspectMask,
                .colorAttachment = 0,
-               .clearValue = *clear_value,
+               .clearValue = clear_value,
             };
 
             VkClearRect clear_rect = {
@@ -903,7 +915,7 @@ void anv_CmdClearColorImage(
    meta_clear_begin(&saved_state, cmd_buffer);
 
    anv_cmd_clear_image(cmd_buffer, image, imageLayout,
-                       (const VkClearValue *) pColor,
+                       (VkClearValue) { .color = *pColor },
                        rangeCount, pRanges);
 
    meta_clear_end(&saved_state, cmd_buffer);
@@ -924,7 +936,7 @@ void anv_CmdClearDepthStencilImage(
    meta_clear_begin(&saved_state, cmd_buffer);
 
    anv_cmd_clear_image(cmd_buffer, image, imageLayout,
-                       (const VkClearValue *) pDepthStencil,
+                       (VkClearValue) { .depthStencil = *pDepthStencil },
                        rangeCount, pRanges);
 
    meta_clear_end(&saved_state, cmd_buffer);
@@ -1005,7 +1017,7 @@ do_buffer_fill(struct anv_cmd_buffer *cmd_buffer,
 
    anv_cmd_clear_image(cmd_buffer, anv_image_from_handle(dest_image),
                        VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
-                       &clear_value, 1, &range);
+                       clear_value, 1, &range);
 }
 
 void anv_CmdFillBuffer(
index 5a09464..33c7fe4 100644 (file)
@@ -204,6 +204,12 @@ void anv_DestroyPipeline(
                          pAllocator ? pAllocator : &device->alloc);
    if (pipeline->blend_state.map)
       anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
+
+   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+      if (pipeline->shaders[s])
+         anv_shader_bin_unref(device, pipeline->shaders[s]);
+   }
+
    anv_free2(&device->alloc, pAllocator, pipeline);
 }
 
@@ -391,15 +397,34 @@ anv_fill_binding_table(struct brw_stage_prog_data *prog_data, unsigned bias)
    prog_data->binding_table.image_start = bias;
 }
 
+static struct anv_shader_bin *
+anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
+                           struct anv_pipeline_cache *cache,
+                           const void *key_data, uint32_t key_size,
+                           const void *kernel_data, uint32_t kernel_size,
+                           const void *prog_data, uint32_t prog_data_size,
+                           const struct anv_pipeline_bind_map *bind_map)
+{
+   if (cache) {
+      return anv_pipeline_cache_upload_kernel(cache, key_data, key_size,
+                                              kernel_data, kernel_size,
+                                              prog_data, prog_data_size,
+                                              bind_map);
+   } else {
+      return anv_shader_bin_create(pipeline->device, key_data, key_size,
+                                   kernel_data, kernel_size,
+                                   prog_data, prog_data_size, bind_map);
+   }
+}
+
+
 static void
 anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
                                 gl_shader_stage stage,
-                                const struct brw_stage_prog_data *prog_data,
-                                struct anv_pipeline_bind_map *map)
+                                struct anv_shader_bin *shader)
 {
-   pipeline->prog_data[stage] = prog_data;
+   pipeline->shaders[stage] = shader;
    pipeline->active_stages |= mesa_to_vk_shader_stage(stage);
-   pipeline->bindings[stage] = *map;
 }
 
 static VkResult
@@ -412,20 +437,20 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   const struct brw_stage_prog_data *stage_prog_data;
    struct anv_pipeline_bind_map map;
    struct brw_vs_prog_key key;
-   uint32_t kernel = NO_KERNEL;
+   struct anv_shader_bin *bin = NULL;
    unsigned char sha1[20];
 
    populate_vs_prog_key(&pipeline->device->info, &key);
 
-   if (module->size > 0) {
-      anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
-      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+   if (cache) {
+      anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
+                      pipeline->layout, spec_info);
+      bin = anv_pipeline_cache_search(cache, sha1, 20);
    }
 
-   if (kernel == NO_KERNEL) {
+   if (bin == NULL) {
       struct brw_vs_prog_data prog_data = { 0, };
       struct anv_pipeline_binding surface_to_descriptor[256];
       struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -464,28 +489,29 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       }
 
-      stage_prog_data = &prog_data.base.base;
-      kernel = anv_pipeline_cache_upload_kernel(cache,
-                                                module->size > 0 ? sha1 : NULL,
-                                                shader_code, code_size,
-                                                &stage_prog_data, sizeof(prog_data),
-                                                &map);
+      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+                                       shader_code, code_size,
+                                       &prog_data, sizeof(prog_data), &map);
+      if (!bin) {
+         ralloc_free(mem_ctx);
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
+
       ralloc_free(mem_ctx);
    }
 
    const struct brw_vs_prog_data *vs_prog_data =
-      (const struct brw_vs_prog_data *) stage_prog_data;
+      (const struct brw_vs_prog_data *)anv_shader_bin_get_prog_data(bin);
 
    if (vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
-      pipeline->vs_simd8 = kernel;
+      pipeline->vs_simd8 = bin->kernel.offset;
       pipeline->vs_vec4 = NO_KERNEL;
    } else {
       pipeline->vs_simd8 = NO_KERNEL;
-      pipeline->vs_vec4 = kernel;
+      pipeline->vs_vec4 = bin->kernel.offset;
    }
 
-   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX,
-                                   stage_prog_data, &map);
+   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, bin);
 
    return VK_SUCCESS;
 }
@@ -500,20 +526,20 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   const struct brw_stage_prog_data *stage_prog_data;
    struct anv_pipeline_bind_map map;
    struct brw_gs_prog_key key;
-   uint32_t kernel = NO_KERNEL;
+   struct anv_shader_bin *bin = NULL;
    unsigned char sha1[20];
 
    populate_gs_prog_key(&pipeline->device->info, &key);
 
-   if (module->size > 0) {
-      anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
-      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+   if (cache) {
+      anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
+                      pipeline->layout, spec_info);
+      bin = anv_pipeline_cache_search(cache, sha1, 20);
    }
 
-   if (kernel == NO_KERNEL) {
+   if (bin == NULL) {
       struct brw_gs_prog_data prog_data = { 0, };
       struct anv_pipeline_binding surface_to_descriptor[256];
       struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -551,20 +577,20 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
       }
 
       /* TODO: SIMD8 GS */
-      stage_prog_data = &prog_data.base.base;
-      kernel = anv_pipeline_cache_upload_kernel(cache,
-                                                module->size > 0 ? sha1 : NULL,
-                                                shader_code, code_size,
-                                                &stage_prog_data, sizeof(prog_data),
-                                                &map);
+      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+                                       shader_code, code_size,
+                                       &prog_data, sizeof(prog_data), &map);
+      if (!bin) {
+         ralloc_free(mem_ctx);
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
 
       ralloc_free(mem_ctx);
    }
 
-   pipeline->gs_kernel = kernel;
+   pipeline->gs_kernel = bin->kernel.offset;
 
-   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY,
-                                   stage_prog_data, &map);
+   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, bin);
 
    return VK_SUCCESS;
 }
@@ -580,20 +606,20 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   const struct brw_stage_prog_data *stage_prog_data;
    struct anv_pipeline_bind_map map;
    struct brw_wm_prog_key key;
+   struct anv_shader_bin *bin = NULL;
    unsigned char sha1[20];
 
    populate_wm_prog_key(&pipeline->device->info, info, extra, &key);
 
-   if (module->size > 0) {
-      anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
-      pipeline->ps_ksp0 =
-         anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+   if (cache) {
+      anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
+                      pipeline->layout, spec_info);
+      bin = anv_pipeline_cache_search(cache, sha1, 20);
    }
 
-   if (pipeline->ps_ksp0 == NO_KERNEL) {
+   if (bin == NULL) {
       struct brw_wm_prog_data prog_data = { 0, };
       struct anv_pipeline_binding surface_to_descriptor[256];
       struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -633,7 +659,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
          assert(num_rts + array_len <= 8);
 
          for (unsigned i = 0; i < array_len; i++) {
-            rt_bindings[num_rts] = (struct anv_pipeline_binding) {
+            rt_bindings[num_rts + i] = (struct anv_pipeline_binding) {
                .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
                .binding = 0,
                .index = rt + i,
@@ -682,19 +708,20 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       }
 
-      stage_prog_data = &prog_data.base;
-      pipeline->ps_ksp0 =
-         anv_pipeline_cache_upload_kernel(cache,
-                                          module->size > 0 ? sha1 : NULL,
-                                          shader_code, code_size,
-                                                &stage_prog_data, sizeof(prog_data),
-                                                &map);
+      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+                                       shader_code, code_size,
+                                       &prog_data, sizeof(prog_data), &map);
+      if (!bin) {
+         ralloc_free(mem_ctx);
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
 
       ralloc_free(mem_ctx);
    }
 
-   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT,
-                                   stage_prog_data, &map);
+   pipeline->ps_ksp0 = bin->kernel.offset;
+
+   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, bin);
 
    return VK_SUCCESS;
 }
@@ -709,20 +736,20 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
 {
    const struct brw_compiler *compiler =
       pipeline->device->instance->physicalDevice.compiler;
-   const struct brw_stage_prog_data *stage_prog_data;
    struct anv_pipeline_bind_map map;
    struct brw_cs_prog_key key;
-   uint32_t kernel = NO_KERNEL;
+   struct anv_shader_bin *bin = NULL;
    unsigned char sha1[20];
 
    populate_cs_prog_key(&pipeline->device->info, &key);
 
-   if (module->size > 0) {
-      anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
-      kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+   if (cache) {
+      anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
+                      pipeline->layout, spec_info);
+      bin = anv_pipeline_cache_search(cache, sha1, 20);
    }
 
-   if (module->size == 0 || kernel == NO_KERNEL) {
+   if (bin == NULL) {
       struct brw_cs_prog_data prog_data = { 0, };
       struct anv_pipeline_binding surface_to_descriptor[256];
       struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -754,20 +781,20 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       }
 
-      stage_prog_data = &prog_data.base;
-      kernel = anv_pipeline_cache_upload_kernel(cache,
-                                                module->size > 0 ? sha1 : NULL,
-                                                shader_code, code_size,
-                                                &stage_prog_data, sizeof(prog_data),
-                                                &map);
+      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+                                       shader_code, code_size,
+                                       &prog_data, sizeof(prog_data), &map);
+      if (!bin) {
+         ralloc_free(mem_ctx);
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
 
       ralloc_free(mem_ctx);
    }
 
-   pipeline->cs_simd = kernel;
+   pipeline->cs_simd = bin->kernel.offset;
 
-   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE,
-                                   stage_prog_data, &map);
+   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, bin);
 
    return VK_SUCCESS;
 }
@@ -1161,8 +1188,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
    /* When we free the pipeline, we detect stages based on the NULL status
     * of various prog_data pointers.  Make them NULL by default.
     */
-   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
-   memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
+   memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
 
    pipeline->vs_simd8 = NO_KERNEL;
    pipeline->vs_vec4 = NO_KERNEL;
@@ -1180,27 +1206,33 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
    }
 
    if (modules[MESA_SHADER_VERTEX]) {
-      anv_pipeline_compile_vs(pipeline, cache, pCreateInfo,
-                              modules[MESA_SHADER_VERTEX],
-                              pStages[MESA_SHADER_VERTEX]->pName,
-                              pStages[MESA_SHADER_VERTEX]->pSpecializationInfo);
+      result = anv_pipeline_compile_vs(pipeline, cache, pCreateInfo,
+                                       modules[MESA_SHADER_VERTEX],
+                                       pStages[MESA_SHADER_VERTEX]->pName,
+                                       pStages[MESA_SHADER_VERTEX]->pSpecializationInfo);
+      if (result != VK_SUCCESS)
+         goto compile_fail;
    }
 
    if (modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL])
       anv_finishme("no tessellation support");
 
    if (modules[MESA_SHADER_GEOMETRY]) {
-      anv_pipeline_compile_gs(pipeline, cache, pCreateInfo,
-                              modules[MESA_SHADER_GEOMETRY],
-                              pStages[MESA_SHADER_GEOMETRY]->pName,
-                              pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo);
+      result = anv_pipeline_compile_gs(pipeline, cache, pCreateInfo,
+                                       modules[MESA_SHADER_GEOMETRY],
+                                       pStages[MESA_SHADER_GEOMETRY]->pName,
+                                       pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo);
+      if (result != VK_SUCCESS)
+         goto compile_fail;
    }
 
    if (modules[MESA_SHADER_FRAGMENT]) {
-      anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra,
-                              modules[MESA_SHADER_FRAGMENT],
-                              pStages[MESA_SHADER_FRAGMENT]->pName,
-                              pStages[MESA_SHADER_FRAGMENT]->pSpecializationInfo);
+      result = anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra,
+                                       modules[MESA_SHADER_FRAGMENT],
+                                       pStages[MESA_SHADER_FRAGMENT]->pName,
+                                       pStages[MESA_SHADER_FRAGMENT]->pSpecializationInfo);
+      if (result != VK_SUCCESS)
+         goto compile_fail;
    }
 
    if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
@@ -1263,6 +1295,16 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
       pipeline->topology = _3DPRIM_RECTLIST;
 
    return VK_SUCCESS;
+
+compile_fail:
+   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+      if (pipeline->shaders[s])
+         anv_shader_bin_unref(device, pipeline->shaders[s]);
+   }
+
+   anv_reloc_list_finish(&pipeline->batch_relocs, alloc);
+
+   return result;
 }
 
 VkResult
@@ -1277,9 +1319,6 @@ anv_graphics_pipeline_create(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
 
-   if (cache == NULL)
-      cache = &device->default_pipeline_cache;
-
    switch (device->info.gen) {
    case 7:
       if (device->info.is_haswell)
@@ -1333,9 +1372,6 @@ static VkResult anv_compute_pipeline_create(
    ANV_FROM_HANDLE(anv_device, device, _device);
    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
 
-   if (cache == NULL)
-      cache = &device->default_pipeline_cache;
-
    switch (device->info.gen) {
    case 7:
       if (device->info.is_haswell)
index fbca311..2753c46 100644 (file)
  */
 
 #include "util/mesa-sha1.h"
+#include "util/hash_table.h"
 #include "util/debug.h"
 #include "anv_private.h"
 
+struct shader_bin_key {
+   uint32_t size;
+   uint8_t data[0];
+};
+
+static size_t
+anv_shader_bin_size(uint32_t prog_data_size, uint32_t key_size,
+                    uint32_t surface_count, uint32_t sampler_count)
+{
+   const uint32_t binding_data_size =
+      (surface_count + sampler_count) * sizeof(struct anv_pipeline_binding);
+
+   return align_u32(sizeof(struct anv_shader_bin), 8) +
+          align_u32(prog_data_size, 8) +
+          align_u32(sizeof(uint32_t) + key_size, 8) +
+          align_u32(binding_data_size, 8);
+}
+
+static inline const struct shader_bin_key *
+anv_shader_bin_get_key(const struct anv_shader_bin *shader)
+{
+   const void *data = shader;
+   data += align_u32(sizeof(struct anv_shader_bin), 8);
+   data += align_u32(shader->prog_data_size, 8);
+   return data;
+}
+
+struct anv_shader_bin *
+anv_shader_bin_create(struct anv_device *device,
+                      const void *key_data, uint32_t key_size,
+                      const void *kernel_data, uint32_t kernel_size,
+                      const void *prog_data, uint32_t prog_data_size,
+                      const struct anv_pipeline_bind_map *bind_map)
+{
+   const size_t size =
+      anv_shader_bin_size(prog_data_size, key_size,
+                          bind_map->surface_count, bind_map->sampler_count);
+
+   struct anv_shader_bin *shader =
+      anv_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!shader)
+      return NULL;
+
+   shader->ref_cnt = 1;
+
+   shader->kernel =
+      anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
+   memcpy(shader->kernel.map, kernel_data, kernel_size);
+   shader->kernel_size = kernel_size;
+   shader->bind_map = *bind_map;
+   shader->prog_data_size = prog_data_size;
+
+   /* Now we fill out the floating data at the end */
+   void *data = shader;
+   data += align_u32(sizeof(struct anv_shader_bin), 8);
+
+   memcpy(data, prog_data, prog_data_size);
+   data += align_u32(prog_data_size, 8);
+
+   struct shader_bin_key *key = data;
+   key->size = key_size;
+   memcpy(key->data, key_data, key_size);
+   data += align_u32(sizeof(*key) + key_size, 8);
+
+   shader->bind_map.surface_to_descriptor = data;
+   memcpy(data, bind_map->surface_to_descriptor,
+          bind_map->surface_count * sizeof(struct anv_pipeline_binding));
+   data += bind_map->surface_count * sizeof(struct anv_pipeline_binding);
+
+   shader->bind_map.sampler_to_descriptor = data;
+   memcpy(data, bind_map->sampler_to_descriptor,
+          bind_map->sampler_count * sizeof(struct anv_pipeline_binding));
+
+   return shader;
+}
+
+void
+anv_shader_bin_destroy(struct anv_device *device,
+                       struct anv_shader_bin *shader)
+{
+   assert(shader->ref_cnt == 0);
+   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
+   anv_free(&device->alloc, shader);
+}
+
+static size_t
+anv_shader_bin_data_size(const struct anv_shader_bin *shader)
+{
+   return anv_shader_bin_size(shader->prog_data_size,
+                              anv_shader_bin_get_key(shader)->size,
+                              shader->bind_map.surface_count,
+                              shader->bind_map.sampler_count) +
+          align_u32(shader->kernel_size, 8);
+}
+
+static void
+anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data)
+{
+   size_t struct_size =
+      anv_shader_bin_size(shader->prog_data_size,
+                          anv_shader_bin_get_key(shader)->size,
+                          shader->bind_map.surface_count,
+                          shader->bind_map.sampler_count);
+
+   memcpy(data, shader, struct_size);
+   data += struct_size;
+
+   memcpy(data, shader->kernel.map, shader->kernel_size);
+}
+
 /* Remaining work:
  *
  * - Compact binding table layout so it's tight and not dependent on
  *   dual_src_blend.
  */
 
+static uint32_t
+shader_bin_key_hash_func(const void *void_key)
+{
+   const struct shader_bin_key *key = void_key;
+   return _mesa_hash_data(key->data, key->size);
+}
+
+static bool
+shader_bin_key_compare_func(const void *void_a, const void *void_b)
+{
+   const struct shader_bin_key *a = void_a, *b = void_b;
+   if (a->size != b->size)
+      return false;
+
+   return memcmp(a->data, b->data, a->size) == 0;
+}
+
 void
 anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
-                        struct anv_device *device)
+                        struct anv_device *device,
+                        bool cache_enabled)
 {
    cache->device = device;
-   anv_state_stream_init(&cache->program_stream,
-                         &device->instruction_block_pool);
    pthread_mutex_init(&cache->mutex, NULL);
 
-   cache->kernel_count = 0;
-   cache->total_size = 0;
-   cache->table_size = 1024;
-   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
-   cache->hash_table = malloc(byte_size);
-
-   /* We don't consider allocation failure fatal, we just start with a 0-sized
-    * cache. */
-   if (cache->hash_table == NULL ||
-       !env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true))
-      cache->table_size = 0;
-   else
-      memset(cache->hash_table, 0xff, byte_size);
+   if (cache_enabled) {
+      cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
+                                             shader_bin_key_compare_func);
+   } else {
+      cache->cache = NULL;
+   }
 }
 
 void
 anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
 {
-   anv_state_stream_finish(&cache->program_stream);
    pthread_mutex_destroy(&cache->mutex);
-   free(cache->hash_table);
-}
-
-struct cache_entry {
-   unsigned char sha1[20];
-   uint32_t prog_data_size;
-   uint32_t kernel_size;
-   uint32_t surface_count;
-   uint32_t sampler_count;
-   uint32_t image_count;
-
-   char prog_data[0];
-
-   /* kernel follows prog_data at next 64 byte aligned address */
-};
-
-static uint32_t
-entry_size(struct cache_entry *entry)
-{
-   /* This returns the number of bytes needed to serialize an entry, which
-    * doesn't include the alignment padding bytes.
-    */
 
-   const uint32_t map_size =
-      entry->surface_count * sizeof(struct anv_pipeline_binding) +
-      entry->sampler_count * sizeof(struct anv_pipeline_binding);
+   if (cache->cache) {
+      /* This is a bit unfortunate.  In order to keep things from randomly
+       * going away, the shader cache has to hold a reference to all shader
+       * binaries it contains.  We unref them when we destroy the cache.
+       */
+      struct hash_entry *entry;
+      hash_table_foreach(cache->cache, entry)
+         anv_shader_bin_unref(cache->device, entry->data);
 
-   return sizeof(*entry) + entry->prog_data_size + map_size;
+      _mesa_hash_table_destroy(cache->cache, NULL);
+   }
 }
 
 void
 anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
                 struct anv_shader_module *module,
                 const char *entrypoint,
+                const struct anv_pipeline_layout *pipeline_layout,
                 const VkSpecializationInfo *spec_info)
 {
    struct mesa_sha1 *ctx;
@@ -108,6 +212,10 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
    _mesa_sha1_update(ctx, key, key_size);
    _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
    _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
+   if (pipeline_layout) {
+      _mesa_sha1_update(ctx, pipeline_layout->sha1,
+                        sizeof(pipeline_layout->sha1));
+   }
    /* hash in shader stage, pipeline layout? */
    if (spec_info) {
       _mesa_sha1_update(ctx, spec_info->pMapEntries,
@@ -117,210 +225,94 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
    _mesa_sha1_final(ctx, hash);
 }
 
-static uint32_t
-anv_pipeline_cache_search_unlocked(struct anv_pipeline_cache *cache,
-                                   const unsigned char *sha1,
-                                   const struct brw_stage_prog_data **prog_data,
-                                   struct anv_pipeline_bind_map *map)
+static struct anv_shader_bin *
+anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
+                                 const void *key_data, uint32_t key_size)
 {
-   const uint32_t mask = cache->table_size - 1;
-   const uint32_t start = (*(uint32_t *) sha1);
-
-   for (uint32_t i = 0; i < cache->table_size; i++) {
-      const uint32_t index = (start + i) & mask;
-      const uint32_t offset = cache->hash_table[index];
-
-      if (offset == ~0)
-         return NO_KERNEL;
-
-      struct cache_entry *entry =
-         cache->program_stream.block_pool->map + offset;
-      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
-         if (prog_data) {
-            assert(map);
-            void *p = entry->prog_data;
-            *prog_data = p;
-            p += entry->prog_data_size;
-            map->surface_count = entry->surface_count;
-            map->sampler_count = entry->sampler_count;
-            map->image_count = entry->image_count;
-            map->surface_to_descriptor = p;
-            p += map->surface_count * sizeof(struct anv_pipeline_binding);
-            map->sampler_to_descriptor = p;
-         }
-
-         return offset + align_u32(entry_size(entry), 64);
-      }
-   }
-
-   /* This can happen if the pipeline cache is disabled via
-    * ANV_ENABLE_PIPELINE_CACHE=false
-    */
-   return NO_KERNEL;
+   uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
+   struct shader_bin_key *key = (void *)vla;
+   key->size = key_size;
+   memcpy(key->data, key_data, key_size);
+
+   struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
+   if (entry)
+      return entry->data;
+   else
+      return NULL;
 }
 
-uint32_t
+struct anv_shader_bin *
 anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
-                          const unsigned char *sha1,
-                          const struct brw_stage_prog_data **prog_data,
-                          struct anv_pipeline_bind_map *map)
+                          const void *key_data, uint32_t key_size)
 {
-   uint32_t kernel;
+   if (!cache->cache)
+      return NULL;
 
    pthread_mutex_lock(&cache->mutex);
 
-   kernel = anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
+   struct anv_shader_bin *shader =
+      anv_pipeline_cache_search_locked(cache, key_data, key_size);
 
    pthread_mutex_unlock(&cache->mutex);
 
-   return kernel;
-}
-
-static void
-anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache,
-                             struct cache_entry *entry, uint32_t entry_offset)
-{
-   const uint32_t mask = cache->table_size - 1;
-   const uint32_t start = (*(uint32_t *) entry->sha1);
-
-   /* We'll always be able to insert when we get here. */
-   assert(cache->kernel_count < cache->table_size / 2);
+   /* We increment refcount before handing it to the caller */
+   if (shader)
+      anv_shader_bin_ref(shader);
 
-   for (uint32_t i = 0; i < cache->table_size; i++) {
-      const uint32_t index = (start + i) & mask;
-      if (cache->hash_table[index] == ~0) {
-         cache->hash_table[index] = entry_offset;
-         break;
-      }
-   }
-
-   cache->total_size += entry_size(entry) + entry->kernel_size;
-   cache->kernel_count++;
+   return shader;
 }
 
-static VkResult
-anv_pipeline_cache_grow(struct anv_pipeline_cache *cache)
+static struct anv_shader_bin *
+anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
+                              const void *key_data, uint32_t key_size,
+                              const void *kernel_data, uint32_t kernel_size,
+                              const void *prog_data, uint32_t prog_data_size,
+                              const struct anv_pipeline_bind_map *bind_map)
 {
-   const uint32_t table_size = cache->table_size * 2;
-   const uint32_t old_table_size = cache->table_size;
-   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
-   uint32_t *table;
-   uint32_t *old_table = cache->hash_table;
-
-   table = malloc(byte_size);
-   if (table == NULL)
-      return VK_ERROR_OUT_OF_HOST_MEMORY;
-
-   cache->hash_table = table;
-   cache->table_size = table_size;
-   cache->kernel_count = 0;
-   cache->total_size = 0;
-
-   memset(cache->hash_table, 0xff, byte_size);
-   for (uint32_t i = 0; i < old_table_size; i++) {
-      const uint32_t offset = old_table[i];
-      if (offset == ~0)
-         continue;
+   struct anv_shader_bin *shader =
+      anv_pipeline_cache_search_locked(cache, key_data, key_size);
+   if (shader)
+      return shader;
 
-      struct cache_entry *entry =
-         cache->program_stream.block_pool->map + offset;
-      anv_pipeline_cache_set_entry(cache, entry, offset);
-   }
+   struct anv_shader_bin *bin =
+      anv_shader_bin_create(cache->device, key_data, key_size,
+                            kernel_data, kernel_size,
+                            prog_data, prog_data_size, bind_map);
+   if (!bin)
+      return NULL;
 
-   free(old_table);
+   _mesa_hash_table_insert(cache->cache, anv_shader_bin_get_key(bin), bin);
 
-   return VK_SUCCESS;
+   return bin;
 }
 
-static void
-anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache,
-                             struct cache_entry *entry, uint32_t entry_offset)
-{
-   if (cache->kernel_count == cache->table_size / 2)
-      anv_pipeline_cache_grow(cache);
-
-   /* Failing to grow that hash table isn't fatal, but may mean we don't
-    * have enough space to add this new kernel. Only add it if there's room.
-    */
-   if (cache->kernel_count < cache->table_size / 2)
-      anv_pipeline_cache_set_entry(cache, entry, entry_offset);
-}
-
-uint32_t
+struct anv_shader_bin *
 anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
-                                 const unsigned char *sha1,
-                                 const void *kernel, size_t kernel_size,
-                                 const struct brw_stage_prog_data **prog_data,
-                                 size_t prog_data_size,
-                                 struct anv_pipeline_bind_map *map)
+                                 const void *key_data, uint32_t key_size,
+                                 const void *kernel_data, uint32_t kernel_size,
+                                 const void *prog_data, uint32_t prog_data_size,
+                                 const struct anv_pipeline_bind_map *bind_map)
 {
-   pthread_mutex_lock(&cache->mutex);
-
-   /* Before uploading, check again that another thread didn't upload this
-    * shader while we were compiling it.
-    */
-   if (sha1) {
-      uint32_t cached_kernel =
-         anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
-      if (cached_kernel != NO_KERNEL) {
-         pthread_mutex_unlock(&cache->mutex);
-         return cached_kernel;
-      }
-   }
+   if (cache->cache) {
+      pthread_mutex_lock(&cache->mutex);
 
-   struct cache_entry *entry;
+      struct anv_shader_bin *bin =
+         anv_pipeline_cache_add_shader(cache, key_data, key_size,
+                                       kernel_data, kernel_size,
+                                       prog_data, prog_data_size, bind_map);
 
-   const uint32_t map_size =
-      map->surface_count * sizeof(struct anv_pipeline_binding) +
-      map->sampler_count * sizeof(struct anv_pipeline_binding);
+      pthread_mutex_unlock(&cache->mutex);
 
-   const uint32_t preamble_size =
-      align_u32(sizeof(*entry) + prog_data_size + map_size, 64);
+      /* We increment refcount before handing it to the caller */
+      anv_shader_bin_ref(bin);
 
-   const uint32_t size = preamble_size + kernel_size;
-
-   assert(size < cache->program_stream.block_pool->block_size);
-   const struct anv_state state =
-      anv_state_stream_alloc(&cache->program_stream, size, 64);
-
-   entry = state.map;
-   entry->prog_data_size = prog_data_size;
-   entry->surface_count = map->surface_count;
-   entry->sampler_count = map->sampler_count;
-   entry->image_count = map->image_count;
-   entry->kernel_size = kernel_size;
-
-   void *p = entry->prog_data;
-   memcpy(p, *prog_data, prog_data_size);
-   p += prog_data_size;
-
-   memcpy(p, map->surface_to_descriptor,
-          map->surface_count * sizeof(struct anv_pipeline_binding));
-   map->surface_to_descriptor = p;
-   p += map->surface_count * sizeof(struct anv_pipeline_binding);
-
-   memcpy(p, map->sampler_to_descriptor,
-          map->sampler_count * sizeof(struct anv_pipeline_binding));
-   map->sampler_to_descriptor = p;
-
-   if (sha1) {
-      assert(anv_pipeline_cache_search_unlocked(cache, sha1,
-                                                NULL, NULL) == NO_KERNEL);
-
-      memcpy(entry->sha1, sha1, sizeof(entry->sha1));
-      anv_pipeline_cache_add_entry(cache, entry, state.offset);
+      return bin;
+   } else {
+      /* In this case, we're not caching it so the caller owns it entirely */
+      return anv_shader_bin_create(cache->device, key_data, key_size,
+                                   kernel_data, kernel_size,
+                                   prog_data, prog_data_size, bind_map);
    }
-
-   pthread_mutex_unlock(&cache->mutex);
-
-   memcpy(state.map + preamble_size, kernel, kernel_size);
-
-   if (!cache->device->info.has_llc)
-      anv_state_clflush(state);
-
-   *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
-
-   return state.offset + preamble_size;
 }
 
 struct cache_header {
@@ -339,6 +331,9 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
    struct cache_header header;
    uint8_t uuid[VK_UUID_SIZE];
 
+   if (cache->cache == NULL)
+      return;
+
    if (size < sizeof(header))
       return;
    memcpy(&header, data, sizeof(header));
@@ -354,38 +349,62 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
    if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
       return;
 
-   void *end = (void *) data + size;
-   void *p = (void *) data + header.header_size;
-
-   while (p < end) {
-      struct cache_entry *entry = p;
-
-      void *data = entry->prog_data;
-      const struct brw_stage_prog_data *prog_data = data;
-      data += entry->prog_data_size;
-
-      struct anv_pipeline_binding *surface_to_descriptor = data;
-      data += entry->surface_count * sizeof(struct anv_pipeline_binding);
-      struct anv_pipeline_binding *sampler_to_descriptor = data;
-      data += entry->sampler_count * sizeof(struct anv_pipeline_binding);
-      void *kernel = data;
-
-      struct anv_pipeline_bind_map map = {
-         .surface_count = entry->surface_count,
-         .sampler_count = entry->sampler_count,
-         .image_count = entry->image_count,
-         .surface_to_descriptor = surface_to_descriptor,
-         .sampler_to_descriptor = sampler_to_descriptor
-      };
-
-      anv_pipeline_cache_upload_kernel(cache, entry->sha1,
-                                       kernel, entry->kernel_size,
-                                       &prog_data,
-                                       entry->prog_data_size, &map);
-      p = kernel + entry->kernel_size;
+   const void *end = data + size;
+   const void *p = data + header.header_size;
+
+   /* Count is the total number of valid entries */
+   uint32_t count;
+   if (p + sizeof(count) >= end)
+      return;
+   memcpy(&count, p, sizeof(count));
+   p += align_u32(sizeof(count), 8);
+
+   for (uint32_t i = 0; i < count; i++) {
+      struct anv_shader_bin bin;
+      if (p + sizeof(bin) > end)
+         break;
+      memcpy(&bin, p, sizeof(bin));
+      p += align_u32(sizeof(struct anv_shader_bin), 8);
+
+      const void *prog_data = p;
+      p += align_u32(bin.prog_data_size, 8);
+
+      struct shader_bin_key key;
+      if (p + sizeof(key) > end)
+         break;
+      memcpy(&key, p, sizeof(key));
+      const void *key_data = p + sizeof(key);
+      p += align_u32(sizeof(key) + key.size, 8);
+
+      /* We're going to memcpy this so getting rid of const is fine */
+      struct anv_pipeline_binding *bindings = (void *)p;
+      p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) *
+                     sizeof(struct anv_pipeline_binding), 8);
+      bin.bind_map.surface_to_descriptor = bindings;
+      bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count;
+
+      const void *kernel_data = p;
+      p += align_u32(bin.kernel_size, 8);
+
+      if (p > end)
+         break;
+
+      anv_pipeline_cache_add_shader(cache, key_data, key.size,
+                                    kernel_data, bin.kernel_size,
+                                    prog_data, bin.prog_data_size,
+                                    &bin.bind_map);
    }
 }
 
+static bool
+pipeline_cache_enabled()
+{
+   static int enabled = -1;
+   if (enabled < 0)
+      enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
+   return enabled;
+}
+
 VkResult anv_CreatePipelineCache(
     VkDevice                                    _device,
     const VkPipelineCacheCreateInfo*            pCreateInfo,
@@ -404,7 +423,7 @@ VkResult anv_CreatePipelineCache(
    if (cache == NULL)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   anv_pipeline_cache_init(cache, device);
+   anv_pipeline_cache_init(cache, device, pipeline_cache_enabled());
 
    if (pCreateInfo->initialDataSize > 0)
       anv_pipeline_cache_load(cache,
@@ -439,9 +458,16 @@ VkResult anv_GetPipelineCacheData(
    ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
    struct cache_header *header;
 
-   const size_t size = sizeof(*header) + cache->total_size;
-
    if (pData == NULL) {
+      size_t size = align_u32(sizeof(*header), 8) +
+                    align_u32(sizeof(uint32_t), 8);
+
+      if (cache->cache) {
+         struct hash_entry *entry;
+         hash_table_foreach(cache->cache, entry)
+            size += anv_shader_bin_data_size(entry->data);
+      }
+
       *pDataSize = size;
       return VK_SUCCESS;
    }
@@ -458,25 +484,25 @@ VkResult anv_GetPipelineCacheData(
    header->vendor_id = 0x8086;
    header->device_id = device->chipset_id;
    anv_device_get_cache_uuid(header->uuid);
-   p += header->header_size;
+   p += align_u32(header->header_size, 8);
 
-   struct cache_entry *entry;
-   for (uint32_t i = 0; i < cache->table_size; i++) {
-      if (cache->hash_table[i] == ~0)
-         continue;
+   uint32_t *count = p;
+   p += align_u32(sizeof(*count), 8);
+   *count = 0;
 
-      entry = cache->program_stream.block_pool->map + cache->hash_table[i];
-      const uint32_t size = entry_size(entry);
-      if (end < p + size + entry->kernel_size)
-         break;
-
-      memcpy(p, entry, size);
-      p += size;
+   if (cache->cache) {
+      struct hash_entry *entry;
+      hash_table_foreach(cache->cache, entry) {
+         struct anv_shader_bin *shader = entry->data;
+         size_t data_size = anv_shader_bin_data_size(entry->data);
+         if (p + data_size > end)
+            break;
 
-      void *kernel = (void *) entry + align_u32(size, 64);
+         anv_shader_bin_write_data(shader, p);
+         p += data_size;
 
-      memcpy(p, kernel, entry->kernel_size);
-      p += entry->kernel_size;
+         (*count)++;
+      }
    }
 
    *pDataSize = p - pData;
@@ -484,25 +510,6 @@ VkResult anv_GetPipelineCacheData(
    return VK_SUCCESS;
 }
 
-static void
-anv_pipeline_cache_merge(struct anv_pipeline_cache *dst,
-                         struct anv_pipeline_cache *src)
-{
-   for (uint32_t i = 0; i < src->table_size; i++) {
-      const uint32_t offset = src->hash_table[i];
-      if (offset == ~0)
-         continue;
-
-      struct cache_entry *entry =
-         src->program_stream.block_pool->map + offset;
-
-      if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL)
-         continue;
-
-      anv_pipeline_cache_add_entry(dst, entry, offset);
-   }
-}
-
 VkResult anv_MergePipelineCaches(
     VkDevice                                    _device,
     VkPipelineCache                             destCache,
@@ -511,10 +518,23 @@ VkResult anv_MergePipelineCaches(
 {
    ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);
 
+   if (!dst->cache)
+      return VK_SUCCESS;
+
    for (uint32_t i = 0; i < srcCacheCount; i++) {
       ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
+      if (!src->cache)
+         continue;
+
+      struct hash_entry *entry;
+      hash_table_foreach(src->cache, entry) {
+         struct anv_shader_bin *bin = entry->data;
+         if (_mesa_hash_table_search(dst->cache, anv_shader_bin_get_key(bin)))
+            continue;
 
-      anv_pipeline_cache_merge(dst, src);
+         anv_shader_bin_ref(bin);
+         _mesa_hash_table_insert(dst->cache, anv_shader_bin_get_key(bin), bin);
+      }
    }
 
    return VK_SUCCESS;
index 50b860c..8b57e1b 100644 (file)
@@ -83,6 +83,12 @@ extern "C" {
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 
 static inline uint32_t
+align_down_npot_u32(uint32_t v, uint32_t a)
+{
+   return v - (v % a);
+}
+
+static inline uint32_t
 align_u32(uint32_t v, uint32_t a)
 {
    assert(a != 0 && a == (a & -a));
@@ -394,9 +400,9 @@ struct anv_fixed_size_state_pool {
 };
 
 #define ANV_MIN_STATE_SIZE_LOG2 6
-#define ANV_MAX_STATE_SIZE_LOG2 10
+#define ANV_MAX_STATE_SIZE_LOG2 17
 
-#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2)
+#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
 
 struct anv_state_pool {
    struct anv_block_pool *block_pool;
@@ -652,31 +658,27 @@ struct anv_queue {
 
 struct anv_pipeline_cache {
    struct anv_device *                          device;
-   struct anv_state_stream                      program_stream;
    pthread_mutex_t                              mutex;
 
-   uint32_t                                     total_size;
-   uint32_t                                     table_size;
-   uint32_t                                     kernel_count;
-   uint32_t *                                   hash_table;
+   struct hash_table *                          cache;
 };
 
 struct anv_pipeline_bind_map;
 
 void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
-                             struct anv_device *device);
+                             struct anv_device *device,
+                             bool cache_enabled);
 void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache);
-uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
-                                   const unsigned char *sha1,
-                                   const struct brw_stage_prog_data **prog_data,
-                                   struct anv_pipeline_bind_map *map);
-uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
-                                          const unsigned char *sha1,
-                                          const void *kernel,
-                                          size_t kernel_size,
-                                          const struct brw_stage_prog_data **prog_data,
-                                          size_t prog_data_size,
-                                          struct anv_pipeline_bind_map *map);
+
+struct anv_shader_bin *
+anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
+                          const void *key, uint32_t key_size);
+struct anv_shader_bin *
+anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
+                                 const void *key_data, uint32_t key_size,
+                                 const void *kernel_data, uint32_t kernel_size,
+                                 const void *prog_data, uint32_t prog_data_size,
+                                 const struct anv_pipeline_bind_map *bind_map);
 
 struct anv_device {
     VK_LOADER_DATA                              _loader_data;
@@ -698,7 +700,7 @@ struct anv_device {
     struct anv_state_pool                       dynamic_state_pool;
 
     struct anv_block_pool                       instruction_block_pool;
-    struct anv_pipeline_cache                   default_pipeline_cache;
+    struct anv_state_pool                       instruction_state_pool;
 
     struct anv_block_pool                       surface_state_block_pool;
     struct anv_state_pool                       surface_state_pool;
@@ -1057,6 +1059,8 @@ struct anv_pipeline_layout {
    struct {
       bool has_dynamic_offsets;
    } stage[MESA_SHADER_STAGES];
+
+   unsigned char sha1[20];
 };
 
 struct anv_buffer {
@@ -1422,6 +1426,7 @@ struct anv_shader_module {
 void anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
                      struct anv_shader_module *module,
                      const char *entrypoint,
+                     const struct anv_pipeline_layout *pipeline_layout,
                      const VkSpecializationInfo *spec_info);
 
 static inline gl_shader_stage
@@ -1449,13 +1454,57 @@ struct anv_pipeline_bind_map {
    uint32_t surface_count;
    uint32_t sampler_count;
    uint32_t image_count;
-   uint32_t attachment_count;
 
    struct anv_pipeline_binding *                surface_to_descriptor;
    struct anv_pipeline_binding *                sampler_to_descriptor;
-   uint32_t *                                   surface_to_attachment;
 };
 
+struct anv_shader_bin {
+   uint32_t ref_cnt;
+
+   struct anv_state kernel;
+   uint32_t kernel_size;
+
+   struct anv_pipeline_bind_map bind_map;
+
+   uint32_t prog_data_size;
+
+   /* Prog data follows, then the key, both aligned to 8-bytes */
+};
+
+struct anv_shader_bin *
+anv_shader_bin_create(struct anv_device *device,
+                      const void *key, uint32_t key_size,
+                      const void *kernel, uint32_t kernel_size,
+                      const void *prog_data, uint32_t prog_data_size,
+                      const struct anv_pipeline_bind_map *bind_map);
+
+void
+anv_shader_bin_destroy(struct anv_device *device, struct anv_shader_bin *shader);
+
+static inline void
+anv_shader_bin_ref(struct anv_shader_bin *shader)
+{
+   assert(shader->ref_cnt >= 1);
+   __sync_fetch_and_add(&shader->ref_cnt, 1);
+}
+
+static inline void
+anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
+{
+   assert(shader->ref_cnt >= 1);
+   if (__sync_fetch_and_add(&shader->ref_cnt, -1) == 1)
+      anv_shader_bin_destroy(device, shader);
+}
+
+static inline const struct brw_stage_prog_data *
+anv_shader_bin_get_prog_data(const struct anv_shader_bin *shader)
+{
+   const void *data = shader;
+   data += align_u32(sizeof(struct anv_shader_bin), 8);
+   return data;
+}
+
 struct anv_pipeline {
    struct anv_device *                          device;
    struct anv_batch                             batch;
@@ -1465,12 +1514,12 @@ struct anv_pipeline {
    struct anv_dynamic_state                     dynamic_state;
 
    struct anv_pipeline_layout *                 layout;
-   struct anv_pipeline_bind_map                 bindings[MESA_SHADER_STAGES];
 
    bool                                         use_repclear;
    bool                                         needs_data_cache;
 
-   const struct brw_stage_prog_data *           prog_data[MESA_SHADER_STAGES];
+   struct anv_shader_bin *                      shaders[MESA_SHADER_STAGES];
+
    struct {
       uint32_t                                  start[MESA_SHADER_GEOMETRY + 1];
       uint32_t                                  size[MESA_SHADER_GEOMETRY + 1];
@@ -1513,29 +1562,29 @@ struct anv_pipeline {
    } gen9;
 };
 
-static inline const struct brw_vs_prog_data *
-get_vs_prog_data(struct anv_pipeline *pipeline)
-{
-   return (const struct brw_vs_prog_data *) pipeline->prog_data[MESA_SHADER_VERTEX];
-}
-
-static inline const struct brw_gs_prog_data *
-get_gs_prog_data(struct anv_pipeline *pipeline)
+static inline bool
+anv_pipeline_has_stage(const struct anv_pipeline *pipeline,
+                       gl_shader_stage stage)
 {
-   return (const struct brw_gs_prog_data *) pipeline->prog_data[MESA_SHADER_GEOMETRY];
+   return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
 }
 
-static inline const struct brw_wm_prog_data *
-get_wm_prog_data(struct anv_pipeline *pipeline)
-{
-   return (const struct brw_wm_prog_data *) pipeline->prog_data[MESA_SHADER_FRAGMENT];
+#define ANV_DECL_GET_PROG_DATA_FUNC(prefix, stage)                   \
+static inline const struct brw_##prefix##_prog_data *                \
+get_##prefix##_prog_data(struct anv_pipeline *pipeline)              \
+{                                                                    \
+   if (anv_pipeline_has_stage(pipeline, stage)) {                    \
+      return (const struct brw_##prefix##_prog_data *)               \
+             anv_shader_bin_get_prog_data(pipeline->shaders[stage]); \
+   } else {                                                          \
+      return NULL;                                                   \
+   }                                                                 \
 }
 
-static inline const struct brw_cs_prog_data *
-get_cs_prog_data(struct anv_pipeline *pipeline)
-{
-   return (const struct brw_cs_prog_data *) pipeline->prog_data[MESA_SHADER_COMPUTE];
-}
+ANV_DECL_GET_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
+ANV_DECL_GET_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
+ANV_DECL_GET_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
+ANV_DECL_GET_PROG_DATA_FUNC(cs, MESA_SHADER_COMPUTE)
 
 struct anv_graphics_pipeline_create_info {
    /**
index 89cb51f..d1b18e0 100644 (file)
@@ -75,76 +75,6 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline,
    GENX(3DSTATE_SF_pack)(NULL, &pipeline->gen7.sf, &sf);
 }
 
-static void
-gen7_emit_cb_state(struct anv_pipeline *pipeline,
-                   const VkPipelineColorBlendStateCreateInfo *info,
-                   const VkPipelineMultisampleStateCreateInfo *ms_info)
-{
-   struct anv_device *device = pipeline->device;
-
-   if (info == NULL || info->attachmentCount == 0) {
-      pipeline->blend_state =
-         anv_state_pool_emit(&device->dynamic_state_pool,
-            GENX(BLEND_STATE), 64,
-            .ColorBufferBlendEnable = false,
-            .WriteDisableAlpha = true,
-            .WriteDisableRed = true,
-            .WriteDisableGreen = true,
-            .WriteDisableBlue = true);
-   } else {
-      const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0];
-      struct GENX(BLEND_STATE) blend = {
-         .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
-         .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
-
-         .LogicOpEnable = info->logicOpEnable,
-         .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
-         .ColorBufferBlendEnable = a->blendEnable,
-         .ColorClampRange = COLORCLAMP_RTFORMAT,
-         .PreBlendColorClampEnable = true,
-         .PostBlendColorClampEnable = true,
-         .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
-         .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
-         .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
-         .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
-         .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
-         .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
-         .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
-         .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
-         .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
-         .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
-      };
-
-      /* Our hardware applies the blend factor prior to the blend function
-       * regardless of what function is used.  Technically, this means the
-       * hardware can do MORE than GL or Vulkan specify.  However, it also
-       * means that, for MIN and MAX, we have to stomp the blend factor to
-       * ONE to make it a no-op.
-       */
-      if (a->colorBlendOp == VK_BLEND_OP_MIN ||
-          a->colorBlendOp == VK_BLEND_OP_MAX) {
-         blend.SourceBlendFactor = BLENDFACTOR_ONE;
-         blend.DestinationBlendFactor = BLENDFACTOR_ONE;
-      }
-      if (a->alphaBlendOp == VK_BLEND_OP_MIN ||
-          a->alphaBlendOp == VK_BLEND_OP_MAX) {
-         blend.SourceAlphaBlendFactor = BLENDFACTOR_ONE;
-         blend.DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
-      }
-
-      pipeline->blend_state = anv_state_pool_alloc(&device->dynamic_state_pool,
-                                                   GENX(BLEND_STATE_length) * 4,
-                                                   64);
-      GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend);
-      if (pipeline->device->info.has_llc)
-         anv_state_clflush(pipeline->blend_state);
-    }
-
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
-      bsp.BlendStatePointer = pipeline->blend_state.offset;
-   }
-}
-
 VkResult
 genX(graphics_pipeline_create)(
     VkDevice                                    _device,
@@ -182,31 +112,13 @@ genX(graphics_pipeline_create)(
 
    emit_ds_state(pipeline, pCreateInfo->pDepthStencilState, pass, subpass);
 
-   gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
-                                pCreateInfo->pMultisampleState);
+   emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
+                           pCreateInfo->pMultisampleState);
 
    emit_urb_setup(pipeline);
 
-   const VkPipelineRasterizationStateCreateInfo *rs_info =
-      pCreateInfo->pRasterizationState;
-
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
-      clip.FrontWinding             = vk_to_gen_front_face[rs_info->frontFace],
-      clip.CullMode                 = vk_to_gen_cullmode[rs_info->cullMode],
-      clip.ClipEnable               = !(extra && extra->use_rectlist),
-      clip.APIMode                  = APIMODE_OGL,
-      clip.ViewportXYClipTestEnable = true,
-      clip.ViewportZClipTestEnable  = !pipeline->depth_clamp_enable,
-      clip.ClipMode                 = CLIPMODE_NORMAL,
-
-      clip.TriangleStripListProvokingVertexSelect   = 0,
-      clip.LineStripListProvokingVertexSelect       = 0,
-      clip.TriangleFanProvokingVertexSelect         = 1,
-
-      clip.MinimumPointWidth        = 0.125,
-      clip.MaximumPointWidth        = 255.875,
-      clip.MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1;
-   }
+   emit_3dstate_clip(pipeline, pCreateInfo->pViewportState,
+                     pCreateInfo->pRasterizationState, extra);
 
    if (pCreateInfo->pMultisampleState &&
        pCreateInfo->pMultisampleState->rasterizationSamples > 1)
@@ -385,6 +297,7 @@ genX(graphics_pipeline_create)(
          wm.LineEndCapAntialiasingRegionWidth   = 0; /* 0.5 pixels */
          wm.LineAntialiasingRegionWidth         = 1; /* 1.0 pixels */
          wm.PointRasterizationRule              = RASTRULE_UPPER_RIGHT;
+         wm.PixelShaderKillPixel                = wm_prog_data->uses_kill;
          wm.PixelShaderComputedDepthMode        = wm_prog_data->computed_depth_mode;
          wm.PixelShaderUsesSourceDepth          = wm_prog_data->uses_src_depth;
          wm.PixelShaderUsesSourceW              = wm_prog_data->uses_src_w;
index 6d70df6..cc10d3a 100644 (file)
@@ -100,123 +100,6 @@ emit_rs_state(struct anv_pipeline *pipeline,
 }
 
 static void
-emit_cb_state(struct anv_pipeline *pipeline,
-              const VkPipelineColorBlendStateCreateInfo *info,
-              const VkPipelineMultisampleStateCreateInfo *ms_info)
-{
-   struct anv_device *device = pipeline->device;
-
-   uint32_t num_dwords = GENX(BLEND_STATE_length);
-   pipeline->blend_state =
-      anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);
-
-   struct GENX(BLEND_STATE) blend_state = {
-      .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
-      .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
-   };
-
-   /* Default everything to disabled */
-   for (uint32_t i = 0; i < 8; i++) {
-      blend_state.Entry[i].WriteDisableAlpha = true;
-      blend_state.Entry[i].WriteDisableRed = true;
-      blend_state.Entry[i].WriteDisableGreen = true;
-      blend_state.Entry[i].WriteDisableBlue = true;
-   }
-
-   struct anv_pipeline_bind_map *map =
-      &pipeline->bindings[MESA_SHADER_FRAGMENT];
-
-   bool has_writeable_rt = false;
-   for (unsigned i = 0; i < map->surface_count; i++) {
-      struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
-
-      /* All color attachments are at the beginning of the binding table */
-      if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
-         break;
-
-      /* We can have at most 8 attachments */
-      assert(i < 8);
-
-      if (binding->index >= info->attachmentCount)
-         continue;
-
-      assert(binding->binding == 0);
-      const VkPipelineColorBlendAttachmentState *a =
-         &info->pAttachments[binding->index];
-
-      if (a->srcColorBlendFactor != a->srcAlphaBlendFactor ||
-          a->dstColorBlendFactor != a->dstAlphaBlendFactor ||
-          a->colorBlendOp != a->alphaBlendOp) {
-         blend_state.IndependentAlphaBlendEnable = true;
-      }
-
-      blend_state.Entry[i] = (struct GENX(BLEND_STATE_ENTRY)) {
-         .LogicOpEnable = info->logicOpEnable,
-         .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
-         .ColorBufferBlendEnable = a->blendEnable,
-         .PreBlendSourceOnlyClampEnable = false,
-         .ColorClampRange = COLORCLAMP_RTFORMAT,
-         .PreBlendColorClampEnable = true,
-         .PostBlendColorClampEnable = true,
-         .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
-         .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
-         .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
-         .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
-         .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
-         .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
-         .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
-         .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
-         .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
-         .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
-      };
-
-      if (a->colorWriteMask != 0)
-         has_writeable_rt = true;
-
-      /* Our hardware applies the blend factor prior to the blend function
-       * regardless of what function is used.  Technically, this means the
-       * hardware can do MORE than GL or Vulkan specify.  However, it also
-       * means that, for MIN and MAX, we have to stomp the blend factor to
-       * ONE to make it a no-op.
-       */
-      if (a->colorBlendOp == VK_BLEND_OP_MIN ||
-          a->colorBlendOp == VK_BLEND_OP_MAX) {
-         blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE;
-         blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE;
-      }
-      if (a->alphaBlendOp == VK_BLEND_OP_MIN ||
-          a->alphaBlendOp == VK_BLEND_OP_MAX) {
-         blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE;
-         blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
-      }
-   }
-
-   struct GENX(BLEND_STATE_ENTRY) *bs0 = &blend_state.Entry[0];
-
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND), blend) {
-      blend.AlphaToCoverageEnable         = blend_state.AlphaToCoverageEnable;
-      blend.HasWriteableRT                = has_writeable_rt;
-      blend.ColorBufferBlendEnable        = bs0->ColorBufferBlendEnable;
-      blend.SourceAlphaBlendFactor        = bs0->SourceAlphaBlendFactor;
-      blend.DestinationAlphaBlendFactor   = bs0->DestinationAlphaBlendFactor;
-      blend.SourceBlendFactor             = bs0->SourceBlendFactor;
-      blend.DestinationBlendFactor        = bs0->DestinationBlendFactor;
-      blend.AlphaTestEnable               = false;
-      blend.IndependentAlphaBlendEnable   =
-         blend_state.IndependentAlphaBlendEnable;
-   }
-
-   GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state);
-   if (!device->info.has_llc)
-      anv_state_clflush(pipeline->blend_state);
-
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
-      bsp.BlendStatePointer      = pipeline->blend_state.offset;
-      bsp.BlendStatePointerValid = true;
-   }
-}
-
-static void
 emit_ms_state(struct anv_pipeline *pipeline,
               const VkPipelineMultisampleStateCreateInfo *info)
 {
@@ -303,29 +186,10 @@ genX(graphics_pipeline_create)(
 
    emit_urb_setup(pipeline);
 
-   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
-   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
-      clip.ClipEnable               = !(extra && extra->use_rectlist);
-      clip.EarlyCullEnable          = true;
-      clip.APIMode                  = 1; /* D3D */
-      clip.ViewportXYClipTestEnable = true;
-
-      clip.ClipMode =
-         pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
-         REJECT_ALL : NORMAL;
-
-      clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
-         (wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0;
-
-      clip.TriangleStripListProvokingVertexSelect  = 0;
-      clip.LineStripListProvokingVertexSelect      = 0;
-      clip.TriangleFanProvokingVertexSelect        = 1;
-
-      clip.MinimumPointWidth  = 0.125;
-      clip.MaximumPointWidth  = 255.875;
-      clip.MaximumVPIndex     = pCreateInfo->pViewportState->viewportCount - 1;
-   }
+   emit_3dstate_clip(pipeline, pCreateInfo->pViewportState,
+                     pCreateInfo->pRasterizationState, extra);
 
+   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
    anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) {
       wm.StatisticsEnable                    = true;
       wm.LineEndCapAntialiasingRegionWidth   = _05pixels;
index 741d5bf..f92d856 100644 (file)
@@ -1448,4 +1448,17 @@ void genX(CmdCopyQueryPoolResults)(
    }
 }
 
+#else
+void genX(CmdCopyQueryPoolResults)(
+    VkCommandBuffer                             commandBuffer,
+    VkQueryPool                                 queryPool,
+    uint32_t                                    firstQuery,
+    uint32_t                                    queryCount,
+    VkBuffer                                    destBuffer,
+    VkDeviceSize                                destOffset,
+    VkDeviceSize                                destStride,
+    VkQueryResultFlags                          flags)
+{
+   anv_finishme("Queries not yet supported on Ivy Bridge");
+}
 #endif
index 0d36e3c..a74071c 100644 (file)
@@ -315,10 +315,14 @@ get_pipeline_state_l3_weights(const struct anv_pipeline *pipeline)
    bool needs_dc = false, needs_slm = false;
 
    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
-      const struct brw_stage_prog_data *prog_data = pipeline->prog_data[i];
+      if (!anv_pipeline_has_stage(pipeline, i))
+         continue;
+
+      const struct brw_stage_prog_data *prog_data =
+         anv_shader_bin_get_prog_data(pipeline->shaders[i]);
 
       needs_dc |= pipeline->needs_data_cache;
-      needs_slm |= prog_data && prog_data->total_shared;
+      needs_slm |= prog_data->total_shared;
    }
 
    return get_default_l3_weights(&pipeline->device->info,
index 5cbcfd2..7d8129d 100644 (file)
@@ -63,8 +63,7 @@ genX(compute_pipeline_create)(
    /* When we free the pipeline, we detect stages based on the NULL status
     * of various prog_data pointers.  Make them NULL by default.
     */
-   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
-   memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
+   memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
 
    pipeline->vs_simd8 = NO_KERNEL;
    pipeline->vs_vec4 = NO_KERNEL;
@@ -76,9 +75,13 @@ genX(compute_pipeline_create)(
 
    assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT);
    ANV_FROM_HANDLE(anv_shader_module, module,  pCreateInfo->stage.module);
-   anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module,
-                           pCreateInfo->stage.pName,
-                           pCreateInfo->stage.pSpecializationInfo);
+   result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module,
+                                    pCreateInfo->stage.pName,
+                                    pCreateInfo->stage.pSpecializationInfo);
+   if (result != VK_SUCCESS) {
+      anv_free2(&device->alloc, pAllocator, pipeline);
+      return result;
+   }
 
    pipeline->use_repclear = false;
 
index 669b456..94692e4 100644 (file)
@@ -291,6 +291,11 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline)
       if (input_index < 0)
          continue;
 
+      if (attr == VARYING_SLOT_PNTC) {
+         sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
+         continue;
+      }
+
       const int slot = fs_input_map->varying_to_slot[attr];
 
       if (input_index >= 16)
@@ -512,3 +517,177 @@ emit_ds_state(struct anv_pipeline *pipeline,
    GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, depth_stencil_dw, &depth_stencil);
 #endif
 }
+
+static void
+emit_cb_state(struct anv_pipeline *pipeline,
+              const VkPipelineColorBlendStateCreateInfo *info,
+              const VkPipelineMultisampleStateCreateInfo *ms_info)
+{
+   struct anv_device *device = pipeline->device;
+
+   const uint32_t num_dwords = GENX(BLEND_STATE_length);
+   pipeline->blend_state =
+      anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);
+
+   struct GENX(BLEND_STATE) blend_state = {
+#if GEN_GEN >= 8
+      .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
+      .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
+#else
+      /* Make sure it gets zeroed */
+      .Entry = { { 0, }, },
+#endif
+   };
+
+   /* Default everything to disabled */
+   for (uint32_t i = 0; i < 8; i++) {
+      blend_state.Entry[i].WriteDisableAlpha = true;
+      blend_state.Entry[i].WriteDisableRed = true;
+      blend_state.Entry[i].WriteDisableGreen = true;
+      blend_state.Entry[i].WriteDisableBlue = true;
+   }
+
+   uint32_t surface_count = 0;
+   struct anv_pipeline_bind_map *map;
+   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
+      map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
+      surface_count = map->surface_count;
+   }
+
+   bool has_writeable_rt = false;
+   for (unsigned i = 0; i < surface_count; i++) {
+      struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
+
+      /* All color attachments are at the beginning of the binding table */
+      if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
+         break;
+
+      /* We can have at most 8 attachments */
+      assert(i < 8);
+
+      if (binding->index >= info->attachmentCount)
+         continue;
+
+      assert(binding->binding == 0);
+      const VkPipelineColorBlendAttachmentState *a =
+         &info->pAttachments[binding->index];
+
+      blend_state.Entry[i] = (struct GENX(BLEND_STATE_ENTRY)) {
+#if GEN_GEN < 8
+         .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
+         .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
+#endif
+         .LogicOpEnable = info->logicOpEnable,
+         .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
+         .ColorBufferBlendEnable = a->blendEnable,
+         .ColorClampRange = COLORCLAMP_RTFORMAT,
+         .PreBlendColorClampEnable = true,
+         .PostBlendColorClampEnable = true,
+         .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
+         .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
+         .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
+         .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
+         .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
+         .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
+         .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
+         .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
+         .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
+         .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
+      };
+
+      if (a->srcColorBlendFactor != a->srcAlphaBlendFactor ||
+          a->dstColorBlendFactor != a->dstAlphaBlendFactor ||
+          a->colorBlendOp != a->alphaBlendOp) {
+#if GEN_GEN >= 8
+         blend_state.IndependentAlphaBlendEnable = true;
+#else
+         blend_state.Entry[i].IndependentAlphaBlendEnable = true;
+#endif
+      }
+
+      if (a->colorWriteMask != 0)
+         has_writeable_rt = true;
+
+      /* Our hardware applies the blend factor prior to the blend function
+       * regardless of what function is used.  Technically, this means the
+       * hardware can do MORE than GL or Vulkan specify.  However, it also
+       * means that, for MIN and MAX, we have to stomp the blend factor to
+       * ONE to make it a no-op.
+       */
+      if (a->colorBlendOp == VK_BLEND_OP_MIN ||
+          a->colorBlendOp == VK_BLEND_OP_MAX) {
+         blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE;
+         blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE;
+      }
+      if (a->alphaBlendOp == VK_BLEND_OP_MIN ||
+          a->alphaBlendOp == VK_BLEND_OP_MAX) {
+         blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE;
+         blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
+      }
+   }
+
+#if GEN_GEN >= 8
+   struct GENX(BLEND_STATE_ENTRY) *bs0 = &blend_state.Entry[0];
+   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND), blend) {
+      blend.AlphaToCoverageEnable         = blend_state.AlphaToCoverageEnable;
+      blend.HasWriteableRT                = has_writeable_rt;
+      blend.ColorBufferBlendEnable        = bs0->ColorBufferBlendEnable;
+      blend.SourceAlphaBlendFactor        = bs0->SourceAlphaBlendFactor;
+      blend.DestinationAlphaBlendFactor   = bs0->DestinationAlphaBlendFactor;
+      blend.SourceBlendFactor             = bs0->SourceBlendFactor;
+      blend.DestinationBlendFactor        = bs0->DestinationBlendFactor;
+      blend.AlphaTestEnable               = false;
+      blend.IndependentAlphaBlendEnable   =
+         blend_state.IndependentAlphaBlendEnable;
+   }
+#else
+   (void)has_writeable_rt;
+#endif
+
+   GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state);
+   if (!device->info.has_llc)
+      anv_state_clflush(pipeline->blend_state);
+
+   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
+      bsp.BlendStatePointer      = pipeline->blend_state.offset;
+#if GEN_GEN >= 8
+      bsp.BlendStatePointerValid = true;
+#endif
+   }
+}
+
+static void
+emit_3dstate_clip(struct anv_pipeline *pipeline,
+                  const VkPipelineViewportStateCreateInfo *vp_info,
+                  const VkPipelineRasterizationStateCreateInfo *rs_info,
+                  const struct anv_graphics_pipeline_create_info *extra)
+{
+   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
+   (void) wm_prog_data;
+   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
+      clip.ClipEnable               = !(extra && extra->use_rectlist);
+      clip.EarlyCullEnable          = true;
+      clip.APIMode                  = APIMODE_D3D,
+      clip.ViewportXYClipTestEnable = true;
+
+      clip.ClipMode = rs_info->rasterizerDiscardEnable ?
+         CLIPMODE_REJECT_ALL : CLIPMODE_NORMAL;
+
+      clip.TriangleStripListProvokingVertexSelect = 0;
+      clip.LineStripListProvokingVertexSelect     = 0;
+      clip.TriangleFanProvokingVertexSelect       = 1;
+
+      clip.MinimumPointWidth = 0.125;
+      clip.MaximumPointWidth = 255.875;
+      clip.MaximumVPIndex    = vp_info->viewportCount - 1;
+
+#if GEN_GEN == 7
+      clip.FrontWinding            = vk_to_gen_front_face[rs_info->frontFace];
+      clip.CullMode                = vk_to_gen_cullmode[rs_info->cullMode];
+      clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable;
+#else
+      clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
+         (wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0;
+#endif
+   }
+}
index 522fba3..56ffc5d 100644 (file)
 #include "xmlpool.h"
 #endif
 #endif
-#ifdef HAVE_SYSFS
-#include <sys/types.h>
+#ifdef MAJOR_IN_MKDEV
+#include <sys/mkdev.h>
+#endif
+#ifdef MAJOR_IN_SYSMACROS
+#include <sys/sysmacros.h>
 #endif
 #include "loader.h"
 
index 896f225..67d0c2c 100644 (file)
@@ -785,6 +785,7 @@ loader_dri3_open(xcb_connection_t *conn,
    }
 
    fd = xcb_dri3_open_reply_fds(conn, reply)[0];
+   free(reply);
    fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
 
    return fd;
index 38faccc..8f3fa91 100644 (file)
  *    Chia-I Wu <olv@lunarg.com>
  */
 
+#ifdef HAVE_FUNC_ATTRIBUTE_VISIBILITY
+#define HIDDEN __attribute__((visibility("hidden")))
+#else
+#define HIDDEN
+#endif
 
 __asm__(".text\n"
         ".balign 32\n"
@@ -54,8 +59,8 @@ entry_patch_public(void)
 {
 }
 
-static char
-x86_64_entry_start[];
+extern char
+x86_64_entry_start[] HIDDEN;
 
 mapi_func
 entry_get_public(int slot)
index 46d2ece..545b5a3 100644 (file)
 
 #include <string.h>
 
+#ifdef HAVE_FUNC_ATTRIBUTE_VISIBILITY
+#define HIDDEN __attribute__((visibility("hidden")))
+#else
+#define HIDDEN
+#endif
+
 __asm__(".text");
 
 __asm__("x86_current_tls:\n\t"
@@ -71,8 +77,8 @@ __asm__(".text");
 extern unsigned long
 x86_current_tls();
 
-static char x86_entry_start[];
-static char x86_entry_end[];
+extern char x86_entry_start[] HIDDEN;
+extern char x86_entry_end[] HIDDEN;
 
 void
 entry_patch_public(void)
index ea7bacb..0c28c8f 100644 (file)
  *    Chia-I Wu <olv@lunarg.com>
  */
 
+#ifdef HAVE_FUNC_ATTRIBUTE_VISIBILITY
+#define HIDDEN __attribute__((visibility("hidden")))
+#else
+#define HIDDEN
+#endif
 
 #define X86_ENTRY_SIZE 32
 
@@ -58,8 +63,8 @@ __asm__(".balign 32\n"
 #include <string.h>
 #include "u_execmem.h"
 
-static const char x86_entry_start[];
-static const char x86_entry_end[];
+extern const char x86_entry_start[] HIDDEN;
+extern const char x86_entry_end[] HIDDEN;
 
 void
 entry_patch_public(void)
index 84db5a8..bd5b3d3 100644 (file)
@@ -645,11 +645,11 @@ INCLUDE_DIRS = \
        -I$(top_builddir)/src \
        -I$(top_srcdir)/src \
        -I$(top_builddir)/src/compiler/nir \
-       -I$(top_srcdir)/src/mesa \
        -I$(top_builddir)/src/mesa \
-       -I$(top_srcdir)/src/mesa/main \
+       -I$(top_srcdir)/src/mesa \
        -I$(top_builddir)/src/mesa/main \
-       -I$(top_srcdir)/src/mapi \
+       -I$(top_srcdir)/src/mesa/main \
        -I$(top_builddir)/src/mapi \
+       -I$(top_srcdir)/src/mapi \
        -I$(top_srcdir)/src/gallium/include \
        -I$(top_srcdir)/src/gallium/auxiliary
index 434800e..8f41174 100644 (file)
@@ -15,13 +15,13 @@ env.MSVC2013Compat()
 env.Append(CPPPATH = [
     '../compiler/nir',  # for generated nir_opcodes.h, etc
     '#/src',
+    Dir('../mapi'), # src/mapi build path
     '#/src/mapi',
     '#/src/glsl',
+    Dir('.'), # src/mesa build path
     '#/src/mesa',
     '#/src/gallium/include',
     '#/src/gallium/auxiliary',
-    Dir('../mapi'), # src/mapi build path
-    Dir('.'), # src/mesa build path
 ])
 
 if env['platform'] == 'windows':
@@ -116,7 +116,7 @@ if env['platform'] not in ('cygwin', 'darwin', 'windows', 'haiku'):
         )
         # Add the dir containing the generated header (somewhere inside  the
         # build dir) to the include path
-        env.Append(CPPPATH = [matypes[0].dir])
+        env.Prepend(CPPPATH = [matypes[0].dir])
 
 
 def write_git_sha1_h_file(filename):
index e5a3f00..5607d5b 100644 (file)
@@ -858,6 +858,7 @@ intel_update_image_buffers(struct intel_context *intel, __DRIdrawable *drawable)
    struct __DRIimageList images;
    unsigned int format;
    uint32_t buffer_mask = 0;
+   int ret;
 
    front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
    back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
@@ -877,12 +878,14 @@ intel_update_image_buffers(struct intel_context *intel, __DRIdrawable *drawable)
    if (back_rb)
       buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
 
-   (*screen->image.loader->getBuffers) (drawable,
-                                        driGLFormatToImageFormat(format),
-                                        &drawable->dri2.stamp,
-                                        drawable->loaderPrivate,
-                                        buffer_mask,
-                                        &images);
+   ret = screen->image.loader->getBuffers(drawable,
+                                          driGLFormatToImageFormat(format),
+                                          &drawable->dri2.stamp,
+                                          drawable->loaderPrivate,
+                                          buffer_mask,
+                                          &images);
+   if (!ret)
+      return;
 
    if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
       drawable->w = images.front->width;
index f448551..194b412 100644 (file)
@@ -48,6 +48,7 @@ i965_compiler_FILES = \
        brw_nir_attribute_workarounds.c \
        brw_nir_intrinsics.c \
        brw_nir_opt_peephole_ffma.c \
+       brw_nir_tcs_workarounds.c \
        brw_packed_float.c \
        brw_predicated_break.cpp \
        brw_reg.h \
index 10e9f47..7d15c28 100644 (file)
@@ -220,6 +220,8 @@ struct brw_tcs_prog_key
    /** A bitfield of per-vertex outputs written. */
    uint64_t outputs_written;
 
+   bool quads_workaround;
+
    struct brw_sampler_prog_key_data tex;
 };
 
index 1cb99da..2af42e0 100644 (file)
@@ -5681,7 +5681,7 @@ fs_visitor::setup_gs_payload()
     * have to multiply by VerticesIn to obtain the total storage requirement.
     */
    if (8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in >
-       max_push_components) {
+       max_push_components || gs_prog_data->invocations > 1) {
       gs_prog_data->base.include_vue_handles = true;
 
       /* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
index 11c078a..91763d3 100644 (file)
@@ -2322,23 +2322,23 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
          break;
 
       fs_reg m0 = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
-      fs_reg m0_2 = byte_offset(m0, 2 * sizeof(uint32_t));
+      fs_reg m0_2 = component(m0, 2);
 
-      const fs_builder fwa_bld = bld.exec_all();
+      const fs_builder chanbld = bld.exec_all().group(1, 0);
 
       /* Zero the message header */
-      fwa_bld.MOV(m0, brw_imm_ud(0u));
+      bld.exec_all().MOV(m0, brw_imm_ud(0u));
 
       /* Copy "Barrier ID" from r0.2, bits 16:13 */
-      fwa_bld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
+      chanbld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
                   brw_imm_ud(INTEL_MASK(16, 13)));
 
       /* Shift it up to bits 27:24. */
-      fwa_bld.SHL(m0_2, m0_2, brw_imm_ud(11));
+      chanbld.SHL(m0_2, m0_2, brw_imm_ud(11));
 
       /* Set the Barrier Count and the enable bit */
-      fwa_bld.OR(m0_2, m0_2,
-                 brw_imm_ud(tcs_prog_data->instances << 8 | (1 << 15)));
+      chanbld.OR(m0_2, m0_2,
+                 brw_imm_ud(tcs_prog_data->instances << 9 | (1 << 15)));
 
       bld.emit(SHADER_OPCODE_BARRIER, bld.null_reg_ud(), m0);
       break;
@@ -4060,12 +4060,23 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld,
       dest = get_nir_dest(instr->dest);
 
    fs_reg surface = brw_imm_ud(GEN7_BTI_SLM);
-   fs_reg offset = get_nir_src(instr->src[0]);
+   fs_reg offset;
    fs_reg data1 = get_nir_src(instr->src[1]);
    fs_reg data2;
    if (op == BRW_AOP_CMPWR)
       data2 = get_nir_src(instr->src[2]);
 
+   /* Get the offset */
+   nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+   if (const_offset) {
+      offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
+   } else {
+      offset = vgrf(glsl_type::uint_type);
+      bld.ADD(offset,
+             retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD),
+             brw_imm_ud(instr->const_index[0]));
+   }
+
    /* Emit the actual atomic operation operation */
 
    fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset,
index 74c354f..6185310 100644 (file)
@@ -117,6 +117,8 @@ bool brw_nir_apply_attribute_workarounds(nir_shader *nir,
 
 bool brw_nir_apply_trig_workarounds(nir_shader *nir);
 
+void brw_nir_apply_tcs_quads_workaround(nir_shader *nir);
+
 nir_shader *brw_nir_apply_sampler_key(nir_shader *nir,
                                       const struct brw_device_info *devinfo,
                                       const struct brw_sampler_prog_key_data *key,
diff --git a/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c b/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c
new file mode 100644 (file)
index 0000000..0626981
--- /dev/null
@@ -0,0 +1,152 @@
+/*
+ * Copyright Â© 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "compiler/nir/nir_builder.h"
+#include "brw_nir.h"
+
+/**
+ * Implements the WaPreventHSTessLevelsInterference workaround (for Gen7-8).
+ *
+ * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the
+ * definition of the patch header layouts):
+ *
+ *    "HW Bug: The Tessellation stage will incorrectly add domain points
+ *     along patch edges under the following conditions, which may result
+ *     in conformance failures and/or cracking artifacts:
+ *
+ *       * QUAD domain
+ *       * INTEGER partitioning
+ *       * All three TessFactors in a given U or V direction (e.g., V
+ *         direction: UEQ0, InsideV, UEQ1) are all exactly 1.0
+ *       * All three TessFactors in the other direction are > 1.0 and all
+ *         round up to the same integer value (e.g, U direction:
+ *         VEQ0 = 3.1, InsideU = 3.7, VEQ1 = 3.4)
+ *
+ *     The suggested workaround (to be implemented as part of the postamble
+ *     to the HS shader in the HS kernel) is:
+ *
+ *     if (
+ *        (TF[UEQ0] > 1.0) ||
+ *        (TF[VEQ0] > 1.0) ||
+ *        (TF[UEQ1] > 1.0) ||
+ *        (TF[VEQ1] > 1.0) ||
+ *        (TF[INSIDE_U] > 1.0) ||
+ *        (TF[INSIDE_V] > 1.0) )
+ *     {
+ *        TF[INSIDE_U] = (TF[INSIDE_U] == 1.0) ? 2.0 : TF[INSIDE_U];
+ *        TF[INSIDE_V] = (TF[INSIDE_V] == 1.0) ? 2.0 : TF[INSIDE_V];
+ *     }"
+ *
+ * There's a subtlety here.  Intel internal HSD-ES bug 1208668495 notes
+ * that the above workaround fails to fix certain GL/ES CTS tests which
+ * have inside tessellation factors of -1.0.  This can be explained by
+ * a quote from the ARB_tessellation_shader specification:
+ *
+ *    "If "equal_spacing" is used, the floating-point tessellation level is
+ *     first clamped to the range [1,<max>], where <max> is implementation-
+ *     dependent maximum tessellation level (MAX_TESS_GEN_LEVEL)."
+ *
+ * In other words, the actual inner tessellation factor used is
+ * clamp(TF[INSIDE_*], 1.0, 64.0).  So we want to compare the clamped
+ * value against 1.0.  To accomplish this, we change the comparison from
+ * (TF[INSIDE_*] == 1.0) to (TF[INSIDE_*] <= 1.0).
+ */
+
+static inline nir_ssa_def *
+load_output(nir_builder *b, int num_components, int offset)
+{
+   nir_intrinsic_instr *load =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_output);
+   nir_ssa_dest_init(&load->instr, &load->dest, num_components, 32, NULL);
+   load->num_components = num_components;
+   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
+   nir_intrinsic_set_base(load, offset);
+
+   nir_builder_instr_insert(b, &load->instr);
+
+   return &load->dest.ssa;
+}
+
+static inline void
+store_output(nir_builder *b, nir_ssa_def *value, int offset, unsigned comps)
+{
+   nir_intrinsic_instr *store =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+   store->num_components = comps;
+   nir_intrinsic_set_write_mask(store, (1u << comps) - 1);
+   store->src[0] = nir_src_for_ssa(value);
+   store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
+   nir_builder_instr_insert(b, &store->instr);
+}
+
+static void
+emit_quads_workaround(nir_builder *b, nir_block *block)
+{
+   /* We're going to insert a new if-statement in a predecessor of the end
+    * block.  This would normally create a new block (after the if) which
+    * would then become the predecessor of the end block, causing our set
+    * walking to get screwed up.  To avoid this, just emit a constant at
+    * the end of our current block, and insert the if before that.
+    */
+   b->cursor = nir_after_block_before_jump(block);
+   b->cursor = nir_before_instr(nir_imm_int(b, 0)->parent_instr);
+
+   nir_ssa_def *inner = load_output(b, 2, 0);
+   nir_ssa_def *outer = load_output(b, 4, 1);
+
+   nir_ssa_def *any_greater_than_1 =
+       nir_ior(b, nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), outer)),
+                  nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), inner)));
+
+   nir_if *if_stmt = nir_if_create(b->shader);
+   if_stmt->condition = nir_src_for_ssa(any_greater_than_1);
+   nir_builder_cf_insert(b, &if_stmt->cf_node);
+
+   /* Fill out the new then-block */
+   b->cursor = nir_after_cf_list(&if_stmt->then_list);
+
+   store_output(b, nir_bcsel(b, nir_fge(b, nir_imm_float(b, 1.0f), inner),
+                                nir_imm_float(b, 2.0f), inner), 0, 2);
+}
+
+void
+brw_nir_apply_tcs_quads_workaround(nir_shader *nir)
+{
+   assert(nir->stage == MESA_SHADER_TESS_CTRL);
+
+   nir_foreach_function(func, nir) {
+      if (!func->impl)
+         continue;
+
+      nir_builder b;
+      nir_builder_init(&b, func->impl);
+
+      struct set_entry *entry;
+      set_foreach(func->impl->end_block->predecessors, entry) {
+         nir_block *pred = (nir_block *) entry->key;
+         emit_quads_workaround(&b, pred);
+      }
+
+      nir_metadata_preserve(func->impl, 0);
+   }
+}
index a91c6e2..a42a322 100644 (file)
@@ -686,12 +686,12 @@ stop_oa_counters(struct brw_context *brw)
  * The amount of batch space it takes to emit an MI_REPORT_PERF_COUNT snapshot,
  * including the required PIPE_CONTROL flushes.
  *
- * Sandybridge is the worst case scenario: brw_emit_mi_flush
- * expands to three PIPE_CONTROLs which are 4 DWords each.  We have to flush
- * before and after MI_REPORT_PERF_COUNT, so multiply by two.  Finally, add
- * the 3 DWords for MI_REPORT_PERF_COUNT itself.
+ * Sandybridge is the worst case scenario: brw_emit_mi_flush expands to four
+ * PIPE_CONTROLs which are 5 DWords each.  We have to flush before and after
+ * MI_REPORT_PERF_COUNT, so multiply by two.  Finally, add the 3 DWords for
+ * MI_REPORT_PERF_COUNT itself.
  */
-#define MI_REPORT_PERF_COUNT_BATCH_DWORDS (2 * (3 * 4) + 3)
+#define MI_REPORT_PERF_COUNT_BATCH_DWORDS (2 * (4 * 5) + 3)
 
 /**
  * Emit an MI_REPORT_PERF_COUNT command packet.
index 4672efd..d51cf1b 100644 (file)
@@ -96,10 +96,38 @@ gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
 void
 brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
 {
+   if (brw->gen >= 6 &&
+       (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
+       (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
+      /* A pipe control command with flush and invalidate bits set
+       * simultaneously is an inherently racy operation on Gen6+ if the
+       * contents of the flushed caches were intended to become visible from
+       * any of the invalidated caches.  Split it in two PIPE_CONTROLs, the
+       * first one should stall the pipeline to make sure that the flushed R/W
+       * caches are coherent with memory once the specified R/O caches are
+       * invalidated.  On pre-Gen6 hardware the (implicit) R/O cache
+       * invalidation seems to happen at the bottom of the pipeline together
+       * with any write cache flush, so this shouldn't be a concern.
+       */
+      brw_emit_pipe_control_flush(brw, (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) |
+                                       PIPE_CONTROL_CS_STALL);
+      flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
+   }
+
    if (brw->gen >= 8) {
       if (brw->gen == 8)
          gen8_add_cs_stall_workaround_bits(&flags);
 
+      if (brw->gen == 9 &&
+          (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
+         /* Hardware workaround: SKL
+          *
+          * Emit Pipe Control with all bits set to zero before emitting
+          * a Pipe Control with VF Cache Invalidate set.
+          */
+         brw_emit_pipe_control_flush(brw, 0);
+      }
+
       BEGIN_BATCH(6);
       OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
       OUT_BATCH(flags);
@@ -311,15 +339,6 @@ brw_emit_mi_flush(struct brw_context *brw)
    } else {
       int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH;
       if (brw->gen >= 6) {
-         if (brw->gen == 9) {
-            /* Hardware workaround: SKL
-             *
-             * Emit Pipe Control with all bits set to zero before emitting
-             * a Pipe Control with VF Cache Invalidate set.
-             */
-            brw_emit_pipe_control_flush(brw, 0);
-         }
-
          flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                   PIPE_CONTROL_VF_CACHE_INVALIDATE |
index 8a5dd7e..6b7fde2 100644 (file)
@@ -153,6 +153,8 @@ brw_tcs_debug_recompile(struct brw_context *brw,
                       key->patch_outputs_written);
    found |= key_debug(brw, "TES primitive mode", old_key->tes_primitive_mode,
                       key->tes_primitive_mode);
+   found |= key_debug(brw, "quads and equal_spacing workaround",
+                      old_key->quads_workaround, key->quads_workaround);
    found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
 
    if (!found) {
@@ -346,6 +348,9 @@ brw_upload_tcs_prog(struct brw_context *brw,
     * based on the domain the DS is expecting to tessellate.
     */
    key.tes_primitive_mode = tep->program.PrimitiveMode;
+   key.quads_workaround = brw->gen < 9 &&
+                          tep->program.PrimitiveMode == GL_QUADS &&
+                          tep->program.Spacing == GL_EQUAL;
 
    if (tcp) {
       key.program_string_id = tcp->id;
@@ -383,6 +388,8 @@ brw_tcs_precompile(struct gl_context *ctx,
 
    struct gl_tess_ctrl_program *tcp = (struct gl_tess_ctrl_program *)prog;
    struct brw_tess_ctrl_program *btcp = brw_tess_ctrl_program(tcp);
+   const struct gl_shader *tes =
+      shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
 
    memset(&key, 0, sizeof(key));
 
@@ -393,9 +400,14 @@ brw_tcs_precompile(struct gl_context *ctx,
    if (brw->gen < 8)
       key.input_vertices = shader_prog->TessCtrl.VerticesOut;
 
-   key.tes_primitive_mode =
-      shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL] ?
-      shader_prog->TessEval.PrimitiveMode : GL_TRIANGLES;
+   if (tes) {
+      key.tes_primitive_mode = shader_prog->TessEval.PrimitiveMode;
+      key.quads_workaround = brw->gen < 9 &&
+                             shader_prog->TessEval.PrimitiveMode == GL_QUADS &&
+                             shader_prog->TessEval.Spacing == GL_EQUAL;
+   } else {
+      key.tes_primitive_mode = GL_TRIANGLES;
+   }
 
    key.outputs_written = prog->OutputsWritten;
    key.patch_outputs_written = prog->PatchOutputsWritten;
index 162b481..a7398a7 100644 (file)
@@ -354,95 +354,97 @@ vec4_visitor::opt_vector_float()
 {
    bool progress = false;
 
-   int last_reg = -1, last_reg_offset = -1;
-   enum brw_reg_file last_reg_file = BAD_FILE;
+   foreach_block(block, cfg) {
+      int last_reg = -1, last_reg_offset = -1;
+      enum brw_reg_file last_reg_file = BAD_FILE;
+
+      uint8_t imm[4] = { 0 };
+      int inst_count = 0;
+      vec4_instruction *imm_inst[4];
+      unsigned writemask = 0;
+      enum brw_reg_type dest_type = BRW_REGISTER_TYPE_F;
+
+      foreach_inst_in_block_safe(vec4_instruction, inst, block) {
+         int vf = -1;
+         enum brw_reg_type need_type;
+
+         /* Look for unconditional MOVs from an immediate with a partial
+          * writemask.  Skip type-conversion MOVs other than integer 0,
+          * where the type doesn't matter.  See if the immediate can be
+          * represented as a VF.
+          */
+         if (inst->opcode == BRW_OPCODE_MOV &&
+             inst->src[0].file == IMM &&
+             inst->predicate == BRW_PREDICATE_NONE &&
+             inst->dst.writemask != WRITEMASK_XYZW &&
+             (inst->src[0].type == inst->dst.type || inst->src[0].d == 0)) {
+
+            vf = brw_float_to_vf(inst->src[0].d);
+            need_type = BRW_REGISTER_TYPE_D;
+
+            if (vf == -1) {
+               vf = brw_float_to_vf(inst->src[0].f);
+               need_type = BRW_REGISTER_TYPE_F;
+            }
+         } else {
+            last_reg = -1;
+         }
 
-   uint8_t imm[4] = { 0 };
-   int inst_count = 0;
-   vec4_instruction *imm_inst[4];
-   unsigned writemask = 0;
-   enum brw_reg_type dest_type = BRW_REGISTER_TYPE_F;
+         /* If this wasn't a MOV, or the destination register doesn't match,
+          * or we have to switch destination types, then this breaks our
+          * sequence.  Combine anything we've accumulated so far.
+          */
+         if (last_reg != inst->dst.nr ||
+             last_reg_offset != inst->dst.reg_offset ||
+             last_reg_file != inst->dst.file ||
+             (vf > 0 && dest_type != need_type)) {
+
+            if (inst_count > 1) {
+               unsigned vf;
+               memcpy(&vf, imm, sizeof(vf));
+               vec4_instruction *mov = MOV(imm_inst[0]->dst, brw_imm_vf(vf));
+               mov->dst.type = dest_type;
+               mov->dst.writemask = writemask;
+               inst->insert_before(block, mov);
+
+               for (int i = 0; i < inst_count; i++) {
+                  imm_inst[i]->remove(block);
+               }
 
-   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
-      int vf = -1;
-      enum brw_reg_type need_type;
+               progress = true;
+            }
 
-      /* Look for unconditional MOVs from an immediate with a partial
-       * writemask.  Skip type-conversion MOVs other than integer 0,
-       * where the type doesn't matter.  See if the immediate can be
-       * represented as a VF.
-       */
-      if (inst->opcode == BRW_OPCODE_MOV &&
-          inst->src[0].file == IMM &&
-          inst->predicate == BRW_PREDICATE_NONE &&
-          inst->dst.writemask != WRITEMASK_XYZW &&
-          (inst->src[0].type == inst->dst.type || inst->src[0].d == 0)) {
-
-         vf = brw_float_to_vf(inst->src[0].d);
-         need_type = BRW_REGISTER_TYPE_D;
-
-         if (vf == -1) {
-            vf = brw_float_to_vf(inst->src[0].f);
-            need_type = BRW_REGISTER_TYPE_F;
-         }
-      } else {
-         last_reg = -1;
-      }
+            inst_count = 0;
+            last_reg = -1;
+            writemask = 0;
+            dest_type = BRW_REGISTER_TYPE_F;
 
-      /* If this wasn't a MOV, or the destination register doesn't match,
-       * or we have to switch destination types, then this breaks our
-       * sequence.  Combine anything we've accumulated so far.
-       */
-      if (last_reg != inst->dst.nr ||
-          last_reg_offset != inst->dst.reg_offset ||
-          last_reg_file != inst->dst.file ||
-          (vf > 0 && dest_type != need_type)) {
-
-         if (inst_count > 1) {
-            unsigned vf;
-            memcpy(&vf, imm, sizeof(vf));
-            vec4_instruction *mov = MOV(imm_inst[0]->dst, brw_imm_vf(vf));
-            mov->dst.type = dest_type;
-            mov->dst.writemask = writemask;
-            inst->insert_before(block, mov);
-
-            for (int i = 0; i < inst_count; i++) {
-               imm_inst[i]->remove(block);
+            for (int i = 0; i < 4; i++) {
+               imm[i] = 0;
             }
-
-            progress = true;
          }
 
-         inst_count = 0;
-         last_reg = -1;
-         writemask = 0;
-         dest_type = BRW_REGISTER_TYPE_F;
-
-         for (int i = 0; i < 4; i++) {
-            imm[i] = 0;
+         /* Record this instruction's value (if it was representable). */
+         if (vf != -1) {
+            if ((inst->dst.writemask & WRITEMASK_X) != 0)
+               imm[0] = vf;
+            if ((inst->dst.writemask & WRITEMASK_Y) != 0)
+               imm[1] = vf;
+            if ((inst->dst.writemask & WRITEMASK_Z) != 0)
+               imm[2] = vf;
+            if ((inst->dst.writemask & WRITEMASK_W) != 0)
+               imm[3] = vf;
+
+            writemask |= inst->dst.writemask;
+            imm_inst[inst_count++] = inst;
+
+            last_reg = inst->dst.nr;
+            last_reg_offset = inst->dst.reg_offset;
+            last_reg_file = inst->dst.file;
+            if (vf > 0)
+               dest_type = need_type;
          }
       }
-
-      /* Record this instruction's value (if it was representable). */
-      if (vf != -1) {
-         if ((inst->dst.writemask & WRITEMASK_X) != 0)
-            imm[0] = vf;
-         if ((inst->dst.writemask & WRITEMASK_Y) != 0)
-            imm[1] = vf;
-         if ((inst->dst.writemask & WRITEMASK_Z) != 0)
-            imm[2] = vf;
-         if ((inst->dst.writemask & WRITEMASK_W) != 0)
-            imm[3] = vf;
-
-         writemask |= inst->dst.writemask;
-         imm_inst[inst_count++] = inst;
-
-         last_reg = inst->dst.nr;
-         last_reg_offset = inst->dst.reg_offset;
-         last_reg_file = inst->dst.file;
-         if (vf > 0)
-            dest_type = need_type;
-      }
    }
 
    if (progress)
@@ -1109,7 +1111,7 @@ vec4_visitor::opt_register_coalesce()
       /* Can't coalesce this GRF if someone else was going to
        * read it later.
        */
-      if (var_range_end(var_from_reg(alloc, inst->src[0]), 4) > ip)
+      if (var_range_end(var_from_reg(alloc, dst_reg(inst->src[0])), 4) > ip)
         continue;
 
       /* We need to check interference with the final destination between this
index 0c1f0c3..10898a5 100644 (file)
@@ -246,7 +246,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
              * more -- a sure sign they'll fail operands_match().
              */
             if (src->file == VGRF) {
-               if (var_range_end(var_from_reg(alloc, *src), 4) < ip) {
+               if (var_range_end(var_from_reg(alloc, dst_reg(*src)), 4) < ip) {
                   entry->remove();
                   ralloc_free(entry);
                   break;
index f61c612..5440dba 100644 (file)
@@ -451,6 +451,9 @@ brw_compile_tcs(const struct brw_compiler *compiler,
    nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
    brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map);
    brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map);
+   if (key->quads_workaround)
+      brw_nir_apply_tcs_quads_workaround(nir);
+
    nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
 
    if (is_scalar)
index aa1dc38..67e8e8f 100644 (file)
@@ -21,13 +21,13 @@ extern "C" {
  *   - Gen4-5 record ending occlusion query values (4 * 4 = 16 bytes)
  *   - Disabling OA counters on Gen6+ (3 DWords = 12 bytes)
  *   - Ending MI_REPORT_PERF_COUNT on Gen5+, plus associated PIPE_CONTROLs:
- *     - Two sets of PIPE_CONTROLs, which become 3 PIPE_CONTROLs each on SNB,
- *       which are 5 DWords each ==> 2 * 3 * 5 * 4 = 120 bytes
+ *     - Two sets of PIPE_CONTROLs, which become 4 PIPE_CONTROLs each on SNB,
+ *       which are 5 DWords each ==> 2 * 4 * 5 * 4 = 160 bytes
  *     - 3 DWords for MI_REPORT_PERF_COUNT itself on Gen6+.  ==> 12 bytes.
  *       On Ironlake, it's 6 DWords, but we have some slack due to the lack of
  *       Sandybridge PIPE_CONTROL madness.
- *   - CC_STATE workaround on HSW (12 * 4 = 48 bytes)
- *     - 5 dwords for initial mi_flush
+ *   - CC_STATE workaround on HSW (17 * 4 = 68 bytes)
+ *     - 10 dwords for initial mi_flush
  *     - 2 dwords for CC state setup
  *     - 5 dwords for the required pipe control at the end
  *   - Restoring L3 configuration: (24 dwords = 96 bytes)
@@ -35,7 +35,7 @@ extern "C" {
  *     - 7 dwords for L3 configuration set-up.
  *     - 5 dwords for L3 atomic set-up (on HSW).
  */
-#define BATCH_RESERVED 248
+#define BATCH_RESERVED 308
 
 struct intel_batchbuffer;
 
index 939f9a0..8a0d2ad 100644 (file)
@@ -374,6 +374,19 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx,
    if (!irb->mt)
       return;
 
+   /* Adjust the miptree's upper-left coordinate.
+    *
+    * FIXME: Adjusting the miptree's layout outside of
+    * intel_miptree_create_layout() is fragile. Plumb the adjustment through
+    * intel_miptree_create_layout() and brw_tex_layout().
+    */
+   irb->mt->level[0].level_x = image->tile_x;
+   irb->mt->level[0].level_y = image->tile_y;
+   irb->mt->level[0].slice[0].x_offset = image->tile_x;
+   irb->mt->level[0].slice[0].y_offset = image->tile_y;
+   irb->mt->total_width += image->tile_x;
+   irb->mt->total_height += image->tile_y;
+
    rb->InternalFormat = image->internal_format;
    rb->Width = image->width;
    rb->Height = image->height;
index b6265dc..c234f24 100644 (file)
@@ -366,25 +366,8 @@ intel_miptree_create_layout(struct brw_context *brw,
        _mesa_get_format_name(format),
        first_level, last_level, depth0, mt);
 
-   if (target == GL_TEXTURE_1D_ARRAY) {
-      /* For a 1D Array texture the OpenGL API will treat the height0
-       * parameter as the number of array slices. For Intel hardware, we treat
-       * the 1D array as a 2D Array with a height of 1.
-       *
-       * So, when we first come through this path to create a 1D Array
-       * texture, height0 stores the number of slices, and depth0 is 1. In
-       * this case, we want to swap height0 and depth0.
-       *
-       * Since some miptrees will be created based on the base miptree, we may
-       * come through this path and see height0 as 1 and depth0 being the
-       * number of slices. In this case we don't need to do the swap.
-       */
-      assert(height0 == 1 || depth0 == 1);
-      if (height0 > 1) {
-         depth0 = height0;
-         height0 = 1;
-      }
-   }
+   if (target == GL_TEXTURE_1D_ARRAY)
+      assert(height0 == 1);
 
    mt->target = target;
    mt->format = format;
@@ -1050,6 +1033,7 @@ intel_get_image_dims(struct gl_texture_image *image,
        * as a 2D Array with a height of 1. So, here we want to swap image
        * height and depth.
        */
+      assert(image->Depth == 1);
       *width = image->Width;
       *height = 1;
       *depth = image->Height;
@@ -1805,14 +1789,8 @@ intel_gen7_hiz_buf_create(struct brw_context *brw,
       hz_height = DIV_ROUND_UP(hz_height, 2);
    } else {
       const unsigned hz_qpitch = h0 + h1 + (12 * vertical_align);
-      if (mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
-          mt->target == GL_TEXTURE_CUBE_MAP) {
-         /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth * 6/2) /8 ) * 8 */
-         hz_height = DIV_ROUND_UP(hz_qpitch * Z0 * 6, 2 * 8) * 8;
-      } else {
-         /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth/2) /8 ) * 8 */
-         hz_height = DIV_ROUND_UP(hz_qpitch * Z0, 2 * 8) * 8;
-      }
+      /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth/2) /8 ) * 8 */
+      hz_height = DIV_ROUND_UP(hz_qpitch * Z0, 2 * 8) * 8;
    }
 
    unsigned long pitch;
@@ -1909,15 +1887,6 @@ intel_gen8_hiz_buf_create(struct brw_context *brw,
    } else {
       /* HZ_Height (rows) = ceiling( (HZ_QPitch/2)/8) *8 * Z_Depth */
       hz_height = DIV_ROUND_UP(buf->qpitch, 2 * 8) * 8 * Z0;
-      if (mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
-          mt->target == GL_TEXTURE_CUBE_MAP) {
-         /* HZ_Height (rows) = ceiling( (HZ_QPitch/2)/8) *8 * 6 * Z_Depth
-          *
-          * We can can just take our hz_height calculation from above, and
-          * multiply by 6 for the cube map and cube map array types.
-          */
-         hz_height *= 6;
-      }
    }
 
    unsigned long pitch;
index a486d6e..cacd7e2 100644 (file)
@@ -110,22 +110,6 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
    if (ctx->_ImageTransferState)
       return false;
 
-   /* This renderbuffer can come from a texture.  In this case, we impose
-    * some of the same restrictions we have for textures and adjust for
-    * miplevels.
-    */
-   if (rb->TexImage) {
-      if (rb->TexImage->TexObject->Target != GL_TEXTURE_2D &&
-          rb->TexImage->TexObject->Target != GL_TEXTURE_RECTANGLE)
-         return false;
-
-      int level = rb->TexImage->Level + rb->TexImage->TexObject->MinLevel;
-
-      /* Adjust x and y offset based on miplevel */
-      xoffset += irb->mt->level[level].level_x;
-      yoffset += irb->mt->level[level].level_y;
-   }
-
    /* It is possible that the renderbuffer (or underlying texture) is
     * multisampled.  Since ReadPixels from a multisampled buffer requires a
     * multisample resolve, we can't handle this here
@@ -169,6 +153,9 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
       return false;
    }
 
+   xoffset += irb->mt->level[irb->mt_level].slice[irb->mt_layer].x_offset;
+   yoffset += irb->mt->level[irb->mt_level].slice[irb->mt_layer].y_offset;
+
    dst_pitch = _mesa_image_row_stride(pack, width, format, type);
 
    /* For a window-system renderbuffer, the buffer is actually flipped
@@ -201,7 +188,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
       xoffset * cpp, (xoffset + width) * cpp,
       yoffset, yoffset + height,
       pixels - (ptrdiff_t) yoffset * dst_pitch - (ptrdiff_t) xoffset * cpp,
-      bo->virtual,
+      bo->virtual + irb->mt->offset,
       dst_pitch, irb->mt->pitch,
       brw->has_swizzling,
       irb->mt->tiling,
index 95365fe..7a82be4 100644 (file)
 #define PIPE_CONTROL_PPGTT_WRITE       (0 << 2)
 #define PIPE_CONTROL_GLOBAL_GTT_WRITE  (1 << 2)
 
+#define PIPE_CONTROL_CACHE_FLUSH_BITS \
+   (PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \
+    PIPE_CONTROL_RENDER_TARGET_FLUSH)
+
+#define PIPE_CONTROL_CACHE_INVALIDATE_BITS \
+   (PIPE_CONTROL_STATE_CACHE_INVALIDATE | PIPE_CONTROL_CONST_CACHE_INVALIDATE | \
+    PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
+    PIPE_CONTROL_INSTRUCTION_INVALIDATE)
+
 /** @} */
 
 #define XY_SETUP_BLT_CMD               (CMD_2D | (0x01 << 22))
index 3e359a5..20c58d3 100644 (file)
@@ -49,6 +49,7 @@ struct brw_fence {
    /** The fence waits for completion of this batch. */
    drm_intel_bo *batch_bo;
 
+   mtx_t mutex;
    bool signalled;
 };
 
@@ -77,7 +78,7 @@ brw_fence_insert(struct brw_context *brw, struct brw_fence *fence)
 }
 
 static bool
-brw_fence_has_completed(struct brw_fence *fence)
+brw_fence_has_completed_locked(struct brw_fence *fence)
 {
    if (fence->signalled)
       return true;
@@ -92,13 +93,21 @@ brw_fence_has_completed(struct brw_fence *fence)
    return false;
 }
 
-/**
- * Return true if the function successfully signals or has already signalled.
- * (This matches the behavior expected from __DRI2fence::client_wait_sync).
- */
 static bool
-brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
-                      uint64_t timeout)
+brw_fence_has_completed(struct brw_fence *fence)
+{
+   bool ret;
+
+   mtx_lock(&fence->mutex);
+   ret = brw_fence_has_completed_locked(fence);
+   mtx_unlock(&fence->mutex);
+
+   return ret;
+}
+
+static bool
+brw_fence_client_wait_locked(struct brw_context *brw, struct brw_fence *fence,
+                             uint64_t timeout)
 {
    if (fence->signalled)
       return true;
@@ -123,6 +132,23 @@ brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
    return true;
 }
 
+/**
+ * Return true if the function successfully signals or has already signalled.
+ * (This matches the behavior expected from __DRI2fence::client_wait_sync).
+ */
+static bool
+brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
+                      uint64_t timeout)
+{
+   bool ret;
+
+   mtx_lock(&fence->mutex);
+   ret = brw_fence_client_wait_locked(brw, fence, timeout);
+   mtx_unlock(&fence->mutex);
+
+   return ret;
+}
+
 static void
 brw_fence_server_wait(struct brw_context *brw, struct brw_fence *fence)
 {
@@ -215,6 +241,7 @@ intel_dri_create_fence(__DRIcontext *ctx)
    if (!fence)
       return NULL;
 
+   mtx_init(&fence->mutex, mtx_plain);
    fence->brw = brw;
    brw_fence_insert(brw, fence);
 
@@ -244,6 +271,12 @@ intel_dri_server_wait_sync(__DRIcontext *ctx, void *driver_fence, unsigned flags
 {
    struct brw_fence *fence = driver_fence;
 
+   /* We might be called here with a NULL fence as a result of WaitSyncKHR
+    * on a EGL_KHR_reusable_sync fence. Nothing to do here in such case.
+    */
+   if (!fence)
+      return;
+
    brw_fence_server_wait(fence->brw, fence);
 }
 
index cac33ac..a1364b9 100644 (file)
@@ -140,6 +140,8 @@ intel_alloc_texture_storage(struct gl_context *ctx,
        !intel_miptree_match_image(intel_texobj->mt, first_image) ||
        intel_texobj->mt->last_level != levels - 1) {
       intel_miptree_release(&intel_texobj->mt);
+
+      intel_get_image_dims(first_image, &width, &height, &depth);
       intel_texobj->mt = intel_miptree_create(brw, texobj->Target,
                                               first_image->TexFormat,
                                               0, levels - 1,
index 492ecdc..2c5c2db 100644 (file)
@@ -67,5 +67,5 @@ nv20_emit_frag(struct gl_context *ctx, int emit)
        PUSH_DATA (push, in >> 32);
 
        BEGIN_NV04(push, NV20_3D(RC_ENABLE), 1);
-       PUSH_DATA (push, n);
+       PUSH_DATA (push, MAX2(1, n));
 }
index 2d4bb70..6e006f8 100644 (file)
@@ -484,14 +484,14 @@ swrast_map_renderbuffer(struct gl_context *ctx,
 
       xrb->map_mode = mode;
       xrb->map_x = x;
-      xrb->map_y = y;
+      xrb->map_y = rb->Height - y - h;
       xrb->map_w = w;
       xrb->map_h = h;
 
       stride = w * cpp;
       xrb->Base.Buffer = malloc(h * stride);
 
-      sPriv->swrast_loader->getImage(dPriv, x, rb->Height - y - h, w, h,
+      sPriv->swrast_loader->getImage(dPriv, x, xrb->map_y, w, h,
                                     (char *) xrb->Base.Buffer,
                                     dPriv->loaderPrivate);
 
index d72bc71..18dffc3 100644 (file)
@@ -293,10 +293,9 @@ struct ureg {
    GLuint file:4;
    GLint idx:9;      /* relative addressing may be negative */
                      /* sizeof(idx) should == sizeof(prog_src_reg::Index) */
-   GLuint abs:1;
    GLuint negate:1;
    GLuint swz:12;
-   GLuint pad:5;
+   GLuint pad:6;
 };
 
 
@@ -325,7 +324,6 @@ static const struct ureg undef = {
    0,
    0,
    0,
-   0,
    0
 };
 
@@ -344,7 +342,6 @@ static struct ureg make_ureg(GLuint file, GLint idx)
    struct ureg reg;
    reg.file = file;
    reg.idx = idx;
-   reg.abs = 0;
    reg.negate = 0;
    reg.swz = SWIZZLE_NOOP;
    reg.pad = 0;
@@ -352,15 +349,6 @@ static struct ureg make_ureg(GLuint file, GLint idx)
 }
 
 
-
-static struct ureg absolute( struct ureg reg )
-{
-   reg.abs = 1;
-   reg.negate = 0;
-   return reg;
-}
-
-
 static struct ureg negate( struct ureg reg )
 {
    reg.negate ^= 1;
@@ -961,7 +949,8 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p,
 
       emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
       emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
-      emit_op2(p, OPCODE_POW, spot, 0, absolute(spot), swizzle1(attenuation, W));
+      emit_op1(p, OPCODE_ABS, spot, 0, spot);
+      emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
       emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
 
       release_temp(p, spot);
index d917220..2afe7be 100644 (file)
@@ -85,10 +85,15 @@ _mesa_is_valid_generate_texture_mipmap_internalformat(struct gl_context *ctx,
        *  not specified with an unsized internal format from table 8.3 or a
        *  sized internal format that is both color-renderable and
        *  texture-filterable according to table 8.10."
+       *
+       * GL_EXT_texture_format_BGRA8888 adds a GL_BGRA_EXT unsized internal
+       * format, and includes it in a very similar looking table.  So we
+       * include it here as well.
        */
       return internalformat == GL_RGBA || internalformat == GL_RGB ||
              internalformat == GL_LUMINANCE_ALPHA ||
              internalformat == GL_LUMINANCE || internalformat == GL_ALPHA ||
+             internalformat == GL_BGRA_EXT ||
              (_mesa_is_es3_color_renderable(internalformat) &&
               _mesa_is_es3_texture_filterable(internalformat));
    }
@@ -144,6 +149,11 @@ _mesa_generate_texture_mipmap(struct gl_context *ctx,
       return;
    }
 
+   if (srcImage->Width == 0 || srcImage->Height == 0) {
+      _mesa_unlock_texture(ctx, texObj);
+      return;
+   }
+
    if (target == GL_TEXTURE_CUBE_MAP) {
       GLuint face;
       for (face = 0; face < 6; face++) {
index 9f70749..dc8c9eb 100644 (file)
@@ -411,6 +411,7 @@ static const int extra_ARB_gpu_shader5_or_oes_geometry_shader[] = {
 static const int extra_ARB_gpu_shader5_or_OES_sample_variables[] = {
    EXT(ARB_gpu_shader5),
    EXT(OES_sample_variables),
+   EXTRA_END
 };
 
 EXTRA_EXT(ARB_texture_cube_map);
index 24ce7b0..90f525c 100644 (file)
@@ -907,6 +907,29 @@ _mesa_is_astc_format(GLenum internalFormat)
 }
 
 /**
+ * Test if the given format is an ETC2 format.
+ */
+GLboolean
+_mesa_is_etc2_format(GLenum internalFormat)
+{
+   switch (internalFormat) {
+   case GL_COMPRESSED_RGB8_ETC2:
+   case GL_COMPRESSED_SRGB8_ETC2:
+   case GL_COMPRESSED_RGBA8_ETC2_EAC:
+   case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
+   case GL_COMPRESSED_R11_EAC:
+   case GL_COMPRESSED_RG11_EAC:
+   case GL_COMPRESSED_SIGNED_R11_EAC:
+   case GL_COMPRESSED_SIGNED_RG11_EAC:
+   case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+   case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+      return true;
+   default:
+      return false;
+   }
+}
+
+/**
  * Test if the given format is an integer (non-normalized) format.
  */
 GLboolean
index c73f464..474ede2 100644 (file)
@@ -61,6 +61,9 @@ extern GLboolean
 _mesa_is_astc_format(GLenum internalFormat);
 
 extern GLboolean
+_mesa_is_etc2_format(GLenum internalFormat);
+
+extern GLboolean
 _mesa_is_type_unsigned(GLenum type);
 
 extern GLboolean
index 5956ce4..35ce0f2 100644 (file)
@@ -1385,13 +1385,24 @@ _mesa_get_program_resourceiv(struct gl_shader_program *shProg,
 
 static bool
 validate_io(struct gl_shader_program *producer,
-            struct gl_shader_program *consumer)
+            struct gl_shader_program *consumer,
+            gl_shader_stage producer_stage,
+            gl_shader_stage consumer_stage)
 {
    if (producer == consumer)
       return true;
 
+   const bool nonarray_stage_to_array_stage =
+      producer_stage != MESA_SHADER_TESS_CTRL &&
+      (consumer_stage == MESA_SHADER_GEOMETRY ||
+       consumer_stage == MESA_SHADER_TESS_CTRL ||
+       consumer_stage == MESA_SHADER_TESS_EVAL);
+
    bool valid = true;
 
+   void *name_buffer = NULL;
+   size_t name_buffer_size = 0;
+
    gl_shader_variable const **outputs =
       (gl_shader_variable const **) calloc(producer->NumProgramResourceList,
                                            sizeof(gl_shader_variable *));
@@ -1463,11 +1474,52 @@ validate_io(struct gl_shader_program *producer,
             }
          }
       } else {
+         char *consumer_name = consumer_var->name;
+
+         if (nonarray_stage_to_array_stage &&
+             consumer_var->interface_type != NULL &&
+             consumer_var->interface_type->is_array() &&
+             !is_gl_identifier(consumer_var->name)) {
+            const size_t name_len = strlen(consumer_var->name);
+
+            if (name_len >= name_buffer_size) {
+               free(name_buffer);
+
+               name_buffer_size = name_len + 1;
+               name_buffer = malloc(name_buffer_size);
+               if (name_buffer == NULL) {
+                  valid = false;
+                  goto out;
+               }
+            }
+
+            consumer_name = (char *) name_buffer;
+
+            char *s = strchr(consumer_var->name, '[');
+            if (s == NULL) {
+               valid = false;
+               goto out;
+            }
+
+            char *t = strchr(s, ']');
+            if (t == NULL) {
+               valid = false;
+               goto out;
+            }
+
+            assert(t[1] == '.' || t[1] == '[');
+
+            const ptrdiff_t base_name_len = s - consumer_var->name;
+
+            memcpy(consumer_name, consumer_var->name, base_name_len);
+            strcpy(consumer_name + base_name_len, t + 1);
+         }
+
          for (unsigned j = 0; j < num_outputs; j++) {
             const gl_shader_variable *const var = outputs[j];
 
             if (!var->explicit_location &&
-                strcmp(consumer_var->name, var->name) == 0) {
+                strcmp(consumer_name, var->name) == 0) {
                producer_var = var;
                match_index = j;
                break;
@@ -1529,25 +1581,53 @@ validate_io(struct gl_shader_program *producer,
        * Note that location mismatches are detected by the loops above that
        * find the producer variable that goes with the consumer variable.
        */
-      if (producer_var->type != consumer_var->type ||
-          producer_var->interpolation != consumer_var->interpolation ||
-          producer_var->precision != consumer_var->precision) {
+      if (nonarray_stage_to_array_stage) {
+         if (!consumer_var->type->is_array() ||
+             consumer_var->type->fields.array != producer_var->type) {
+            valid = false;
+            goto out;
+         }
+
+         if (consumer_var->interface_type != NULL) {
+            if (!consumer_var->interface_type->is_array() ||
+                consumer_var->interface_type->fields.array != producer_var->interface_type) {
+               valid = false;
+               goto out;
+            }
+         } else if (producer_var->interface_type != NULL) {
+            valid = false;
+            goto out;
+         }
+      } else {
+         if (producer_var->type != consumer_var->type) {
+            valid = false;
+            goto out;
+         }
+
+         if (producer_var->interface_type != consumer_var->interface_type) {
+            valid = false;
+            goto out;
+         }
+      }
+
+      if (producer_var->interpolation != consumer_var->interpolation) {
          valid = false;
          goto out;
       }
 
-      if (producer_var->outermost_struct_type != consumer_var->outermost_struct_type) {
+      if (producer_var->precision != consumer_var->precision) {
          valid = false;
          goto out;
       }
 
-      if (producer_var->interface_type != consumer_var->interface_type) {
+      if (producer_var->outermost_struct_type != consumer_var->outermost_struct_type) {
          valid = false;
          goto out;
       }
    }
 
  out:
+   free(name_buffer);
    free(outputs);
    return valid && num_outputs == 0;
 }
@@ -1579,7 +1659,9 @@ _mesa_validate_pipeline_io(struct gl_pipeline_object *pipeline)
          if (shProg[idx]->_LinkedShaders[idx]->Stage == MESA_SHADER_COMPUTE)
             break;
 
-         if (!validate_io(shProg[prev], shProg[idx]))
+         if (!validate_io(shProg[prev], shProg[idx],
+                          shProg[prev]->_LinkedShaders[prev]->Stage,
+                          shProg[idx]->_LinkedShaders[idx]->Stage))
             return false;
 
          prev = idx;
index fc3cc6b..3dde03f 100644 (file)
@@ -502,13 +502,15 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions,
           */
          if (format == rgba_format) {
             rgba = dest;
-         } else if (rgba == NULL) { /* Allocate the RGBA buffer only once */
+         } else {
             need_convert = true;
-            rgba = malloc(height * rgba_stride);
-            if (!rgba) {
-               _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage()");
-               ctx->Driver.UnmapTextureImage(ctx, texImage, img);
-               return;
+            if (rgba == NULL) { /* Allocate the RGBA buffer only once */
+               rgba = malloc(height * rgba_stride);
+               if (!rgba) {
+                  _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage()");
+                  ctx->Driver.UnmapTextureImage(ctx, texImage, img);
+                  return;
+               }
             }
          }
 
index 58b7f27..7b13a28 100644 (file)
@@ -1300,6 +1300,7 @@ bool
 _mesa_format_no_online_compression(const struct gl_context *ctx, GLenum format)
 {
    return _mesa_is_astc_format(format) ||
+          _mesa_is_etc2_format(format) ||
           compressedteximage_only_format(ctx, format);
 }
 
@@ -2587,10 +2588,16 @@ check_rtt_cb(GLuint key, void *data, void *userData)
              att->Texture == texObj &&
              att->TextureLevel == level &&
              att->CubeMapFace == face) {
-            _mesa_update_texture_renderbuffer(ctx, ctx->DrawBuffer, att);
+            _mesa_update_texture_renderbuffer(ctx, fb, att);
             assert(att->Renderbuffer->TexImage);
             /* Mark fb status as indeterminate to force re-validation */
             fb->_Status = 0;
+
+            /* Make sure that the revalidation actually happens if this is
+             * being done to currently-bound buffers.
+             */
+            if (fb == ctx->DrawBuffer || fb == ctx->ReadBuffer)
+               ctx->NewState |= _NEW_BUFFERS;
          }
       }
    }
index f4a0760..72ed869 100644 (file)
@@ -179,9 +179,7 @@ clear_texture_fields(struct gl_context *ctx,
             return;
         }
 
-         _mesa_init_teximage_fields(ctx, texImage,
-                                    0, 0, 0, 0, /* w, h, d, border */
-                                    GL_NONE, MESA_FORMAT_NONE);
+         _mesa_clear_texture_image(ctx, texImage);
       }
    }
 }
index 9a280fc..5f76241 100644 (file)
@@ -37,6 +37,7 @@
 #include "main/enums.h"
 #include "main/formats.h"
 #include "main/glformats.h"
+#include "main/texcompress.h"
 #include "main/texgetimage.h"
 #include "main/teximage.h"
 #include "main/texstore.h"
@@ -2282,6 +2283,12 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target,
    }
 
    if (pFormat == PIPE_FORMAT_NONE) {
+      /* lie about using etc1/etc2 natively if we do decoding tricks */
+      mFormat = _mesa_glenum_to_compressed_format(internalFormat);
+      if ((mFormat == MESA_FORMAT_ETC1_RGB8 && !st->has_etc1) ||
+          (_mesa_is_format_etc2(mFormat) && !st->has_etc2))
+          return mFormat;
+
       /* no luck at all */
       return MESA_FORMAT_NONE;
    }
index aa443a5..54db9b1 100644 (file)
@@ -1136,7 +1136,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_double(double val)
    uval[0].u = *(uint32_t *)&val;
    uval[1].u = *(((uint32_t *)&val) + 1);
    src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle);
-
+   src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
    return src;
 }
 
@@ -1958,12 +1958,14 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
          emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
       break;
    case ir_unop_bitcast_f2i:
-      result_src = op[0];
-      result_src.type = GLSL_TYPE_INT;
-      break;
    case ir_unop_bitcast_f2u:
-      result_src = op[0];
-      result_src.type = GLSL_TYPE_UINT;
+      /* Make sure we don't propagate the negate modifier to integer opcodes. */
+      if (op[0].negate)
+         emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
+      else
+         result_src = op[0];
+      result_src.type = ir->operation == ir_unop_bitcast_f2i ? GLSL_TYPE_INT :
+                                                               GLSL_TYPE_UINT;
       break;
    case ir_unop_bitcast_i2f:
    case ir_unop_bitcast_u2f:
@@ -6045,7 +6047,11 @@ st_translate_program(
                               inputSemanticName[i], inputSemanticIndex[i],
                               interpMode[i], 0, interpLocation[i],
                               array_id, array_size);
-            i += array_size - 1;
+
+            GLuint base_attr = inputSlotToAttr[i];
+            while (i + 1 < numInputs &&
+                   inputSlotToAttr[i + 1] < base_attr + array_size)
+               ++i;
          }
          else {
             t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
index dffa52f..4f599dd 100644 (file)
@@ -65,6 +65,7 @@ st_vdpau_video_surface_gallium(struct gl_context *ctx, const void *vdpSurface,
 
    struct pipe_video_buffer *buffer;
    struct pipe_sampler_view **samplers;
+   struct pipe_resource *res = NULL;
 
    getProcAddr = (void *)ctx->vdpGetProcAddress;
    if (getProcAddr(device, VDP_FUNC_ID_VIDEO_SURFACE_GALLIUM, (void**)&f))
@@ -82,7 +83,8 @@ st_vdpau_video_surface_gallium(struct gl_context *ctx, const void *vdpSurface,
    if (!sv)
       return NULL;
 
-   return sv->texture;
+   pipe_resource_reference(&res, sv->texture);
+   return res;
 }
 
 static struct pipe_resource *
@@ -90,13 +92,15 @@ st_vdpau_output_surface_gallium(struct gl_context *ctx, const void *vdpSurface)
 {
    int (*getProcAddr)(uint32_t device, uint32_t id, void **ptr);
    uint32_t device = (uintptr_t)ctx->vdpDevice;
+   struct pipe_resource *res = NULL;
    VdpOutputSurfaceGallium *f;
 
    getProcAddr = (void *)ctx->vdpGetProcAddress;
    if (getProcAddr(device, VDP_FUNC_ID_OUTPUT_SURFACE_GALLIUM, (void**)&f))
       return NULL;
 
-   return f((uintptr_t)vdpSurface);
+   pipe_resource_reference(&res, f((uintptr_t)vdpSurface));
+   return res;
 }
 
 static struct pipe_resource *
@@ -208,6 +212,7 @@ st_vdpau_map_surface(struct gl_context *ctx, GLenum target, GLenum access,
    /* do we have different screen objects ? */
    if (res->screen != st->pipe->screen) {
       _mesa_error(ctx, GL_INVALID_OPERATION, "VDPAUMapSurfacesNV");
+      pipe_resource_reference(&res, NULL);
       return;
    }
 
@@ -241,6 +246,7 @@ st_vdpau_map_surface(struct gl_context *ctx, GLenum target, GLenum access,
    stObj->surface_format = res->format;
 
    _mesa_dirty_texobj(ctx, texObj);
+   pipe_resource_reference(&res, NULL);
 }
 
 static void
index 87ed7f7..96ed84f 100644 (file)
@@ -814,6 +814,7 @@ vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
    prim[0].basevertex = basevertex;
    prim[0].num_instances = numInstances;
    prim[0].base_instance = baseInstance;
+   prim[0].draw_id = 0;
 
    /* Need to give special consideration to rendering a range of
     * indices starting somewhere above zero.  Typically the
index 97a1dfd..fafdf1d 100644 (file)
@@ -1167,8 +1167,8 @@ _save_OBE_DrawArrays(GLenum mode, GLint start, GLsizei count)
  * then emitting an indexed prim at runtime.
  */
 static void GLAPIENTRY
-_save_OBE_DrawElements(GLenum mode, GLsizei count, GLenum type,
-                       const GLvoid * indices)
+_save_OBE_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
+                                 const GLvoid * indices, GLint basevertex)
 {
    GET_CURRENT_CONTEXT(ctx);
    struct vbo_save_context *save = &vbo_context(ctx)->save;
@@ -1205,15 +1205,15 @@ _save_OBE_DrawElements(GLenum mode, GLsizei count, GLenum type,
    switch (type) {
    case GL_UNSIGNED_BYTE:
       for (i = 0; i < count; i++)
-         CALL_ArrayElement(GET_DISPATCH(), (((GLubyte *) indices)[i]));
+         CALL_ArrayElement(GET_DISPATCH(), (basevertex + ((GLubyte *) indices)[i]));
       break;
    case GL_UNSIGNED_SHORT:
       for (i = 0; i < count; i++)
-         CALL_ArrayElement(GET_DISPATCH(), (((GLushort *) indices)[i]));
+         CALL_ArrayElement(GET_DISPATCH(), (basevertex + ((GLushort *) indices)[i]));
       break;
    case GL_UNSIGNED_INT:
       for (i = 0; i < count; i++)
-         CALL_ArrayElement(GET_DISPATCH(), (((GLuint *) indices)[i]));
+         CALL_ArrayElement(GET_DISPATCH(), (basevertex + ((GLuint *) indices)[i]));
       break;
    default:
       _mesa_error(ctx, GL_INVALID_ENUM, "glDrawElements(type)");
@@ -1225,6 +1225,13 @@ _save_OBE_DrawElements(GLenum mode, GLsizei count, GLenum type,
    _ae_unmap_vbos(ctx);
 }
 
+static void GLAPIENTRY
+_save_OBE_DrawElements(GLenum mode, GLsizei count, GLenum type,
+                       const GLvoid * indices)
+{
+   _save_OBE_DrawElementsBaseVertex(mode, count, type, indices, 0);
+}
+
 
 static void GLAPIENTRY
 _save_OBE_DrawRangeElements(GLenum mode, GLuint start, GLuint end,
@@ -1462,6 +1469,7 @@ vbo_initialize_save_dispatch(const struct gl_context *ctx,
 {
    SET_DrawArrays(exec, _save_OBE_DrawArrays);
    SET_DrawElements(exec, _save_OBE_DrawElements);
+   SET_DrawElementsBaseVertex(exec, _save_OBE_DrawElementsBaseVertex);
    SET_DrawRangeElements(exec, _save_OBE_DrawRangeElements);
    SET_MultiDrawElementsEXT(exec, _save_OBE_MultiDrawElements);
    SET_MultiDrawElementsBaseVertex(exec, _save_OBE_MultiDrawElementsBaseVertex);
index cb27ef9..1c3474c 100644 (file)
@@ -243,7 +243,7 @@ begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag )
 static GLuint
 elt(struct copy_context *copy, GLuint elt_idx)
 {
-   GLuint elt = copy->srcelt[elt_idx];
+   GLuint elt = copy->srcelt[elt_idx] + copy->prim->basevertex;
    GLuint slot = elt & (ELT_TABLE_SIZE-1);
 
 /*    printf("elt %d\n", elt); */